Removing SAEs with LR != 7e-5
#7
by
Aric
- opened
This view is limited to 50 files because it contains too many changes.
See the raw diff here.
- layer_0/width_131k/average_l0_11/hparams.json +1 -0
- layer_0/width_131k/average_l0_15/hparams.json +1 -0
- layer_0/width_131k/average_l0_21/hparams.json +1 -0
- layer_0/width_131k/average_l0_30/hparams.json +1 -0
- layer_0/width_131k/average_l0_41/hparams.json +1 -0
- layer_0/width_131k/average_l0_8/hparams.json +1 -0
- layer_0/width_16k/average_l0_11/hparams.json +1 -0
- layer_0/width_16k/average_l0_129/hparams.json +1 -0
- layer_0/width_16k/average_l0_17/hparams.json +1 -0
- layer_0/width_16k/average_l0_35/hparams.json +1 -0
- layer_0/width_16k/average_l0_68/hparams.json +1 -0
- layer_1/width_131k/average_l0_13/hparams.json +1 -0
- layer_1/width_131k/average_l0_20/hparams.json +1 -0
- layer_1/width_131k/average_l0_33/hparams.json +1 -0
- layer_1/width_131k/average_l0_56/hparams.json +1 -0
- layer_1/width_131k/average_l0_6/hparams.json +1 -0
- layer_1/width_131k/average_l0_9/hparams.json +1 -0
- layer_1/width_16k/average_l0_15/hparams.json +1 -0
- layer_1/width_16k/average_l0_175/hparams.json +1 -0
- layer_1/width_16k/average_l0_31/hparams.json +1 -0
- layer_1/width_16k/average_l0_69/hparams.json +1 -0
- layer_1/width_16k/average_l0_9/hparams.json +1 -0
- layer_10/width_131k/average_l0_15/hparams.json +1 -0
- layer_10/width_131k/average_l0_151/hparams.json +1 -0
- layer_10/width_131k/average_l0_27/hparams.json +1 -0
- layer_10/width_131k/average_l0_47/hparams.json +1 -0
- layer_10/width_131k/average_l0_84/hparams.json +1 -0
- layer_10/width_131k/average_l0_9/hparams.json +1 -0
- layer_10/width_16k/average_l0_10/hparams.json +1 -0
- layer_10/width_16k/average_l0_113/hparams.json +1 -0
- layer_10/width_16k/average_l0_17/hparams.json +1 -0
- layer_10/width_16k/average_l0_243/hparams.json +1 -0
- layer_10/width_16k/average_l0_31/hparams.json +1 -0
- layer_10/width_16k/average_l0_57/hparams.json +1 -0
- layer_11/width_131k/average_l0_16/hparams.json +1 -0
- layer_11/width_131k/average_l0_162/hparams.json +1 -0
- layer_11/width_131k/average_l0_27/hparams.json +1 -0
- layer_11/width_131k/average_l0_49/hparams.json +1 -0
- layer_11/width_131k/average_l0_88/hparams.json +1 -0
- layer_11/width_131k/average_l0_9/hparams.json +1 -0
- layer_11/width_16k/average_l0_10/hparams.json +1 -0
- layer_11/width_16k/average_l0_118/hparams.json +1 -0
- layer_11/width_16k/average_l0_18/hparams.json +1 -0
- layer_11/width_16k/average_l0_255/hparams.json +1 -0
- layer_11/width_16k/average_l0_32/hparams.json +1 -0
- layer_11/width_16k/average_l0_60/hparams.json +1 -0
- layer_12/width_131k/average_l0_10/hparams.json +1 -0
- layer_12/width_131k/average_l0_17/hparams.json +1 -0
- layer_12/width_131k/average_l0_183/hparams.json +1 -0
- layer_12/width_131k/average_l0_29/hparams.json +1 -0
layer_0/width_131k/average_l0_11/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.002}
|
layer_0/width_131k/average_l0_15/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.001}
|
layer_0/width_131k/average_l0_21/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.0005}
|
layer_0/width_131k/average_l0_30/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.00025}
|
layer_0/width_131k/average_l0_41/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.000125}
|
layer_0/width_131k/average_l0_8/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.004}
|
layer_0/width_16k/average_l0_11/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.002}
|
layer_0/width_16k/average_l0_129/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.000125}
|
layer_0/width_16k/average_l0_17/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.001}
|
layer_0/width_16k/average_l0_35/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.0005}
|
layer_0/width_16k/average_l0_68/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.00025}
|
layer_1/width_131k/average_l0_13/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.001}
|
layer_1/width_131k/average_l0_20/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.0005}
|
layer_1/width_131k/average_l0_33/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.00025}
|
layer_1/width_131k/average_l0_56/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.000125}
|
layer_1/width_131k/average_l0_6/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.004}
|
layer_1/width_131k/average_l0_9/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.002}
|
layer_1/width_16k/average_l0_15/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.001}
|
layer_1/width_16k/average_l0_175/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.000125}
|
layer_1/width_16k/average_l0_31/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.0005}
|
layer_1/width_16k/average_l0_69/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.00025}
|
layer_1/width_16k/average_l0_9/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.002}
|
layer_10/width_131k/average_l0_15/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.002}
|
layer_10/width_131k/average_l0_151/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.000125}
|
layer_10/width_131k/average_l0_27/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.001}
|
layer_10/width_131k/average_l0_47/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.0005}
|
layer_10/width_131k/average_l0_84/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.00025}
|
layer_10/width_131k/average_l0_9/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.004}
|
layer_10/width_16k/average_l0_10/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.004}
|
layer_10/width_16k/average_l0_113/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.00025}
|
layer_10/width_16k/average_l0_17/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.002}
|
layer_10/width_16k/average_l0_243/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.000125}
|
layer_10/width_16k/average_l0_31/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.001}
|
layer_10/width_16k/average_l0_57/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.0005}
|
layer_11/width_131k/average_l0_16/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.002}
|
layer_11/width_131k/average_l0_162/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.000125}
|
layer_11/width_131k/average_l0_27/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.001}
|
layer_11/width_131k/average_l0_49/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.0005}
|
layer_11/width_131k/average_l0_88/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.00025}
|
layer_11/width_131k/average_l0_9/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.004}
|
layer_11/width_16k/average_l0_10/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.004}
|
layer_11/width_16k/average_l0_118/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.00025}
|
layer_11/width_16k/average_l0_18/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.002}
|
layer_11/width_16k/average_l0_255/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.000125}
|
layer_11/width_16k/average_l0_32/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.001}
|
layer_11/width_16k/average_l0_60/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.0005}
|
layer_12/width_131k/average_l0_10/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.004}
|
layer_12/width_131k/average_l0_17/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.002}
|
layer_12/width_131k/average_l0_183/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.000125}
|
layer_12/width_131k/average_l0_29/hparams.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sparsity_lambda": 0.001}
|