Upload folder using huggingface_hub
Browse files- random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_0/ae.pt +3 -0
- random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_0/config.json +31 -0
- random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_0/eval_results.json +1 -0
- random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_1/ae.pt +3 -0
- random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_1/config.json +31 -0
- random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_1/eval_results.json +1 -0
- random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_2/ae.pt +3 -0
- random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_2/config.json +31 -0
- random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_2/eval_results.json +1 -0
- random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_3/ae.pt +3 -0
- random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_3/config.json +31 -0
- random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_3/eval_results.json +1 -0
- random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_4/ae.pt +3 -0
- random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_4/config.json +31 -0
- random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_4/eval_results.json +1 -0
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa4e6b4f7486b1d71399e523d139b238ab923da1851b8fa21a8eb1b5afd7eec6
|
3 |
+
size 302066710
|
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_0/config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TopKTrainer",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0003,
|
6 |
+
"steps": 244140,
|
7 |
+
"auxk_alpha": 0.03125,
|
8 |
+
"warmup_steps": 1000,
|
9 |
+
"decay_start": 195312,
|
10 |
+
"threshold_beta": 0.999,
|
11 |
+
"threshold_start_step": 1000,
|
12 |
+
"seed": 0,
|
13 |
+
"activation_dim": 2304,
|
14 |
+
"dict_size": 16384,
|
15 |
+
"k": 80,
|
16 |
+
"device": "cuda:0",
|
17 |
+
"layer": 12,
|
18 |
+
"lm_name": "google/gemma-2-2b",
|
19 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_0",
|
20 |
+
"submodule_name": "resid_post_layer_12"
|
21 |
+
},
|
22 |
+
"buffer": {
|
23 |
+
"d_submodule": 2304,
|
24 |
+
"io": "out",
|
25 |
+
"n_ctxs": 244,
|
26 |
+
"ctx_len": 1024,
|
27 |
+
"refresh_batch_size": 4,
|
28 |
+
"out_batch_size": 2048,
|
29 |
+
"device": "cuda:0"
|
30 |
+
}
|
31 |
+
}
|
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_0/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 54.80125, "l1_loss": 531.72, "l0": 79.998916015625, "frac_variance_explained": 0.8495703125, "cossim": 0.922265625, "l2_ratio": 0.922890625, "relative_reconstruction_bias": 1.00115234375, "loss_original": 2.152919921875, "loss_reconstructed": 2.245849609375, "loss_zero": 12.4375, "frac_recovered": 0.99048828125, "frac_alive": 0.9967041015625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_1/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3aaec3b802e11624df91aac29e1aced67a2151e27b32c9876f373ad2b4f3649c
|
3 |
+
size 302066710
|
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_1/config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TopKTrainer",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0003,
|
6 |
+
"steps": 244140,
|
7 |
+
"auxk_alpha": 0.03125,
|
8 |
+
"warmup_steps": 1000,
|
9 |
+
"decay_start": 195312,
|
10 |
+
"threshold_beta": 0.999,
|
11 |
+
"threshold_start_step": 1000,
|
12 |
+
"seed": 1,
|
13 |
+
"activation_dim": 2304,
|
14 |
+
"dict_size": 16384,
|
15 |
+
"k": 80,
|
16 |
+
"device": "cuda:0",
|
17 |
+
"layer": 12,
|
18 |
+
"lm_name": "google/gemma-2-2b",
|
19 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_1",
|
20 |
+
"submodule_name": "resid_post_layer_12"
|
21 |
+
},
|
22 |
+
"buffer": {
|
23 |
+
"d_submodule": 2304,
|
24 |
+
"io": "out",
|
25 |
+
"n_ctxs": 244,
|
26 |
+
"ctx_len": 1024,
|
27 |
+
"refresh_batch_size": 4,
|
28 |
+
"out_batch_size": 2048,
|
29 |
+
"device": "cuda:0"
|
30 |
+
}
|
31 |
+
}
|
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_1/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 54.8, "l1_loss": 530.34, "l0": 79.996669921875, "frac_variance_explained": 0.84814453125, "cossim": 0.9223046875, "l2_ratio": 0.92255859375, "relative_reconstruction_bias": 0.9994140625, "loss_original": 2.152919921875, "loss_reconstructed": 2.247138671875, "loss_zero": 12.4375, "frac_recovered": 0.99025390625, "frac_alive": 0.9964599609375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_2/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ec8b7ef6aa510fde79dec55e39c90043018c0ce67c0221e9d47a352171afebb
|
3 |
+
size 302066710
|
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_2/config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TopKTrainer",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0003,
|
6 |
+
"steps": 244140,
|
7 |
+
"auxk_alpha": 0.03125,
|
8 |
+
"warmup_steps": 1000,
|
9 |
+
"decay_start": 195312,
|
10 |
+
"threshold_beta": 0.999,
|
11 |
+
"threshold_start_step": 1000,
|
12 |
+
"seed": 2,
|
13 |
+
"activation_dim": 2304,
|
14 |
+
"dict_size": 16384,
|
15 |
+
"k": 80,
|
16 |
+
"device": "cuda:0",
|
17 |
+
"layer": 12,
|
18 |
+
"lm_name": "google/gemma-2-2b",
|
19 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_2",
|
20 |
+
"submodule_name": "resid_post_layer_12"
|
21 |
+
},
|
22 |
+
"buffer": {
|
23 |
+
"d_submodule": 2304,
|
24 |
+
"io": "out",
|
25 |
+
"n_ctxs": 244,
|
26 |
+
"ctx_len": 1024,
|
27 |
+
"refresh_batch_size": 4,
|
28 |
+
"out_batch_size": 2048,
|
29 |
+
"device": "cuda:0"
|
30 |
+
}
|
31 |
+
}
|
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_2/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 54.7875, "l1_loss": 529.68, "l0": 79.9999365234375, "frac_variance_explained": 0.84861328125, "cossim": 0.922265625, "l2_ratio": 0.92236328125, "relative_reconstruction_bias": 0.999296875, "loss_original": 2.152919921875, "loss_reconstructed": 2.247548828125, "loss_zero": 12.4375, "frac_recovered": 0.990078125, "frac_alive": 0.9959716796875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_3/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:31b181e47a10487ffb6c139563727634af5077c2063f0e86969fec5f5e5f8f2d
|
3 |
+
size 302066710
|
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_3/config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TopKTrainer",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0003,
|
6 |
+
"steps": 244140,
|
7 |
+
"auxk_alpha": 0.03125,
|
8 |
+
"warmup_steps": 1000,
|
9 |
+
"decay_start": 195312,
|
10 |
+
"threshold_beta": 0.999,
|
11 |
+
"threshold_start_step": 1000,
|
12 |
+
"seed": 3,
|
13 |
+
"activation_dim": 2304,
|
14 |
+
"dict_size": 16384,
|
15 |
+
"k": 80,
|
16 |
+
"device": "cuda:0",
|
17 |
+
"layer": 12,
|
18 |
+
"lm_name": "google/gemma-2-2b",
|
19 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_3",
|
20 |
+
"submodule_name": "resid_post_layer_12"
|
21 |
+
},
|
22 |
+
"buffer": {
|
23 |
+
"d_submodule": 2304,
|
24 |
+
"io": "out",
|
25 |
+
"n_ctxs": 244,
|
26 |
+
"ctx_len": 1024,
|
27 |
+
"refresh_batch_size": 4,
|
28 |
+
"out_batch_size": 2048,
|
29 |
+
"device": "cuda:0"
|
30 |
+
}
|
31 |
+
}
|
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_3/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 54.785, "l1_loss": 531.16, "l0": 79.9999755859375, "frac_variance_explained": 0.847578125, "cossim": 0.9221484375, "l2_ratio": 0.92216796875, "relative_reconstruction_bias": 1.00029296875, "loss_original": 2.152919921875, "loss_reconstructed": 2.245234375, "loss_zero": 12.4375, "frac_recovered": 0.99060546875, "frac_alive": 0.99639892578125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_4/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e85139264518adf5acfdd3639f2464f480c2b8193efccc9dbd3d7c06515577e9
|
3 |
+
size 302066710
|
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_4/config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TopKTrainer",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0003,
|
6 |
+
"steps": 244140,
|
7 |
+
"auxk_alpha": 0.03125,
|
8 |
+
"warmup_steps": 1000,
|
9 |
+
"decay_start": 195312,
|
10 |
+
"threshold_beta": 0.999,
|
11 |
+
"threshold_start_step": 1000,
|
12 |
+
"seed": 4,
|
13 |
+
"activation_dim": 2304,
|
14 |
+
"dict_size": 16384,
|
15 |
+
"k": 80,
|
16 |
+
"device": "cuda:0",
|
17 |
+
"layer": 12,
|
18 |
+
"lm_name": "google/gemma-2-2b",
|
19 |
+
"wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_4",
|
20 |
+
"submodule_name": "resid_post_layer_12"
|
21 |
+
},
|
22 |
+
"buffer": {
|
23 |
+
"d_submodule": 2304,
|
24 |
+
"io": "out",
|
25 |
+
"n_ctxs": 244,
|
26 |
+
"ctx_len": 1024,
|
27 |
+
"refresh_batch_size": 4,
|
28 |
+
"out_batch_size": 2048,
|
29 |
+
"device": "cuda:0"
|
30 |
+
}
|
31 |
+
}
|
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_4/eval_results.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"l2_loss": 54.755, "l1_loss": 531.58, "l0": 79.99732421875, "frac_variance_explained": 0.8508203125, "cossim": 0.9223828125, "l2_ratio": 0.92240234375, "relative_reconstruction_bias": 1.0003125, "loss_original": 2.152919921875, "loss_reconstructed": 2.24703125, "loss_zero": 12.4375, "frac_recovered": 0.99060546875, "frac_alive": 0.99603271484375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
|