adamkarvonen commited on
Commit
826f4bc
·
verified ·
1 Parent(s): 4abc522

Upload folder using huggingface_hub

Browse files
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa4e6b4f7486b1d71399e523d139b238ab923da1851b8fa21a8eb1b5afd7eec6
3
+ size 302066710
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_0/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TopKTrainer",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "seed": 0,
13
+ "activation_dim": 2304,
14
+ "dict_size": 16384,
15
+ "k": 80,
16
+ "device": "cuda:0",
17
+ "layer": 12,
18
+ "lm_name": "google/gemma-2-2b",
19
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_0",
20
+ "submodule_name": "resid_post_layer_12"
21
+ },
22
+ "buffer": {
23
+ "d_submodule": 2304,
24
+ "io": "out",
25
+ "n_ctxs": 244,
26
+ "ctx_len": 1024,
27
+ "refresh_batch_size": 4,
28
+ "out_batch_size": 2048,
29
+ "device": "cuda:0"
30
+ }
31
+ }
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 54.80125, "l1_loss": 531.72, "l0": 79.998916015625, "frac_variance_explained": 0.8495703125, "cossim": 0.922265625, "l2_ratio": 0.922890625, "relative_reconstruction_bias": 1.00115234375, "loss_original": 2.152919921875, "loss_reconstructed": 2.245849609375, "loss_zero": 12.4375, "frac_recovered": 0.99048828125, "frac_alive": 0.9967041015625, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3aaec3b802e11624df91aac29e1aced67a2151e27b32c9876f373ad2b4f3649c
3
+ size 302066710
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_1/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TopKTrainer",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "seed": 1,
13
+ "activation_dim": 2304,
14
+ "dict_size": 16384,
15
+ "k": 80,
16
+ "device": "cuda:0",
17
+ "layer": 12,
18
+ "lm_name": "google/gemma-2-2b",
19
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_1",
20
+ "submodule_name": "resid_post_layer_12"
21
+ },
22
+ "buffer": {
23
+ "d_submodule": 2304,
24
+ "io": "out",
25
+ "n_ctxs": 244,
26
+ "ctx_len": 1024,
27
+ "refresh_batch_size": 4,
28
+ "out_batch_size": 2048,
29
+ "device": "cuda:0"
30
+ }
31
+ }
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_1/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 54.8, "l1_loss": 530.34, "l0": 79.996669921875, "frac_variance_explained": 0.84814453125, "cossim": 0.9223046875, "l2_ratio": 0.92255859375, "relative_reconstruction_bias": 0.9994140625, "loss_original": 2.152919921875, "loss_reconstructed": 2.247138671875, "loss_zero": 12.4375, "frac_recovered": 0.99025390625, "frac_alive": 0.9964599609375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ec8b7ef6aa510fde79dec55e39c90043018c0ce67c0221e9d47a352171afebb
3
+ size 302066710
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_2/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TopKTrainer",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "seed": 2,
13
+ "activation_dim": 2304,
14
+ "dict_size": 16384,
15
+ "k": 80,
16
+ "device": "cuda:0",
17
+ "layer": 12,
18
+ "lm_name": "google/gemma-2-2b",
19
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_2",
20
+ "submodule_name": "resid_post_layer_12"
21
+ },
22
+ "buffer": {
23
+ "d_submodule": 2304,
24
+ "io": "out",
25
+ "n_ctxs": 244,
26
+ "ctx_len": 1024,
27
+ "refresh_batch_size": 4,
28
+ "out_batch_size": 2048,
29
+ "device": "cuda:0"
30
+ }
31
+ }
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_2/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 54.7875, "l1_loss": 529.68, "l0": 79.9999365234375, "frac_variance_explained": 0.84861328125, "cossim": 0.922265625, "l2_ratio": 0.92236328125, "relative_reconstruction_bias": 0.999296875, "loss_original": 2.152919921875, "loss_reconstructed": 2.247548828125, "loss_zero": 12.4375, "frac_recovered": 0.990078125, "frac_alive": 0.9959716796875, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31b181e47a10487ffb6c139563727634af5077c2063f0e86969fec5f5e5f8f2d
3
+ size 302066710
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_3/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TopKTrainer",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "seed": 3,
13
+ "activation_dim": 2304,
14
+ "dict_size": 16384,
15
+ "k": 80,
16
+ "device": "cuda:0",
17
+ "layer": 12,
18
+ "lm_name": "google/gemma-2-2b",
19
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_3",
20
+ "submodule_name": "resid_post_layer_12"
21
+ },
22
+ "buffer": {
23
+ "d_submodule": 2304,
24
+ "io": "out",
25
+ "n_ctxs": 244,
26
+ "ctx_len": 1024,
27
+ "refresh_batch_size": 4,
28
+ "out_batch_size": 2048,
29
+ "device": "cuda:0"
30
+ }
31
+ }
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_3/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 54.785, "l1_loss": 531.16, "l0": 79.9999755859375, "frac_variance_explained": 0.847578125, "cossim": 0.9221484375, "l2_ratio": 0.92216796875, "relative_reconstruction_bias": 1.00029296875, "loss_original": 2.152919921875, "loss_reconstructed": 2.245234375, "loss_zero": 12.4375, "frac_recovered": 0.99060546875, "frac_alive": 0.99639892578125, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e85139264518adf5acfdd3639f2464f480c2b8193efccc9dbd3d7c06515577e9
3
+ size 302066710
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_4/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "TopKTrainer",
4
+ "dict_class": "AutoEncoderTopK",
5
+ "lr": 0.0003,
6
+ "steps": 244140,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": 195312,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "seed": 4,
13
+ "activation_dim": 2304,
14
+ "dict_size": 16384,
15
+ "k": 80,
16
+ "device": "cuda:0",
17
+ "layer": 12,
18
+ "lm_name": "google/gemma-2-2b",
19
+ "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_12_trainer_4",
20
+ "submodule_name": "resid_post_layer_12"
21
+ },
22
+ "buffer": {
23
+ "d_submodule": 2304,
24
+ "io": "out",
25
+ "n_ctxs": 244,
26
+ "ctx_len": 1024,
27
+ "refresh_batch_size": 4,
28
+ "out_batch_size": 2048,
29
+ "device": "cuda:0"
30
+ }
31
+ }
random_seeds_google_gemma-2-2b_top_k/resid_post_layer_12/trainer_4/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 54.755, "l1_loss": 531.58, "l0": 79.99732421875, "frac_variance_explained": 0.8508203125, "cossim": 0.9223828125, "l2_ratio": 0.92240234375, "relative_reconstruction_bias": 1.0003125, "loss_original": 2.152919921875, "loss_reconstructed": 2.24703125, "loss_zero": 12.4375, "frac_recovered": 0.99060546875, "frac_alive": 0.99603271484375, "hyperparameters": {"n_inputs": 200, "context_length": 1024}}