canrager commited on
Commit
c15f1b5
1 Parent(s): 962d0fc

Delete files resid_post_layer_11_checkpoints/ resid_post_layer_15/ resid_post_layer_15_checkpoints/ resid_post_layer_19/ resid_post_layer_19_checkpoints/ resid_post_layer_3/ resid_post_layer_3_checkpoints/ resid_post_layer_7/ resid_post_layer_7_checkpoints/ with huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. resid_post_layer_11_checkpoints/trainer_0_step_0/ae.pt +0 -3
  2. resid_post_layer_11_checkpoints/trainer_0_step_0/config.json +0 -26
  3. resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json +0 -1
  4. resid_post_layer_11_checkpoints/trainer_0_step_19528/ae.pt +0 -3
  5. resid_post_layer_11_checkpoints/trainer_0_step_19528/config.json +0 -26
  6. resid_post_layer_11_checkpoints/trainer_0_step_19528/eval_results.json +0 -1
  7. resid_post_layer_11_checkpoints/trainer_0_step_29292/ae.pt +0 -3
  8. resid_post_layer_11_checkpoints/trainer_0_step_29292/config.json +0 -26
  9. resid_post_layer_11_checkpoints/trainer_0_step_29292/eval_results.json +0 -1
  10. resid_post_layer_11_checkpoints/trainer_0_step_4882/ae.pt +0 -3
  11. resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json +0 -26
  12. resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json +0 -1
  13. resid_post_layer_11_checkpoints/trainer_0_step_9764/ae.pt +0 -3
  14. resid_post_layer_11_checkpoints/trainer_0_step_9764/config.json +0 -26
  15. resid_post_layer_11_checkpoints/trainer_0_step_9764/eval_results.json +0 -1
  16. resid_post_layer_11_checkpoints/trainer_1_step_0/ae.pt +0 -3
  17. resid_post_layer_11_checkpoints/trainer_1_step_0/config.json +0 -26
  18. resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json +0 -1
  19. resid_post_layer_11_checkpoints/trainer_1_step_19528/ae.pt +0 -3
  20. resid_post_layer_11_checkpoints/trainer_1_step_19528/config.json +0 -26
  21. resid_post_layer_11_checkpoints/trainer_1_step_19528/eval_results.json +0 -1
  22. resid_post_layer_11_checkpoints/trainer_1_step_29292/ae.pt +0 -3
  23. resid_post_layer_11_checkpoints/trainer_1_step_29292/config.json +0 -26
  24. resid_post_layer_11_checkpoints/trainer_1_step_29292/eval_results.json +0 -1
  25. resid_post_layer_11_checkpoints/trainer_1_step_4882/ae.pt +0 -3
  26. resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json +0 -26
  27. resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json +0 -1
  28. resid_post_layer_11_checkpoints/trainer_1_step_9764/ae.pt +0 -3
  29. resid_post_layer_11_checkpoints/trainer_1_step_9764/config.json +0 -26
  30. resid_post_layer_11_checkpoints/trainer_1_step_9764/eval_results.json +0 -1
  31. resid_post_layer_11_checkpoints/trainer_2_step_0/ae.pt +0 -3
  32. resid_post_layer_11_checkpoints/trainer_2_step_0/config.json +0 -26
  33. resid_post_layer_11_checkpoints/trainer_2_step_0/eval_results.json +0 -1
  34. resid_post_layer_11_checkpoints/trainer_2_step_19528/ae.pt +0 -3
  35. resid_post_layer_11_checkpoints/trainer_2_step_19528/config.json +0 -26
  36. resid_post_layer_11_checkpoints/trainer_2_step_19528/eval_results.json +0 -1
  37. resid_post_layer_11_checkpoints/trainer_2_step_29292/ae.pt +0 -3
  38. resid_post_layer_11_checkpoints/trainer_2_step_29292/config.json +0 -26
  39. resid_post_layer_11_checkpoints/trainer_2_step_29292/eval_results.json +0 -1
  40. resid_post_layer_11_checkpoints/trainer_2_step_4882/ae.pt +0 -3
  41. resid_post_layer_11_checkpoints/trainer_2_step_4882/config.json +0 -26
  42. resid_post_layer_11_checkpoints/trainer_2_step_4882/eval_results.json +0 -1
  43. resid_post_layer_11_checkpoints/trainer_2_step_9764/ae.pt +0 -3
  44. resid_post_layer_11_checkpoints/trainer_2_step_9764/config.json +0 -26
  45. resid_post_layer_11_checkpoints/trainer_2_step_9764/eval_results.json +0 -1
  46. resid_post_layer_11_checkpoints/trainer_3_step_0/ae.pt +0 -3
  47. resid_post_layer_11_checkpoints/trainer_3_step_0/config.json +0 -26
  48. resid_post_layer_11_checkpoints/trainer_3_step_0/eval_results.json +0 -1
  49. resid_post_layer_11_checkpoints/trainer_3_step_19528/ae.pt +0 -3
  50. resid_post_layer_11_checkpoints/trainer_3_step_19528/config.json +0 -26
resid_post_layer_11_checkpoints/trainer_0_step_0/ae.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863
3
- size 339823416
 
 
 
 
resid_post_layer_11_checkpoints/trainer_0_step_0/config.json DELETED
@@ -1,26 +0,0 @@
1
- {
2
- "trainer": {
3
- "trainer_class": "TrainerTopK",
4
- "dict_class": "AutoEncoderTopK",
5
- "lr": 0.0001885618083164127,
6
- "steps": "0",
7
- "seed": 0,
8
- "activation_dim": 2304,
9
- "dict_size": 18432,
10
- "k": 20,
11
- "device": "cuda:0",
12
- "layer": 11,
13
- "lm_name": "google/gemma-2-2b",
14
- "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
- "submodule_name": "resid_post_layer_11"
16
- },
17
- "buffer": {
18
- "d_submodule": 2304,
19
- "io": "out",
20
- "n_ctxs": 2000,
21
- "ctx_len": 128,
22
- "refresh_batch_size": 32,
23
- "out_batch_size": 4096,
24
- "device": "cuda:0"
25
- }
26
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json DELETED
@@ -1 +0,0 @@
1
- {"l2_loss": 144.1739944458008, "l1_loss": 113.89709167480468, "l0": 20.0, "frac_variance_explained": 0.06153666377067566, "cossim": 0.285680028796196, "l2_ratio": 0.1798807665705681, "relative_reconstruction_bias": 0.6311139702796936, "loss_original": 2.4489264488220215, "loss_reconstructed": 15.055834197998047, "loss_zero": 12.452933025360107, "frac_recovered": -0.26015533953905107, "frac_alive": 0.1353081613779068, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
 
 
resid_post_layer_11_checkpoints/trainer_0_step_19528/ae.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f56e92af04268566c7ff342c4e7210babc785a03db17bd8eaa38e3442012ce9
3
- size 339823704
 
 
 
 
resid_post_layer_11_checkpoints/trainer_0_step_19528/config.json DELETED
@@ -1,26 +0,0 @@
1
- {
2
- "trainer": {
3
- "trainer_class": "TrainerTopK",
4
- "dict_class": "AutoEncoderTopK",
5
- "lr": 0.0001885618083164127,
6
- "steps": "19528",
7
- "seed": 0,
8
- "activation_dim": 2304,
9
- "dict_size": 18432,
10
- "k": 20,
11
- "device": "cuda:0",
12
- "layer": 11,
13
- "lm_name": "google/gemma-2-2b",
14
- "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
- "submodule_name": "resid_post_layer_11"
16
- },
17
- "buffer": {
18
- "d_submodule": 2304,
19
- "io": "out",
20
- "n_ctxs": 2000,
21
- "ctx_len": 128,
22
- "refresh_batch_size": 32,
23
- "out_batch_size": 4096,
24
- "device": "cuda:0"
25
- }
26
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
resid_post_layer_11_checkpoints/trainer_0_step_19528/eval_results.json DELETED
@@ -1 +0,0 @@
1
- {"l2_loss": 63.222343826293944, "l1_loss": 284.57915954589845, "l0": 19.99583339691162, "frac_variance_explained": 0.7308643460273743, "cossim": 0.8872040271759033, "l2_ratio": 0.8889125108718872, "relative_reconstruction_bias": 1.0020551800727844, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.7907193899154663, "loss_zero": 12.452933025360107, "frac_recovered": 0.9659082174301148, "frac_alive": 0.1557074636220932, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
 
 
resid_post_layer_11_checkpoints/trainer_0_step_29292/ae.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5a1d888a0537e9383cc3a9d7da73404627105eedb05a2901ea9b5d8c79679bf
3
- size 339823704
 
 
 
 
resid_post_layer_11_checkpoints/trainer_0_step_29292/config.json DELETED
@@ -1,26 +0,0 @@
1
- {
2
- "trainer": {
3
- "trainer_class": "TrainerTopK",
4
- "dict_class": "AutoEncoderTopK",
5
- "lr": 0.0001885618083164127,
6
- "steps": "29292",
7
- "seed": 0,
8
- "activation_dim": 2304,
9
- "dict_size": 18432,
10
- "k": 20,
11
- "device": "cuda:0",
12
- "layer": 11,
13
- "lm_name": "google/gemma-2-2b",
14
- "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
- "submodule_name": "resid_post_layer_11"
16
- },
17
- "buffer": {
18
- "d_submodule": 2304,
19
- "io": "out",
20
- "n_ctxs": 2000,
21
- "ctx_len": 128,
22
- "refresh_batch_size": 32,
23
- "out_batch_size": 4096,
24
- "device": "cuda:0"
25
- }
26
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
resid_post_layer_11_checkpoints/trainer_0_step_29292/eval_results.json DELETED
@@ -1 +0,0 @@
1
- {"l2_loss": 63.71059875488281, "l1_loss": 272.7416564941406, "l0": 20.0, "frac_variance_explained": 0.7049950003623963, "cossim": 0.8891237080097198, "l2_ratio": 0.8906138241291046, "relative_reconstruction_bias": 1.0008544504642487, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.785463571548462, "loss_zero": 12.452933025360107, "frac_recovered": 0.9664257526397705, "frac_alive": 0.1563585102558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
 
 
resid_post_layer_11_checkpoints/trainer_0_step_4882/ae.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb73459d9ba27070497a75047dc056f86559bd5d41a888abb6cbebb52718ffed
3
- size 339823504
 
 
 
 
resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json DELETED
@@ -1,26 +0,0 @@
1
- {
2
- "trainer": {
3
- "trainer_class": "TrainerTopK",
4
- "dict_class": "AutoEncoderTopK",
5
- "lr": 0.0001885618083164127,
6
- "steps": "4882",
7
- "seed": 0,
8
- "activation_dim": 2304,
9
- "dict_size": 18432,
10
- "k": 20,
11
- "device": "cuda:0",
12
- "layer": 11,
13
- "lm_name": "google/gemma-2-2b",
14
- "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
- "submodule_name": "resid_post_layer_11"
16
- },
17
- "buffer": {
18
- "d_submodule": 2304,
19
- "io": "out",
20
- "n_ctxs": 2000,
21
- "ctx_len": 128,
22
- "refresh_batch_size": 32,
23
- "out_batch_size": 4096,
24
- "device": "cuda:0"
25
- }
26
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json DELETED
@@ -1 +0,0 @@
1
- {"l2_loss": 67.39788055419922, "l1_loss": 348.3453887939453, "l0": 20.0, "frac_variance_explained": 0.7809036612510681, "cossim": 0.8779726147651672, "l2_ratio": 0.8785419166088104, "relative_reconstruction_bias": 1.000831699371338, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.875354194641113, "loss_zero": 12.452933025360107, "frac_recovered": 0.9574480593204499, "frac_alive": 0.1525065153837204, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
 
 
resid_post_layer_11_checkpoints/trainer_0_step_9764/ae.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ce91e1320f81e78c9f7f2fd837f151c9fd488709b4f9aada2c4c18e38e7d585
3
- size 339823504
 
 
 
 
resid_post_layer_11_checkpoints/trainer_0_step_9764/config.json DELETED
@@ -1,26 +0,0 @@
1
- {
2
- "trainer": {
3
- "trainer_class": "TrainerTopK",
4
- "dict_class": "AutoEncoderTopK",
5
- "lr": 0.0001885618083164127,
6
- "steps": "9764",
7
- "seed": 0,
8
- "activation_dim": 2304,
9
- "dict_size": 18432,
10
- "k": 20,
11
- "device": "cuda:0",
12
- "layer": 11,
13
- "lm_name": "google/gemma-2-2b",
14
- "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
- "submodule_name": "resid_post_layer_11"
16
- },
17
- "buffer": {
18
- "d_submodule": 2304,
19
- "io": "out",
20
- "n_ctxs": 2000,
21
- "ctx_len": 128,
22
- "refresh_batch_size": 32,
23
- "out_batch_size": 4096,
24
- "device": "cuda:0"
25
- }
26
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
resid_post_layer_11_checkpoints/trainer_0_step_9764/eval_results.json DELETED
@@ -1 +0,0 @@
1
- {"l2_loss": 66.6925880432129, "l1_loss": 304.12717895507814, "l0": 20.0, "frac_variance_explained": 0.7223313331604004, "cossim": 0.8877674520015717, "l2_ratio": 0.8910203695297241, "relative_reconstruction_bias": 1.0043164610862731, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.8143189668655397, "loss_zero": 12.452933025360107, "frac_recovered": 0.9635527789592743, "frac_alive": 0.1507703959941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
 
 
resid_post_layer_11_checkpoints/trainer_1_step_0/ae.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863
3
- size 339823416
 
 
 
 
resid_post_layer_11_checkpoints/trainer_1_step_0/config.json DELETED
@@ -1,26 +0,0 @@
1
- {
2
- "trainer": {
3
- "trainer_class": "TrainerTopK",
4
- "dict_class": "AutoEncoderTopK",
5
- "lr": 0.0001885618083164127,
6
- "steps": "0",
7
- "seed": 0,
8
- "activation_dim": 2304,
9
- "dict_size": 18432,
10
- "k": 40,
11
- "device": "cuda:0",
12
- "layer": 11,
13
- "lm_name": "google/gemma-2-2b",
14
- "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
- "submodule_name": "resid_post_layer_11"
16
- },
17
- "buffer": {
18
- "d_submodule": 2304,
19
- "io": "out",
20
- "n_ctxs": 2000,
21
- "ctx_len": 128,
22
- "refresh_batch_size": 32,
23
- "out_batch_size": 4096,
24
- "device": "cuda:0"
25
- }
26
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json DELETED
@@ -1 +0,0 @@
1
- {"l2_loss": 146.67851715087892, "l1_loss": 225.0533248901367, "l0": 40.0, "frac_variance_explained": 0.10613001585006714, "cossim": 0.37147045135498047, "l2_ratio": 0.2499557614326477, "relative_reconstruction_bias": 0.670610225200653, "loss_original": 2.4489264488220215, "loss_reconstructed": 13.592330741882325, "loss_zero": 12.452933025360107, "frac_recovered": -0.1137192726135254, "frac_alive": 0.2194553017616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
 
 
resid_post_layer_11_checkpoints/trainer_1_step_19528/ae.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d443898512cabf167f64076cb6ce357da7b374f711e79c43b4c6c5834406617
3
- size 339823704
 
 
 
 
resid_post_layer_11_checkpoints/trainer_1_step_19528/config.json DELETED
@@ -1,26 +0,0 @@
1
- {
2
- "trainer": {
3
- "trainer_class": "TrainerTopK",
4
- "dict_class": "AutoEncoderTopK",
5
- "lr": 0.0001885618083164127,
6
- "steps": "19528",
7
- "seed": 0,
8
- "activation_dim": 2304,
9
- "dict_size": 18432,
10
- "k": 40,
11
- "device": "cuda:0",
12
- "layer": 11,
13
- "lm_name": "google/gemma-2-2b",
14
- "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
- "submodule_name": "resid_post_layer_11"
16
- },
17
- "buffer": {
18
- "d_submodule": 2304,
19
- "io": "out",
20
- "n_ctxs": 2000,
21
- "ctx_len": 128,
22
- "refresh_batch_size": 32,
23
- "out_batch_size": 4096,
24
- "device": "cuda:0"
25
- }
26
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
resid_post_layer_11_checkpoints/trainer_1_step_19528/eval_results.json DELETED
@@ -1 +0,0 @@
1
- {"l2_loss": 55.811133575439456, "l1_loss": 451.9648834228516, "l0": 40.0, "frac_variance_explained": 0.820844042301178, "cossim": 0.9156029880046844, "l2_ratio": 0.9176408350467682, "relative_reconstruction_bias": 1.0015530705451965, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.618153429031372, "loss_zero": 12.452933025360107, "frac_recovered": 0.9831358790397644, "frac_alive": 0.2994249165058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
 
 
resid_post_layer_11_checkpoints/trainer_1_step_29292/ae.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5eed5e6b2f32121b1976aaa8492cce7320da5abca20185e3d43c17daea9a225b
3
- size 339823704
 
 
 
 
resid_post_layer_11_checkpoints/trainer_1_step_29292/config.json DELETED
@@ -1,26 +0,0 @@
1
- {
2
- "trainer": {
3
- "trainer_class": "TrainerTopK",
4
- "dict_class": "AutoEncoderTopK",
5
- "lr": 0.0001885618083164127,
6
- "steps": "29292",
7
- "seed": 0,
8
- "activation_dim": 2304,
9
- "dict_size": 18432,
10
- "k": 40,
11
- "device": "cuda:0",
12
- "layer": 11,
13
- "lm_name": "google/gemma-2-2b",
14
- "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
- "submodule_name": "resid_post_layer_11"
16
- },
17
- "buffer": {
18
- "d_submodule": 2304,
19
- "io": "out",
20
- "n_ctxs": 2000,
21
- "ctx_len": 128,
22
- "refresh_batch_size": 32,
23
- "out_batch_size": 4096,
24
- "device": "cuda:0"
25
- }
26
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
resid_post_layer_11_checkpoints/trainer_1_step_29292/eval_results.json DELETED
@@ -1 +0,0 @@
1
- {"l2_loss": 55.91380500793457, "l1_loss": 427.1486541748047, "l0": 39.983334350585935, "frac_variance_explained": 0.8604467451572418, "cossim": 0.9129667043685913, "l2_ratio": 0.9151833534240723, "relative_reconstruction_bias": 1.0012677431106567, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6114421129226684, "loss_zero": 12.452933025360107, "frac_recovered": 0.9838047802448273, "frac_alive": 0.29443359375, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
 
 
resid_post_layer_11_checkpoints/trainer_1_step_4882/ae.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:499307aaa2265a6de8ec06ed3ce6364ca443c6ec7597187a8d0f154e86a1ed0f
3
- size 339823504
 
 
 
 
resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json DELETED
@@ -1,26 +0,0 @@
1
- {
2
- "trainer": {
3
- "trainer_class": "TrainerTopK",
4
- "dict_class": "AutoEncoderTopK",
5
- "lr": 0.0001885618083164127,
6
- "steps": "4882",
7
- "seed": 0,
8
- "activation_dim": 2304,
9
- "dict_size": 18432,
10
- "k": 40,
11
- "device": "cuda:0",
12
- "layer": 11,
13
- "lm_name": "google/gemma-2-2b",
14
- "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
- "submodule_name": "resid_post_layer_11"
16
- },
17
- "buffer": {
18
- "d_submodule": 2304,
19
- "io": "out",
20
- "n_ctxs": 2000,
21
- "ctx_len": 128,
22
- "refresh_batch_size": 32,
23
- "out_batch_size": 4096,
24
- "device": "cuda:0"
25
- }
26
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json DELETED
@@ -1 +0,0 @@
1
- {"l2_loss": 59.28143844604492, "l1_loss": 393.00905456542966, "l0": 40.0, "frac_variance_explained": 0.7567337930202485, "cossim": 0.9030908882617951, "l2_ratio": 0.9034847617149353, "relative_reconstruction_bias": 0.9998072028160095, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.678276252746582, "loss_zero": 12.452933025360107, "frac_recovered": 0.9771327614784241, "frac_alive": 0.2982313334941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
 
 
resid_post_layer_11_checkpoints/trainer_1_step_9764/ae.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2c7b989a53a1687048ca1175380bb26a0a17e1a51bb994937c36929faa3aaf7
3
- size 339823504
 
 
 
 
resid_post_layer_11_checkpoints/trainer_1_step_9764/config.json DELETED
@@ -1,26 +0,0 @@
1
- {
2
- "trainer": {
3
- "trainer_class": "TrainerTopK",
4
- "dict_class": "AutoEncoderTopK",
5
- "lr": 0.0001885618083164127,
6
- "steps": "9764",
7
- "seed": 0,
8
- "activation_dim": 2304,
9
- "dict_size": 18432,
10
- "k": 40,
11
- "device": "cuda:0",
12
- "layer": 11,
13
- "lm_name": "google/gemma-2-2b",
14
- "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
- "submodule_name": "resid_post_layer_11"
16
- },
17
- "buffer": {
18
- "d_submodule": 2304,
19
- "io": "out",
20
- "n_ctxs": 2000,
21
- "ctx_len": 128,
22
- "refresh_batch_size": 32,
23
- "out_batch_size": 4096,
24
- "device": "cuda:0"
25
- }
26
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
resid_post_layer_11_checkpoints/trainer_1_step_9764/eval_results.json DELETED
@@ -1 +0,0 @@
1
- {"l2_loss": 57.661796188354494, "l1_loss": 467.5274963378906, "l0": 40.0, "frac_variance_explained": 0.8447021842002869, "cossim": 0.9108584702014924, "l2_ratio": 0.9144052922725677, "relative_reconstruction_bias": 1.003432297706604, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.624210572242737, "loss_zero": 12.452933025360107, "frac_recovered": 0.9825274705886841, "frac_alive": 0.2957899272441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
 
 
resid_post_layer_11_checkpoints/trainer_2_step_0/ae.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863
3
- size 339823416
 
 
 
 
resid_post_layer_11_checkpoints/trainer_2_step_0/config.json DELETED
@@ -1,26 +0,0 @@
1
- {
2
- "trainer": {
3
- "trainer_class": "TrainerTopK",
4
- "dict_class": "AutoEncoderTopK",
5
- "lr": 0.0001885618083164127,
6
- "steps": "0",
7
- "seed": 0,
8
- "activation_dim": 2304,
9
- "dict_size": 18432,
10
- "k": 80,
11
- "device": "cuda:0",
12
- "layer": 11,
13
- "lm_name": "google/gemma-2-2b",
14
- "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
- "submodule_name": "resid_post_layer_11"
16
- },
17
- "buffer": {
18
- "d_submodule": 2304,
19
- "io": "out",
20
- "n_ctxs": 2000,
21
- "ctx_len": 128,
22
- "refresh_batch_size": 32,
23
- "out_batch_size": 4096,
24
- "device": "cuda:0"
25
- }
26
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
resid_post_layer_11_checkpoints/trainer_2_step_0/eval_results.json DELETED
@@ -1 +0,0 @@
1
- {"l2_loss": 137.5836051940918, "l1_loss": 414.7282440185547, "l0": 80.0, "frac_variance_explained": 0.17110393047332764, "cossim": 0.4680758684873581, "l2_ratio": 0.34816921055316924, "relative_reconstruction_bias": 0.7378275513648986, "loss_original": 2.4489264488220215, "loss_reconstructed": 14.558675193786621, "loss_zero": 12.452933025360107, "frac_recovered": -0.2103082224726677, "frac_alive": 0.3181966245174408, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
 
 
resid_post_layer_11_checkpoints/trainer_2_step_19528/ae.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e25c4eab862d23187a5b98b7ed83d77d3e2b3188b59d300d535208c5d34aedf7
3
- size 339823704
 
 
 
 
resid_post_layer_11_checkpoints/trainer_2_step_19528/config.json DELETED
@@ -1,26 +0,0 @@
1
- {
2
- "trainer": {
3
- "trainer_class": "TrainerTopK",
4
- "dict_class": "AutoEncoderTopK",
5
- "lr": 0.0001885618083164127,
6
- "steps": "19528",
7
- "seed": 0,
8
- "activation_dim": 2304,
9
- "dict_size": 18432,
10
- "k": 80,
11
- "device": "cuda:0",
12
- "layer": 11,
13
- "lm_name": "google/gemma-2-2b",
14
- "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
- "submodule_name": "resid_post_layer_11"
16
- },
17
- "buffer": {
18
- "d_submodule": 2304,
19
- "io": "out",
20
- "n_ctxs": 2000,
21
- "ctx_len": 128,
22
- "refresh_batch_size": 32,
23
- "out_batch_size": 4096,
24
- "device": "cuda:0"
25
- }
26
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
resid_post_layer_11_checkpoints/trainer_2_step_19528/eval_results.json DELETED
@@ -1 +0,0 @@
1
- {"l2_loss": 52.232007598876955, "l1_loss": 543.6768280029297, "l0": 79.9, "frac_variance_explained": 0.8336090505123138, "cossim": 0.9266528010368347, "l2_ratio": 0.9269256889820099, "relative_reconstruction_bias": 1.0003283321857452, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5528225183486937, "loss_zero": 12.452933025360107, "frac_recovered": 0.9896542429924011, "frac_alive": 0.46240234375, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
 
 
resid_post_layer_11_checkpoints/trainer_2_step_29292/ae.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:aae8444f307b4f0275f1e88a9ee35b83003813811a3d8ba90ad8eb0fb861b798
3
- size 339823704
 
 
 
 
resid_post_layer_11_checkpoints/trainer_2_step_29292/config.json DELETED
@@ -1,26 +0,0 @@
1
- {
2
- "trainer": {
3
- "trainer_class": "TrainerTopK",
4
- "dict_class": "AutoEncoderTopK",
5
- "lr": 0.0001885618083164127,
6
- "steps": "29292",
7
- "seed": 0,
8
- "activation_dim": 2304,
9
- "dict_size": 18432,
10
- "k": 80,
11
- "device": "cuda:0",
12
- "layer": 11,
13
- "lm_name": "google/gemma-2-2b",
14
- "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
- "submodule_name": "resid_post_layer_11"
16
- },
17
- "buffer": {
18
- "d_submodule": 2304,
19
- "io": "out",
20
- "n_ctxs": 2000,
21
- "ctx_len": 128,
22
- "refresh_batch_size": 32,
23
- "out_batch_size": 4096,
24
- "device": "cuda:0"
25
- }
26
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
resid_post_layer_11_checkpoints/trainer_2_step_29292/eval_results.json DELETED
@@ -1 +0,0 @@
1
- {"l2_loss": 51.525339126586914, "l1_loss": 510.0916351318359, "l0": 80.0, "frac_variance_explained": 0.8004042208194733, "cossim": 0.928622841835022, "l2_ratio": 0.9312313497066498, "relative_reconstruction_bias": 1.0022627532482147, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5504817724227906, "loss_zero": 12.452933025360107, "frac_recovered": 0.9898871839046478, "frac_alive": 0.4539930522441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
 
 
resid_post_layer_11_checkpoints/trainer_2_step_4882/ae.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:278bc3f69f3e86c9998fd15f4c04da6c2d50f1ce93c1a3ddf7c401570e19915c
3
- size 339823504
 
 
 
 
resid_post_layer_11_checkpoints/trainer_2_step_4882/config.json DELETED
@@ -1,26 +0,0 @@
1
- {
2
- "trainer": {
3
- "trainer_class": "TrainerTopK",
4
- "dict_class": "AutoEncoderTopK",
5
- "lr": 0.0001885618083164127,
6
- "steps": "4882",
7
- "seed": 0,
8
- "activation_dim": 2304,
9
- "dict_size": 18432,
10
- "k": 80,
11
- "device": "cuda:0",
12
- "layer": 11,
13
- "lm_name": "google/gemma-2-2b",
14
- "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
- "submodule_name": "resid_post_layer_11"
16
- },
17
- "buffer": {
18
- "d_submodule": 2304,
19
- "io": "out",
20
- "n_ctxs": 2000,
21
- "ctx_len": 128,
22
- "refresh_batch_size": 32,
23
- "out_batch_size": 4096,
24
- "device": "cuda:0"
25
- }
26
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
resid_post_layer_11_checkpoints/trainer_2_step_4882/eval_results.json DELETED
@@ -1 +0,0 @@
1
- {"l2_loss": 56.42006950378418, "l1_loss": 607.1793884277344, "l0": 80.0, "frac_variance_explained": 0.8453537464141846, "cossim": 0.9180462002754212, "l2_ratio": 0.9192540943622589, "relative_reconstruction_bias": 1.0001774728298187, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5884765863418577, "loss_zero": 12.452933025360107, "frac_recovered": 0.9860971808433533, "frac_alive": 0.4753689169883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
 
 
resid_post_layer_11_checkpoints/trainer_2_step_9764/ae.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:09277bb07f753cc705b46903843df2ccff8845b6887d0ed9a3bf12dab4305d09
3
- size 339823504
 
 
 
 
resid_post_layer_11_checkpoints/trainer_2_step_9764/config.json DELETED
@@ -1,26 +0,0 @@
1
- {
2
- "trainer": {
3
- "trainer_class": "TrainerTopK",
4
- "dict_class": "AutoEncoderTopK",
5
- "lr": 0.0001885618083164127,
6
- "steps": "9764",
7
- "seed": 0,
8
- "activation_dim": 2304,
9
- "dict_size": 18432,
10
- "k": 80,
11
- "device": "cuda:0",
12
- "layer": 11,
13
- "lm_name": "google/gemma-2-2b",
14
- "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
- "submodule_name": "resid_post_layer_11"
16
- },
17
- "buffer": {
18
- "d_submodule": 2304,
19
- "io": "out",
20
- "n_ctxs": 2000,
21
- "ctx_len": 128,
22
- "refresh_batch_size": 32,
23
- "out_batch_size": 4096,
24
- "device": "cuda:0"
25
- }
26
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
resid_post_layer_11_checkpoints/trainer_2_step_9764/eval_results.json DELETED
@@ -1 +0,0 @@
1
- {"l2_loss": 52.48485107421875, "l1_loss": 528.1922485351563, "l0": 80.0, "frac_variance_explained": 0.808493971824646, "cossim": 0.9282886624336243, "l2_ratio": 0.9306568443775177, "relative_reconstruction_bias": 1.0026726007461548, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.56360719203949, "loss_zero": 12.452933025360107, "frac_recovered": 0.988573682308197, "frac_alive": 0.4756944477558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
 
 
resid_post_layer_11_checkpoints/trainer_3_step_0/ae.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863
3
- size 339823416
 
 
 
 
resid_post_layer_11_checkpoints/trainer_3_step_0/config.json DELETED
@@ -1,26 +0,0 @@
1
- {
2
- "trainer": {
3
- "trainer_class": "TrainerTopK",
4
- "dict_class": "AutoEncoderTopK",
5
- "lr": 0.0001885618083164127,
6
- "steps": "0",
7
- "seed": 0,
8
- "activation_dim": 2304,
9
- "dict_size": 18432,
10
- "k": 160,
11
- "device": "cuda:0",
12
- "layer": 11,
13
- "lm_name": "google/gemma-2-2b",
14
- "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
- "submodule_name": "resid_post_layer_11"
16
- },
17
- "buffer": {
18
- "d_submodule": 2304,
19
- "io": "out",
20
- "n_ctxs": 2000,
21
- "ctx_len": 128,
22
- "refresh_batch_size": 32,
23
- "out_batch_size": 4096,
24
- "device": "cuda:0"
25
- }
26
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
resid_post_layer_11_checkpoints/trainer_3_step_0/eval_results.json DELETED
@@ -1 +0,0 @@
1
- {"l2_loss": 131.11762619018555, "l1_loss": 788.7969665527344, "l0": 160.0, "frac_variance_explained": 0.25940428376197816, "cossim": 0.5706947863101959, "l2_ratio": 0.49342564642429354, "relative_reconstruction_bias": 0.849498838186264, "loss_original": 2.4489264488220215, "loss_reconstructed": 10.614991569519043, "loss_zero": 12.452933025360107, "frac_recovered": 0.18391464054584503, "frac_alive": 0.4411349892616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
 
 
resid_post_layer_11_checkpoints/trainer_3_step_19528/ae.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:01f748938c4f7810260a5082db07606c1dc8e6e8e10deb0d989f8e118c5773ab
3
- size 339823704
 
 
 
 
resid_post_layer_11_checkpoints/trainer_3_step_19528/config.json DELETED
@@ -1,26 +0,0 @@
1
- {
2
- "trainer": {
3
- "trainer_class": "TrainerTopK",
4
- "dict_class": "AutoEncoderTopK",
5
- "lr": 0.0001885618083164127,
6
- "steps": "19528",
7
- "seed": 0,
8
- "activation_dim": 2304,
9
- "dict_size": 18432,
10
- "k": 160,
11
- "device": "cuda:0",
12
- "layer": 11,
13
- "lm_name": "google/gemma-2-2b",
14
- "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11",
15
- "submodule_name": "resid_post_layer_11"
16
- },
17
- "buffer": {
18
- "d_submodule": 2304,
19
- "io": "out",
20
- "n_ctxs": 2000,
21
- "ctx_len": 128,
22
- "refresh_batch_size": 32,
23
- "out_batch_size": 4096,
24
- "device": "cuda:0"
25
- }
26
- }