canrager committed on
Commit d4252e9 · 1 Parent(s): 60d3d0c

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes.
Files changed (50):
  1. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_0/ae.pt +3 -0
  2. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_0/config.json +26 -0
  3. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_1/ae.pt +3 -0
  4. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_1/config.json +26 -0
  5. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_2/ae.pt +3 -0
  6. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_2/config.json +26 -0
  7. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_3/ae.pt +3 -0
  8. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_3/config.json +26 -0
  9. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_4/ae.pt +3 -0
  10. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_4/config.json +26 -0
  11. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_5/ae.pt +3 -0
  12. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_5/config.json +26 -0
  13. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_0/ae.pt +3 -0
  14. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json +26 -0
  15. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_154/ae.pt +3 -0
  16. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_154/config.json +26 -0
  17. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_1544/ae.pt +3 -0
  18. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_1544/config.json +26 -0
  19. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_19531/ae.pt +3 -0
  20. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_19531/config.json +26 -0
  21. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_29296/ae.pt +3 -0
  22. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_29296/config.json +26 -0
  23. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_48/ae.pt +3 -0
  24. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_48/config.json +26 -0
  25. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_488/ae.pt +3 -0
  26. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_488/config.json +26 -0
  27. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_4882/ae.pt +3 -0
  28. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json +26 -0
  29. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_9765/ae.pt +3 -0
  30. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_9765/config.json +26 -0
  31. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_0/ae.pt +3 -0
  32. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json +26 -0
  33. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_154/ae.pt +3 -0
  34. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_154/config.json +26 -0
  35. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_1544/ae.pt +3 -0
  36. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_1544/config.json +26 -0
  37. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_19531/ae.pt +3 -0
  38. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_19531/config.json +26 -0
  39. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_29296/ae.pt +3 -0
  40. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_29296/config.json +26 -0
  41. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_48/ae.pt +3 -0
  42. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_48/config.json +26 -0
  43. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_488/ae.pt +3 -0
  44. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_488/config.json +26 -0
  45. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_4882/ae.pt +3 -0
  46. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json +26 -0
  47. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_9765/ae.pt +3 -0
  48. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_9765/config.json +26 -0
  49. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_0/ae.pt +3 -0
  50. pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json +26 -0
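Each trainer directory in the listing above pairs a Git LFS pointer for the SAE weights (ae.pt, roughly 268 MB) with the training configuration that produced them (config.json). Below is a minimal sketch of fetching one trainer's files with huggingface_hub; the repo_id is a hypothetical placeholder, since this commit view does not name the repository:

```python
# Sketch only: pull a single trainer's SAE weights and config from the Hub.
# REPO_ID is a hypothetical placeholder; substitute the actual repository id.
from huggingface_hub import snapshot_download

REPO_ID = "canrager/pythia-sae-sweep"  # assumption, not stated on this page
local_dir = snapshot_download(
    repo_id=REPO_ID,
    allow_patterns=[
        "pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_0/*"
    ],
)
print(local_dir)  # local folder containing trainer_0/ae.pt and trainer_0/config.json
```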
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f931bbd2be0cf27730cf6315612899804a86d40faa2a8acb8d6f4a2534fdaddc
+ size 268511016
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_0/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "trainer": {
+ "trainer_class": "TrainerTopK",
+ "dict_class": "AutoEncoderTopK",
+ "lr": 0.0002,
+ "steps": 48828,
+ "seed": 0,
+ "activation_dim": 2048,
+ "dict_size": 16384,
+ "k": 20,
+ "device": "cuda:0",
+ "layer": 11,
+ "lm_name": "EleutherAI/pythia-1.4b-deduped",
+ "wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
+ "submodule_name": "resid_post_layer_11"
+ },
+ "buffer": {
+ "d_submodule": 2048,
+ "io": "out",
+ "n_ctxs": 2000,
+ "ctx_len": 128,
+ "refresh_batch_size": 24,
+ "out_batch_size": 4096,
+ "device": "cuda:0"
+ }
+ }
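For reference, the trainer block above describes a TopK sparse autoencoder (a dictionary of 16384 latents over a 2048-dimensional residual stream, with k = 20 active latents) trained for 48828 steps on layer 11 of EleutherAI/pythia-1.4b-deduped, with activations buffered from 2000 contexts of 128 tokens. A minimal sketch of reading such a checkpoint with plain PyTorch follows; it does not use the TrainerTopK / AutoEncoderTopK classes named in the config, and the parameter key names are assumptions about what ae.pt contains:

```python
# Minimal sketch, not the reference implementation: load config.json and ae.pt
# and apply a generic TopK encode (keep the k largest pre-activations, zero the rest).
import json
import torch

with open("trainer_0/config.json") as f:
    cfg = json.load(f)["trainer"]

state = torch.load("trainer_0/ae.pt", map_location="cpu")  # assumed to be a plain state dict

def topk_encode(x, W_enc, b_enc, k):
    """Keep only the k largest pre-activations per row and zero everything else."""
    pre = x @ W_enc + b_enc                      # [batch, dict_size]
    vals, idx = pre.topk(k, dim=-1)
    codes = torch.zeros_like(pre)
    return codes.scatter_(-1, idx, vals.relu())

# "encoder.weight" / "encoder.bias" are assumed key names, not taken from this page.
x = torch.randn(4, cfg["activation_dim"])
codes = topk_encode(x, state["encoder.weight"].T, state["encoder.bias"], cfg["k"])
print(codes.shape, (codes != 0).sum(dim=-1))     # at most k active latents per row
```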
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:77c04025750f094a1207b5c13d9c0513bfb6738d7b56ccf82823b5cece85ec97
+ size 268511016
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_1/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "trainer": {
+ "trainer_class": "TrainerTopK",
+ "dict_class": "AutoEncoderTopK",
+ "lr": 0.0002,
+ "steps": 48828,
+ "seed": 0,
+ "activation_dim": 2048,
+ "dict_size": 16384,
+ "k": 40,
+ "device": "cuda:0",
+ "layer": 11,
+ "lm_name": "EleutherAI/pythia-1.4b-deduped",
+ "wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
+ "submodule_name": "resid_post_layer_11"
+ },
+ "buffer": {
+ "d_submodule": 2048,
+ "io": "out",
+ "n_ctxs": 2000,
+ "ctx_len": 128,
+ "refresh_batch_size": 24,
+ "out_batch_size": 4096,
+ "device": "cuda:0"
+ }
+ }
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:777821e6b4ba99d790da876723dc8e934cb1723b8126a848baa763bb716050ec
+ size 268511016
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_2/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "trainer": {
+ "trainer_class": "TrainerTopK",
+ "dict_class": "AutoEncoderTopK",
+ "lr": 0.0002,
+ "steps": 48828,
+ "seed": 0,
+ "activation_dim": 2048,
+ "dict_size": 16384,
+ "k": 80,
+ "device": "cuda:0",
+ "layer": 11,
+ "lm_name": "EleutherAI/pythia-1.4b-deduped",
+ "wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
+ "submodule_name": "resid_post_layer_11"
+ },
+ "buffer": {
+ "d_submodule": 2048,
+ "io": "out",
+ "n_ctxs": 2000,
+ "ctx_len": 128,
+ "refresh_batch_size": 24,
+ "out_batch_size": 4096,
+ "device": "cuda:0"
+ }
+ }
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9165f71a2c18bc3a3c5db1008be308f7dff6bf8c89227924af9cb3065de69064
+ size 268511016
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_3/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "trainer": {
+ "trainer_class": "TrainerTopK",
+ "dict_class": "AutoEncoderTopK",
+ "lr": 0.0002,
+ "steps": 48828,
+ "seed": 0,
+ "activation_dim": 2048,
+ "dict_size": 16384,
+ "k": 160,
+ "device": "cuda:0",
+ "layer": 11,
+ "lm_name": "EleutherAI/pythia-1.4b-deduped",
+ "wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
+ "submodule_name": "resid_post_layer_11"
+ },
+ "buffer": {
+ "d_submodule": 2048,
+ "io": "out",
+ "n_ctxs": 2000,
+ "ctx_len": 128,
+ "refresh_batch_size": 24,
+ "out_batch_size": 4096,
+ "device": "cuda:0"
+ }
+ }
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bd61e41012e58b8d9599f2cc17d064146bf530fae5c81420128b6c6c91dd05af
+ size 268511016
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_4/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "trainer": {
+ "trainer_class": "TrainerTopK",
+ "dict_class": "AutoEncoderTopK",
+ "lr": 0.0002,
+ "steps": 48828,
+ "seed": 0,
+ "activation_dim": 2048,
+ "dict_size": 16384,
+ "k": 320,
+ "device": "cuda:0",
+ "layer": 11,
+ "lm_name": "EleutherAI/pythia-1.4b-deduped",
+ "wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
+ "submodule_name": "resid_post_layer_11"
+ },
+ "buffer": {
+ "d_submodule": 2048,
+ "io": "out",
+ "n_ctxs": 2000,
+ "ctx_len": 128,
+ "refresh_batch_size": 24,
+ "out_batch_size": 4096,
+ "device": "cuda:0"
+ }
+ }
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e946949d76acfd70c4e05de0701fee087ab718e6503dcef0651efc0f732713d1
+ size 268511016
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_5/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "trainer": {
+ "trainer_class": "TrainerTopK",
+ "dict_class": "AutoEncoderTopK",
+ "lr": 0.0002,
+ "steps": 48828,
+ "seed": 0,
+ "activation_dim": 2048,
+ "dict_size": 16384,
+ "k": 640,
+ "device": "cuda:0",
+ "layer": 11,
+ "lm_name": "EleutherAI/pythia-1.4b-deduped",
+ "wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
+ "submodule_name": "resid_post_layer_11"
+ },
+ "buffer": {
+ "d_submodule": 2048,
+ "io": "out",
+ "n_ctxs": 2000,
+ "ctx_len": 128,
+ "refresh_batch_size": 24,
+ "out_batch_size": 4096,
+ "device": "cuda:0"
+ }
+ }
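The six trainer configs above are identical apart from the sparsity level: k doubles from 20 (trainer_0) to 640 (trainer_5), so the directories form a sweep over k at fixed dictionary size and learning rate. A small sketch for enumerating the sweep from the config files:

```python
# Sketch: list the k value used by each trainer in the sweep.
import json
import pathlib

root = pathlib.Path("pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11")
for cfg_path in sorted(root.glob("trainer_*/config.json")):
    k = json.loads(cfg_path.read_text())["trainer"]["k"]
    print(cfg_path.parent.name, "k =", k)  # trainer_0 k = 20 ... trainer_5 k = 640
```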
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_0/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f6adbecf89ab611c06dd7362aeed0c6223349b95ca84867cd6c4fa1aaee62e51
+ size 268511032
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "trainer": {
+ "trainer_class": "TrainerTopK",
+ "dict_class": "AutoEncoderTopK",
+ "lr": 0.0002,
+ "steps": "0",
+ "seed": 0,
+ "activation_dim": 2048,
+ "dict_size": 16384,
+ "k": 20,
+ "device": "cuda:0",
+ "layer": 11,
+ "lm_name": "EleutherAI/pythia-1.4b-deduped",
+ "wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
+ "submodule_name": "resid_post_layer_11"
+ },
+ "buffer": {
+ "d_submodule": 2048,
+ "io": "out",
+ "n_ctxs": 2000,
+ "ctx_len": 128,
+ "refresh_batch_size": 24,
+ "out_batch_size": 4096,
+ "device": "cuda:0"
+ }
+ }
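The *_checkpoints directory appears to hold intermediate snapshots of the same runs: each trainer_{i}_step_{s} folder stores the weights at step s alongside a copy of the config whose "steps" field records that snapshot step (written as a string here, unlike the final configs). Snapshots are saved densely early in training and more sparsely later. A sketch for recovering the saved steps from the folder names:

```python
# Sketch: collect the checkpoint steps saved for trainer_0 from the folder names.
import pathlib

ckpt_root = pathlib.Path(
    "pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints"
)
steps = sorted(int(p.name.rsplit("_", 1)[1]) for p in ckpt_root.glob("trainer_0_step_*"))
print(steps)  # steps visible in this (truncated) commit view: 0, 48, 154, 488, 1544, 4882, 9765, 19531, 29296
```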
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_154/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0b67e75ad65e356011f6408fa8110bc0781b2b342a7ded331c7dda307480c614
+ size 268511048
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_154/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "trainer": {
+ "trainer_class": "TrainerTopK",
+ "dict_class": "AutoEncoderTopK",
+ "lr": 0.0002,
+ "steps": "154",
+ "seed": 0,
+ "activation_dim": 2048,
+ "dict_size": 16384,
+ "k": 20,
+ "device": "cuda:0",
+ "layer": 11,
+ "lm_name": "EleutherAI/pythia-1.4b-deduped",
+ "wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
+ "submodule_name": "resid_post_layer_11"
+ },
+ "buffer": {
+ "d_submodule": 2048,
+ "io": "out",
+ "n_ctxs": 2000,
+ "ctx_len": 128,
+ "refresh_batch_size": 24,
+ "out_batch_size": 4096,
+ "device": "cuda:0"
+ }
+ }
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_1544/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1983dc5a21aef1fd81c3bc5f11f083c8e6eaa9702bce34ef9493c647b5884972
+ size 268511120
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_1544/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "trainer": {
+ "trainer_class": "TrainerTopK",
+ "dict_class": "AutoEncoderTopK",
+ "lr": 0.0002,
+ "steps": "1544",
+ "seed": 0,
+ "activation_dim": 2048,
+ "dict_size": 16384,
+ "k": 20,
+ "device": "cuda:0",
+ "layer": 11,
+ "lm_name": "EleutherAI/pythia-1.4b-deduped",
+ "wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
+ "submodule_name": "resid_post_layer_11"
+ },
+ "buffer": {
+ "d_submodule": 2048,
+ "io": "out",
+ "n_ctxs": 2000,
+ "ctx_len": 128,
+ "refresh_batch_size": 24,
+ "out_batch_size": 4096,
+ "device": "cuda:0"
+ }
+ }
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_19531/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2a6755014321c45b857963ffdf611282d3c2725576f5c0e106ad001843fdda12
+ size 268511320
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_19531/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "trainer": {
+ "trainer_class": "TrainerTopK",
+ "dict_class": "AutoEncoderTopK",
+ "lr": 0.0002,
+ "steps": "19531",
+ "seed": 0,
+ "activation_dim": 2048,
+ "dict_size": 16384,
+ "k": 20,
+ "device": "cuda:0",
+ "layer": 11,
+ "lm_name": "EleutherAI/pythia-1.4b-deduped",
+ "wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
+ "submodule_name": "resid_post_layer_11"
+ },
+ "buffer": {
+ "d_submodule": 2048,
+ "io": "out",
+ "n_ctxs": 2000,
+ "ctx_len": 128,
+ "refresh_batch_size": 24,
+ "out_batch_size": 4096,
+ "device": "cuda:0"
+ }
+ }
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_29296/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5845ca9123c5cd425cfb70f950e0d791bb99e986ed461b6d71fb5071f3d63dc3
+ size 268511320
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_29296/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "trainer": {
+ "trainer_class": "TrainerTopK",
+ "dict_class": "AutoEncoderTopK",
+ "lr": 0.0002,
+ "steps": "29296",
+ "seed": 0,
+ "activation_dim": 2048,
+ "dict_size": 16384,
+ "k": 20,
+ "device": "cuda:0",
+ "layer": 11,
+ "lm_name": "EleutherAI/pythia-1.4b-deduped",
+ "wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
+ "submodule_name": "resid_post_layer_11"
+ },
+ "buffer": {
+ "d_submodule": 2048,
+ "io": "out",
+ "n_ctxs": 2000,
+ "ctx_len": 128,
+ "refresh_batch_size": 24,
+ "out_batch_size": 4096,
+ "device": "cuda:0"
+ }
+ }
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_48/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:07fe5d2d6477315d213d5eb4c4759a5186541e3be5e087512a23bb4f9cc12617
+ size 268511040
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_48/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "trainer": {
+ "trainer_class": "TrainerTopK",
+ "dict_class": "AutoEncoderTopK",
+ "lr": 0.0002,
+ "steps": "48",
+ "seed": 0,
+ "activation_dim": 2048,
+ "dict_size": 16384,
+ "k": 20,
+ "device": "cuda:0",
+ "layer": 11,
+ "lm_name": "EleutherAI/pythia-1.4b-deduped",
+ "wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
+ "submodule_name": "resid_post_layer_11"
+ },
+ "buffer": {
+ "d_submodule": 2048,
+ "io": "out",
+ "n_ctxs": 2000,
+ "ctx_len": 128,
+ "refresh_batch_size": 24,
+ "out_batch_size": 4096,
+ "device": "cuda:0"
+ }
+ }
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_488/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1bec26256c8c3ba421bb3b42a864678b00b999101543869c1ba642835be4b156
+ size 268511048
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_488/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "trainer": {
+ "trainer_class": "TrainerTopK",
+ "dict_class": "AutoEncoderTopK",
+ "lr": 0.0002,
+ "steps": "488",
+ "seed": 0,
+ "activation_dim": 2048,
+ "dict_size": 16384,
+ "k": 20,
+ "device": "cuda:0",
+ "layer": 11,
+ "lm_name": "EleutherAI/pythia-1.4b-deduped",
+ "wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
+ "submodule_name": "resid_post_layer_11"
+ },
+ "buffer": {
+ "d_submodule": 2048,
+ "io": "out",
+ "n_ctxs": 2000,
+ "ctx_len": 128,
+ "refresh_batch_size": 24,
+ "out_batch_size": 4096,
+ "device": "cuda:0"
+ }
+ }
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_4882/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d4b5711335410894032df7a45fdcc95e0c3288daae6dca00dfa501df123fcf82
+ size 268511120
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "trainer": {
+ "trainer_class": "TrainerTopK",
+ "dict_class": "AutoEncoderTopK",
+ "lr": 0.0002,
+ "steps": "4882",
+ "seed": 0,
+ "activation_dim": 2048,
+ "dict_size": 16384,
+ "k": 20,
+ "device": "cuda:0",
+ "layer": 11,
+ "lm_name": "EleutherAI/pythia-1.4b-deduped",
+ "wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
+ "submodule_name": "resid_post_layer_11"
+ },
+ "buffer": {
+ "d_submodule": 2048,
+ "io": "out",
+ "n_ctxs": 2000,
+ "ctx_len": 128,
+ "refresh_batch_size": 24,
+ "out_batch_size": 4096,
+ "device": "cuda:0"
+ }
+ }
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_9765/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:45541576a35a7e1167ea8acdb8d1ef865f24163bcffd0ee92c5ddb4a1527bd2f
+ size 268511120
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_9765/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "trainer": {
+ "trainer_class": "TrainerTopK",
+ "dict_class": "AutoEncoderTopK",
+ "lr": 0.0002,
+ "steps": "9765",
+ "seed": 0,
+ "activation_dim": 2048,
+ "dict_size": 16384,
+ "k": 20,
+ "device": "cuda:0",
+ "layer": 11,
+ "lm_name": "EleutherAI/pythia-1.4b-deduped",
+ "wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
+ "submodule_name": "resid_post_layer_11"
+ },
+ "buffer": {
+ "d_submodule": 2048,
+ "io": "out",
+ "n_ctxs": 2000,
+ "ctx_len": 128,
+ "refresh_batch_size": 24,
+ "out_batch_size": 4096,
+ "device": "cuda:0"
+ }
+ }
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_0/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f6adbecf89ab611c06dd7362aeed0c6223349b95ca84867cd6c4fa1aaee62e51
+ size 268511032
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "trainer": {
+ "trainer_class": "TrainerTopK",
+ "dict_class": "AutoEncoderTopK",
+ "lr": 0.0002,
+ "steps": "0",
+ "seed": 0,
+ "activation_dim": 2048,
+ "dict_size": 16384,
+ "k": 40,
+ "device": "cuda:0",
+ "layer": 11,
+ "lm_name": "EleutherAI/pythia-1.4b-deduped",
+ "wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
+ "submodule_name": "resid_post_layer_11"
+ },
+ "buffer": {
+ "d_submodule": 2048,
+ "io": "out",
+ "n_ctxs": 2000,
+ "ctx_len": 128,
+ "refresh_batch_size": 24,
+ "out_batch_size": 4096,
+ "device": "cuda:0"
+ }
+ }
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_154/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c4caef5ae1673ad808e2d7726be2ea3624c070025e17a6e3a70e988632b003a3
+ size 268511048
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_154/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "trainer": {
+ "trainer_class": "TrainerTopK",
+ "dict_class": "AutoEncoderTopK",
+ "lr": 0.0002,
+ "steps": "154",
+ "seed": 0,
+ "activation_dim": 2048,
+ "dict_size": 16384,
+ "k": 40,
+ "device": "cuda:0",
+ "layer": 11,
+ "lm_name": "EleutherAI/pythia-1.4b-deduped",
+ "wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
+ "submodule_name": "resid_post_layer_11"
+ },
+ "buffer": {
+ "d_submodule": 2048,
+ "io": "out",
+ "n_ctxs": 2000,
+ "ctx_len": 128,
+ "refresh_batch_size": 24,
+ "out_batch_size": 4096,
+ "device": "cuda:0"
+ }
+ }
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_1544/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:baa3efb085b9a91077c06f6d7b69397242041feec0ede677f11f424c1fb30686
+ size 268511120
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_1544/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "trainer": {
+ "trainer_class": "TrainerTopK",
+ "dict_class": "AutoEncoderTopK",
+ "lr": 0.0002,
+ "steps": "1544",
+ "seed": 0,
+ "activation_dim": 2048,
+ "dict_size": 16384,
+ "k": 40,
+ "device": "cuda:0",
+ "layer": 11,
+ "lm_name": "EleutherAI/pythia-1.4b-deduped",
+ "wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
+ "submodule_name": "resid_post_layer_11"
+ },
+ "buffer": {
+ "d_submodule": 2048,
+ "io": "out",
+ "n_ctxs": 2000,
+ "ctx_len": 128,
+ "refresh_batch_size": 24,
+ "out_batch_size": 4096,
+ "device": "cuda:0"
+ }
+ }
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_19531/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:990c2f403a04c9e4f15972f9f1e3d93eca79f878fd70d0a9f4c7571113040fa9
+ size 268511320
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_19531/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "trainer": {
+ "trainer_class": "TrainerTopK",
+ "dict_class": "AutoEncoderTopK",
+ "lr": 0.0002,
+ "steps": "19531",
+ "seed": 0,
+ "activation_dim": 2048,
+ "dict_size": 16384,
+ "k": 40,
+ "device": "cuda:0",
+ "layer": 11,
+ "lm_name": "EleutherAI/pythia-1.4b-deduped",
+ "wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
+ "submodule_name": "resid_post_layer_11"
+ },
+ "buffer": {
+ "d_submodule": 2048,
+ "io": "out",
+ "n_ctxs": 2000,
+ "ctx_len": 128,
+ "refresh_batch_size": 24,
+ "out_batch_size": 4096,
+ "device": "cuda:0"
+ }
+ }
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_29296/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dcb7920bb0cb1115a3966b3a81ad046e9220eed41e9626792278914c4f8035a7
+ size 268511320
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_29296/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "trainer": {
+ "trainer_class": "TrainerTopK",
+ "dict_class": "AutoEncoderTopK",
+ "lr": 0.0002,
+ "steps": "29296",
+ "seed": 0,
+ "activation_dim": 2048,
+ "dict_size": 16384,
+ "k": 40,
+ "device": "cuda:0",
+ "layer": 11,
+ "lm_name": "EleutherAI/pythia-1.4b-deduped",
+ "wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
+ "submodule_name": "resid_post_layer_11"
+ },
+ "buffer": {
+ "d_submodule": 2048,
+ "io": "out",
+ "n_ctxs": 2000,
+ "ctx_len": 128,
+ "refresh_batch_size": 24,
+ "out_batch_size": 4096,
+ "device": "cuda:0"
+ }
+ }
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_48/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1e0c4fee3f2d78c9a5cb1c1016c3a54dc2dd72f88e3fc2ab09e63157679d8c07
+ size 268511040
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_48/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "trainer": {
+ "trainer_class": "TrainerTopK",
+ "dict_class": "AutoEncoderTopK",
+ "lr": 0.0002,
+ "steps": "48",
+ "seed": 0,
+ "activation_dim": 2048,
+ "dict_size": 16384,
+ "k": 40,
+ "device": "cuda:0",
+ "layer": 11,
+ "lm_name": "EleutherAI/pythia-1.4b-deduped",
+ "wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
+ "submodule_name": "resid_post_layer_11"
+ },
+ "buffer": {
+ "d_submodule": 2048,
+ "io": "out",
+ "n_ctxs": 2000,
+ "ctx_len": 128,
+ "refresh_batch_size": 24,
+ "out_batch_size": 4096,
+ "device": "cuda:0"
+ }
+ }
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_488/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a8dd04b0baf20a9a97f5c63901f6d96b329fb9ca9f22167af7c23296fa087b08
+ size 268511048
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_488/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "trainer": {
+ "trainer_class": "TrainerTopK",
+ "dict_class": "AutoEncoderTopK",
+ "lr": 0.0002,
+ "steps": "488",
+ "seed": 0,
+ "activation_dim": 2048,
+ "dict_size": 16384,
+ "k": 40,
+ "device": "cuda:0",
+ "layer": 11,
+ "lm_name": "EleutherAI/pythia-1.4b-deduped",
+ "wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
+ "submodule_name": "resid_post_layer_11"
+ },
+ "buffer": {
+ "d_submodule": 2048,
+ "io": "out",
+ "n_ctxs": 2000,
+ "ctx_len": 128,
+ "refresh_batch_size": 24,
+ "out_batch_size": 4096,
+ "device": "cuda:0"
+ }
+ }
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_4882/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9861440d2992b23f094bae25bb91a4d04513194387a2ad7ff333557396280ed2
+ size 268511120
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "trainer": {
+ "trainer_class": "TrainerTopK",
+ "dict_class": "AutoEncoderTopK",
+ "lr": 0.0002,
+ "steps": "4882",
+ "seed": 0,
+ "activation_dim": 2048,
+ "dict_size": 16384,
+ "k": 40,
+ "device": "cuda:0",
+ "layer": 11,
+ "lm_name": "EleutherAI/pythia-1.4b-deduped",
+ "wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
+ "submodule_name": "resid_post_layer_11"
+ },
+ "buffer": {
+ "d_submodule": 2048,
+ "io": "out",
+ "n_ctxs": 2000,
+ "ctx_len": 128,
+ "refresh_batch_size": 24,
+ "out_batch_size": 4096,
+ "device": "cuda:0"
+ }
+ }
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_9765/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2270228e169e6210fe2544982668f8345b32d7d0dbe2c65127f650929905054c
+ size 268511120
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_9765/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "trainer": {
+ "trainer_class": "TrainerTopK",
+ "dict_class": "AutoEncoderTopK",
+ "lr": 0.0002,
+ "steps": "9765",
+ "seed": 0,
+ "activation_dim": 2048,
+ "dict_size": 16384,
+ "k": 40,
+ "device": "cuda:0",
+ "layer": 11,
+ "lm_name": "EleutherAI/pythia-1.4b-deduped",
+ "wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
+ "submodule_name": "resid_post_layer_11"
+ },
+ "buffer": {
+ "d_submodule": 2048,
+ "io": "out",
+ "n_ctxs": 2000,
+ "ctx_len": 128,
+ "refresh_batch_size": 24,
+ "out_batch_size": 4096,
+ "device": "cuda:0"
+ }
+ }
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_0/ae.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f6adbecf89ab611c06dd7362aeed0c6223349b95ca84867cd6c4fa1aaee62e51
+ size 268511032
pythia1.4b/pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+ "trainer": {
+ "trainer_class": "TrainerTopK",
+ "dict_class": "AutoEncoderTopK",
+ "lr": 0.0002,
+ "steps": "0",
+ "seed": 0,
+ "activation_dim": 2048,
+ "dict_size": 16384,
+ "k": 80,
+ "device": "cuda:0",
+ "layer": 11,
+ "lm_name": "EleutherAI/pythia-1.4b-deduped",
+ "wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
+ "submodule_name": "resid_post_layer_11"
+ },
+ "buffer": {
+ "d_submodule": 2048,
+ "io": "out",
+ "n_ctxs": 2000,
+ "ctx_len": 128,
+ "refresh_batch_size": 24,
+ "out_batch_size": 4096,
+ "device": "cuda:0"
+ }
+ }