Upload folder using huggingface_hub
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_0/ae.pt +3 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_0/config.json +26 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_1/ae.pt +3 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_1/config.json +26 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_2/ae.pt +3 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_2/config.json +26 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_3/ae.pt +3 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_3/config.json +26 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_4/ae.pt +3 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_4/config.json +26 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_5/ae.pt +3 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_5/config.json +26 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_0/ae.pt +3 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json +26 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_154/ae.pt +3 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_154/config.json +26 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_1544/ae.pt +3 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_1544/config.json +26 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_19531/ae.pt +3 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_19531/config.json +26 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_29296/ae.pt +3 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_29296/config.json +26 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_48/ae.pt +3 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_48/config.json +26 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_488/ae.pt +3 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_488/config.json +26 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_4882/ae.pt +3 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json +26 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_9765/ae.pt +3 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_9765/config.json +26 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_0/ae.pt +3 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json +26 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_154/ae.pt +3 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_154/config.json +26 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_1544/ae.pt +3 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_1544/config.json +26 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_19531/ae.pt +3 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_19531/config.json +26 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_29296/ae.pt +3 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_29296/config.json +26 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_48/ae.pt +3 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_48/config.json +26 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_488/ae.pt +3 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_488/config.json +26 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_4882/ae.pt +3 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json +26 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_9765/ae.pt +3 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_9765/config.json +26 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_0/ae.pt +3 -0
- pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json +26 -0
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f931bbd2be0cf27730cf6315612899804a86d40faa2a8acb8d6f4a2534fdaddc
|
3 |
+
size 268511016
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_0/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": 48828,
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2048,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 20,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "EleutherAI/pythia-1.4b-deduped",
|
14 |
+
"wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2048,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_1/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77c04025750f094a1207b5c13d9c0513bfb6738d7b56ccf82823b5cece85ec97
|
3 |
+
size 268511016
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_1/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": 48828,
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2048,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 40,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "EleutherAI/pythia-1.4b-deduped",
|
14 |
+
"wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2048,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_2/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:777821e6b4ba99d790da876723dc8e934cb1723b8126a848baa763bb716050ec
|
3 |
+
size 268511016
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_2/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": 48828,
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2048,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 80,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "EleutherAI/pythia-1.4b-deduped",
|
14 |
+
"wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2048,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_3/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9165f71a2c18bc3a3c5db1008be308f7dff6bf8c89227924af9cb3065de69064
|
3 |
+
size 268511016
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_3/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": 48828,
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2048,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 160,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "EleutherAI/pythia-1.4b-deduped",
|
14 |
+
"wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2048,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_4/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd61e41012e58b8d9599f2cc17d064146bf530fae5c81420128b6c6c91dd05af
|
3 |
+
size 268511016
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_4/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": 48828,
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2048,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 320,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "EleutherAI/pythia-1.4b-deduped",
|
14 |
+
"wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2048,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_5/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e946949d76acfd70c4e05de0701fee087ab718e6503dcef0651efc0f732713d1
|
3 |
+
size 268511016
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11/trainer_5/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": 48828,
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2048,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 640,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "EleutherAI/pythia-1.4b-deduped",
|
14 |
+
"wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2048,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6adbecf89ab611c06dd7362aeed0c6223349b95ca84867cd6c4fa1aaee62e51
|
3 |
+
size 268511032
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": "0",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2048,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 20,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "EleutherAI/pythia-1.4b-deduped",
|
14 |
+
"wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2048,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_154/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b67e75ad65e356011f6408fa8110bc0781b2b342a7ded331c7dda307480c614
|
3 |
+
size 268511048
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_154/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": "154",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2048,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 20,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "EleutherAI/pythia-1.4b-deduped",
|
14 |
+
"wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2048,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_1544/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1983dc5a21aef1fd81c3bc5f11f083c8e6eaa9702bce34ef9493c647b5884972
|
3 |
+
size 268511120
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_1544/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": "1544",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2048,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 20,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "EleutherAI/pythia-1.4b-deduped",
|
14 |
+
"wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2048,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_19531/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a6755014321c45b857963ffdf611282d3c2725576f5c0e106ad001843fdda12
|
3 |
+
size 268511320
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_19531/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": "19531",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2048,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 20,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "EleutherAI/pythia-1.4b-deduped",
|
14 |
+
"wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2048,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_29296/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5845ca9123c5cd425cfb70f950e0d791bb99e986ed461b6d71fb5071f3d63dc3
|
3 |
+
size 268511320
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_29296/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": "29296",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2048,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 20,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "EleutherAI/pythia-1.4b-deduped",
|
14 |
+
"wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2048,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_48/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:07fe5d2d6477315d213d5eb4c4759a5186541e3be5e087512a23bb4f9cc12617
|
3 |
+
size 268511040
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_48/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": "48",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2048,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 20,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "EleutherAI/pythia-1.4b-deduped",
|
14 |
+
"wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2048,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_488/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1bec26256c8c3ba421bb3b42a864678b00b999101543869c1ba642835be4b156
|
3 |
+
size 268511048
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_488/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": "488",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2048,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 20,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "EleutherAI/pythia-1.4b-deduped",
|
14 |
+
"wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2048,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_4882/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4b5711335410894032df7a45fdcc95e0c3288daae6dca00dfa501df123fcf82
|
3 |
+
size 268511120
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": "4882",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2048,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 20,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "EleutherAI/pythia-1.4b-deduped",
|
14 |
+
"wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2048,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_9765/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45541576a35a7e1167ea8acdb8d1ef865f24163bcffd0ee92c5ddb4a1527bd2f
|
3 |
+
size 268511120
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_0_step_9765/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": "9765",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2048,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 20,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "EleutherAI/pythia-1.4b-deduped",
|
14 |
+
"wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2048,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6adbecf89ab611c06dd7362aeed0c6223349b95ca84867cd6c4fa1aaee62e51
|
3 |
+
size 268511032
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": "0",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2048,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 40,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "EleutherAI/pythia-1.4b-deduped",
|
14 |
+
"wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2048,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_154/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4caef5ae1673ad808e2d7726be2ea3624c070025e17a6e3a70e988632b003a3
|
3 |
+
size 268511048
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_154/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": "154",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2048,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 40,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "EleutherAI/pythia-1.4b-deduped",
|
14 |
+
"wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2048,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_1544/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:baa3efb085b9a91077c06f6d7b69397242041feec0ede677f11f424c1fb30686
|
3 |
+
size 268511120
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_1544/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": "1544",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2048,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 40,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "EleutherAI/pythia-1.4b-deduped",
|
14 |
+
"wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2048,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_19531/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:990c2f403a04c9e4f15972f9f1e3d93eca79f878fd70d0a9f4c7571113040fa9
|
3 |
+
size 268511320
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_19531/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": "19531",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2048,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 40,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "EleutherAI/pythia-1.4b-deduped",
|
14 |
+
"wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2048,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_29296/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dcb7920bb0cb1115a3966b3a81ad046e9220eed41e9626792278914c4f8035a7
|
3 |
+
size 268511320
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_29296/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": "29296",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2048,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 40,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "EleutherAI/pythia-1.4b-deduped",
|
14 |
+
"wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2048,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_48/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1e0c4fee3f2d78c9a5cb1c1016c3a54dc2dd72f88e3fc2ab09e63157679d8c07
|
3 |
+
size 268511040
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_48/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": "48",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2048,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 40,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "EleutherAI/pythia-1.4b-deduped",
|
14 |
+
"wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2048,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_488/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8dd04b0baf20a9a97f5c63901f6d96b329fb9ca9f22167af7c23296fa087b08
|
3 |
+
size 268511048
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_488/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": "488",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2048,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 40,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "EleutherAI/pythia-1.4b-deduped",
|
14 |
+
"wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2048,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_4882/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9861440d2992b23f094bae25bb91a4d04513194387a2ad7ff333557396280ed2
|
3 |
+
size 268511120
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": "4882",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2048,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 40,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "EleutherAI/pythia-1.4b-deduped",
|
14 |
+
"wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2048,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_9765/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2270228e169e6210fe2544982668f8345b32d7d0dbe2c65127f650929905054c
|
3 |
+
size 268511120
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_1_step_9765/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": "9765",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2048,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 40,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "EleutherAI/pythia-1.4b-deduped",
|
14 |
+
"wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2048,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_0/ae.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6adbecf89ab611c06dd7362aeed0c6223349b95ca84867cd6c4fa1aaee62e51
|
3 |
+
size 268511032
|
pythia1.4b_sweep_topk_ctx128_0913/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"trainer": {
|
3 |
+
"trainer_class": "TrainerTopK",
|
4 |
+
"dict_class": "AutoEncoderTopK",
|
5 |
+
"lr": 0.0002,
|
6 |
+
"steps": "0",
|
7 |
+
"seed": 0,
|
8 |
+
"activation_dim": 2048,
|
9 |
+
"dict_size": 16384,
|
10 |
+
"k": 80,
|
11 |
+
"device": "cuda:0",
|
12 |
+
"layer": 11,
|
13 |
+
"lm_name": "EleutherAI/pythia-1.4b-deduped",
|
14 |
+
"wandb_name": "TopKTrainer-EleutherAI/pythia-1.4b-deduped-resid_post_layer_11",
|
15 |
+
"submodule_name": "resid_post_layer_11"
|
16 |
+
},
|
17 |
+
"buffer": {
|
18 |
+
"d_submodule": 2048,
|
19 |
+
"io": "out",
|
20 |
+
"n_ctxs": 2000,
|
21 |
+
"ctx_len": 128,
|
22 |
+
"refresh_batch_size": 24,
|
23 |
+
"out_batch_size": 4096,
|
24 |
+
"device": "cuda:0"
|
25 |
+
}
|
26 |
+
}
|