canrager committed on
Commit
233e550
1 Parent(s): b17a129

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_0/ae.pt +3 -0
  2. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_0/config.json +26 -0
  3. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_0/eval_results.json +1 -0
  4. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_1/ae.pt +3 -0
  5. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_1/config.json +26 -0
  6. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_1/eval_results.json +1 -0
  7. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_2/ae.pt +3 -0
  8. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_2/config.json +26 -0
  9. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_2/eval_results.json +1 -0
  10. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_3/ae.pt +3 -0
  11. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_3/config.json +26 -0
  12. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_3/eval_results.json +1 -0
  13. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_4/ae.pt +3 -0
  14. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_4/config.json +26 -0
  15. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_4/eval_results.json +1 -0
  16. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_5/ae.pt +3 -0
  17. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_5/config.json +26 -0
  18. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_5/eval_results.json +1 -0
  19. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/ae.pt +3 -0
  20. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json +27 -0
  21. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json +1 -0
  22. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/ae.pt +3 -0
  23. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/config.json +27 -0
  24. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/eval_results.json +1 -0
  25. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/ae.pt +3 -0
  26. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/config.json +27 -0
  27. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/eval_results.json +1 -0
  28. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/ae.pt +3 -0
  29. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json +27 -0
  30. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json +1 -0
  31. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/ae.pt +3 -0
  32. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/config.json +27 -0
  33. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/eval_results.json +1 -0
  34. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/ae.pt +3 -0
  35. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json +27 -0
  36. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json +1 -0
  37. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/ae.pt +3 -0
  38. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/config.json +27 -0
  39. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/eval_results.json +1 -0
  40. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/ae.pt +3 -0
  41. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/config.json +27 -0
  42. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/eval_results.json +1 -0
  43. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/ae.pt +3 -0
  44. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json +27 -0
  45. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json +1 -0
  46. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/ae.pt +3 -0
  47. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/config.json +27 -0
  48. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/eval_results.json +1 -0
  49. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/ae.pt +3 -0
  50. gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json +27 -0
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3371aefeca2f4995d599fa6a14600eb0abe9cfb19d9d0f3893587ebceec3dd16
3
+ size 84964136
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_0/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 4608,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.025,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 51.09783172607422, "l1_loss": 696.4310302734375, "l0": 427.0833435058594, "frac_alive": 0.00021701389050576836, "frac_variance_explained": 0.9793744087219238, "cossim": 0.9331231117248535, "l2_ratio": 0.8912414312362671, "relative_reconstruction_bias": 0.9950931072235107, "loss_original": 2.432832717895508, "loss_reconstructed": 2.5339653491973877, "loss_zero": 12.452934265136719, "frac_recovered": 0.9899070262908936, "hyperparameters": {"n_inputs": 24, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da19506f642d4877189236d721933dbb1fdb6930aaf6fd38349e25f0008e170d
3
+ size 84964136
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_1/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 4608,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.035,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_1/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 61.166587829589844, "l1_loss": 347.56976318359375, "l0": 153.0, "frac_alive": 0.00021701389050576836, "frac_variance_explained": 0.6767814755439758, "cossim": 0.8976640105247498, "l2_ratio": 0.8425207138061523, "relative_reconstruction_bias": 0.9373458623886108, "loss_original": 2.432832717895508, "loss_reconstructed": 2.690847396850586, "loss_zero": 12.452934265136719, "frac_recovered": 0.974250316619873, "hyperparameters": {"n_inputs": 24, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb1dc41c217bf34e752839990fbafe63d55d3aef64bfee0da2ca71acc194d8a7
3
+ size 84964136
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_2/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 4608,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.04,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_2/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 61.01639938354492, "l1_loss": 368.2674560546875, "l0": 97.91667175292969, "frac_alive": 0.00021701389050576836, "frac_variance_explained": 0.9684154987335205, "cossim": 0.9024285078048706, "l2_ratio": 0.8613905310630798, "relative_reconstruction_bias": 0.9948885440826416, "loss_original": 2.432832717895508, "loss_reconstructed": 2.804324150085449, "loss_zero": 12.452934265136719, "frac_recovered": 0.962925374507904, "hyperparameters": {"n_inputs": 24, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79626d09db121cfdd709d316f0435a2bcd0f7b7d1dc86ca711e7212f43ffa713
3
+ size 84964136
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_3/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 4608,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.05,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_3/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 78.22975158691406, "l1_loss": 218.60513305664062, "l0": 50.833335876464844, "frac_alive": 0.00021701389050576836, "frac_variance_explained": 0.5495132207870483, "cossim": 0.8355358839035034, "l2_ratio": 0.7730690836906433, "relative_reconstruction_bias": 0.9281772375106812, "loss_original": 2.432832717895508, "loss_reconstructed": 3.467956066131592, "loss_zero": 12.452934265136719, "frac_recovered": 0.8966952562332153, "hyperparameters": {"n_inputs": 24, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7231f173716248635598e5075872d81b5acdf54ccc41075ebc05229e88d54dd
3
+ size 84964136
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_4/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 4608,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.06,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_4/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 74.44535827636719, "l1_loss": 236.12059020996094, "l0": 24.83333396911621, "frac_alive": 0.00021701389050576836, "frac_variance_explained": 0.9570982456207275, "cossim": 0.8187623023986816, "l2_ratio": 0.7615946531295776, "relative_reconstruction_bias": 0.9943456649780273, "loss_original": 2.432832717895508, "loss_reconstructed": 4.803515434265137, "loss_zero": 12.452934265136719, "frac_recovered": 0.763407289981842, "hyperparameters": {"n_inputs": 24, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95757030f87c5e68f02d9f900611d0908f13566c58bfd78ec15c30126b8ac221
3
+ size 84964136
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_5/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 4608,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.07,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11"
16
+ },
17
+ "buffer": {
18
+ "d_submodule": 2304,
19
+ "io": "out",
20
+ "n_ctxs": 2000,
21
+ "ctx_len": 128,
22
+ "refresh_batch_size": 24,
23
+ "out_batch_size": 4096,
24
+ "device": "cuda:0"
25
+ }
26
+ }
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11/trainer_5/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 91.40856170654297, "l1_loss": 147.08984375, "l0": 17.45833396911621, "frac_alive": 0.00021701389050576836, "frac_variance_explained": 0.39455533027648926, "cossim": 0.783570408821106, "l2_ratio": 0.7340455055236816, "relative_reconstruction_bias": 0.9417886734008789, "loss_original": 2.432832717895508, "loss_reconstructed": 5.698957920074463, "loss_zero": 12.452934265136719, "frac_recovered": 0.6740427017211914, "hyperparameters": {"n_inputs": 24, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c4174f2b10df5afac120f58a012b3e15fcc2a9afa2d9937340977b82e7dfd84
3
+ size 84964152
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 4608,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.025,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11",
16
+ "steps": "0"
17
+ },
18
+ "buffer": {
19
+ "d_submodule": 2304,
20
+ "io": "out",
21
+ "n_ctxs": 2000,
22
+ "ctx_len": 128,
23
+ "refresh_batch_size": 24,
24
+ "out_batch_size": 4096,
25
+ "device": "cuda:0"
26
+ }
27
+ }
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 167.1028854370117, "l1_loss": 3259.6132568359376, "l0": 2315.0375732421876, "frac_variance_explained": -0.26188645362854, "cossim": 0.004149633987981361, "l2_ratio": 0.5866222858428956, "relative_reconstruction_bias": -41.212248992919925, "loss_original": 2.4483999013900757, "loss_reconstructed": 17.234307861328126, "loss_zero": 12.452933025360107, "frac_recovered": -0.47787620425224303, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8d84e90cb2965413f89ce0cdbb00676fdde5cd7ad47f69b6debc967559ca6e2
3
+ size 84964440
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 4608,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.025,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11",
16
+ "steps": "19528"
17
+ },
18
+ "buffer": {
19
+ "d_submodule": 2304,
20
+ "io": "out",
21
+ "n_ctxs": 2000,
22
+ "ctx_len": 128,
23
+ "refresh_batch_size": 24,
24
+ "out_batch_size": 4096,
25
+ "device": "cuda:0"
26
+ }
27
+ }
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_19528/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 52.390929794311525, "l1_loss": 664.6068298339844, "l0": 430.4250122070313, "frac_variance_explained": 0.8581684529781342, "cossim": 0.929267168045044, "l2_ratio": 0.8806805431842804, "relative_reconstruction_bias": 0.9670958697795868, "loss_original": 2.4483999013900757, "loss_reconstructed": 2.5743808269500734, "loss_zero": 12.452933025360107, "frac_recovered": 0.9874637722969055, "frac_alive": 0.9769965410232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7862f1f0c27d1a4a51759b17e418d17d81db073068eeee02d079ec3e837b76b
3
+ size 84964440
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 4608,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.025,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11",
16
+ "steps": "29292"
17
+ },
18
+ "buffer": {
19
+ "d_submodule": 2304,
20
+ "io": "out",
21
+ "n_ctxs": 2000,
22
+ "ctx_len": 128,
23
+ "refresh_batch_size": 24,
24
+ "out_batch_size": 4096,
25
+ "device": "cuda:0"
26
+ }
27
+ }
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_29292/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 52.17484703063965, "l1_loss": 653.6545166015625, "l0": 428.90000915527344, "frac_variance_explained": 0.842798912525177, "cossim": 0.9301473379135132, "l2_ratio": 0.8885334372520447, "relative_reconstruction_bias": 0.9680565476417542, "loss_original": 2.4483999013900757, "loss_reconstructed": 2.5747411012649537, "loss_zero": 12.452933025360107, "frac_recovered": 0.9874280393123627, "frac_alive": 0.9793837070465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09eaff8c4400b3d580bd949c5de608d2d10cccebef131e1267edd05c07cd7f25
3
+ size 84964240
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 4608,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.025,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11",
16
+ "steps": "4882"
17
+ },
18
+ "buffer": {
19
+ "d_submodule": 2304,
20
+ "io": "out",
21
+ "n_ctxs": 2000,
22
+ "ctx_len": 128,
23
+ "refresh_batch_size": 24,
24
+ "out_batch_size": 4096,
25
+ "device": "cuda:0"
26
+ }
27
+ }
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 60.687442016601565, "l1_loss": 625.6324584960937, "l0": 339.5833465576172, "frac_variance_explained": 0.8289074659347534, "cossim": 0.9064209163188934, "l2_ratio": 0.8561354637145996, "relative_reconstruction_bias": 0.9688756346702576, "loss_original": 2.4483999013900757, "loss_reconstructed": 2.676463174819946, "loss_zero": 12.452933025360107, "frac_recovered": 0.9772705137729645, "frac_alive": 0.5711805820465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b7b3002992ad2719bdca27b3c3ac7d5e88a20f923f149339a945b8960830d13
3
+ size 84964240
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 4608,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.025,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11",
16
+ "steps": "9764"
17
+ },
18
+ "buffer": {
19
+ "d_submodule": 2304,
20
+ "io": "out",
21
+ "n_ctxs": 2000,
22
+ "ctx_len": 128,
23
+ "refresh_batch_size": 24,
24
+ "out_batch_size": 4096,
25
+ "device": "cuda:0"
26
+ }
27
+ }
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_0_step_9764/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 53.42113838195801, "l1_loss": 660.417529296875, "l0": 428.55418090820314, "frac_variance_explained": 0.8469472169876099, "cossim": 0.9262736797332763, "l2_ratio": 0.8801985681056976, "relative_reconstruction_bias": 0.966783630847931, "loss_original": 2.4483999013900757, "loss_reconstructed": 2.58748140335083, "loss_zero": 12.452933025360107, "frac_recovered": 0.9861565232276917, "frac_alive": 0.9040798544883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c4174f2b10df5afac120f58a012b3e15fcc2a9afa2d9937340977b82e7dfd84
3
+ size 84964152
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 4608,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.035,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11",
16
+ "steps": "0"
17
+ },
18
+ "buffer": {
19
+ "d_submodule": 2304,
20
+ "io": "out",
21
+ "n_ctxs": 2000,
22
+ "ctx_len": 128,
23
+ "refresh_batch_size": 24,
24
+ "out_batch_size": 4096,
25
+ "device": "cuda:0"
26
+ }
27
+ }
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 175.92864990234375, "l1_loss": 3441.5954345703126, "l0": 2317.941748046875, "frac_variance_explained": -0.26664564609527586, "cossim": 0.003556889179162681, "l2_ratio": 0.5888823807239533, "relative_reconstruction_bias": -136.97222518920898, "loss_original": 2.4483999013900757, "loss_reconstructed": 17.234307861328126, "loss_zero": 12.452933025360107, "frac_recovered": -0.47787620425224303, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd031da2442ba9006771f4bf9e4d06c17748877213c6da0d774f3f4a3eb4375f
3
+ size 84964440
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 4608,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.035,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11",
16
+ "steps": "19528"
17
+ },
18
+ "buffer": {
19
+ "d_submodule": 2304,
20
+ "io": "out",
21
+ "n_ctxs": 2000,
22
+ "ctx_len": 128,
23
+ "refresh_batch_size": 24,
24
+ "out_batch_size": 4096,
25
+ "device": "cuda:0"
26
+ }
27
+ }
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_19528/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 62.69771194458008, "l1_loss": 424.6473724365234, "l0": 157.85000457763672, "frac_variance_explained": 0.8613891124725341, "cossim": 0.9007849872112275, "l2_ratio": 0.8550859570503235, "relative_reconstruction_bias": 0.9797493934631347, "loss_original": 2.4483999013900757, "loss_reconstructed": 2.720248818397522, "loss_zero": 12.452933025360107, "frac_recovered": 0.9728983402252197, "frac_alive": 0.7018229365348816, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28cf681f5de9c3d12a06baf7ee1a17e356c61e2c9f0cb3153ecd89220e41054f
3
+ size 84964440
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 4608,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.035,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11",
16
+ "steps": "29292"
17
+ },
18
+ "buffer": {
19
+ "d_submodule": 2304,
20
+ "io": "out",
21
+ "n_ctxs": 2000,
22
+ "ctx_len": 128,
23
+ "refresh_batch_size": 24,
24
+ "out_batch_size": 4096,
25
+ "device": "cuda:0"
26
+ }
27
+ }
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_29292/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 62.6892448425293, "l1_loss": 391.03612365722654, "l0": 158.2791717529297, "frac_variance_explained": 0.7394937157630921, "cossim": 0.901382839679718, "l2_ratio": 0.8549536645412446, "relative_reconstruction_bias": 0.9570579826831818, "loss_original": 2.4483999013900757, "loss_reconstructed": 2.71214439868927, "loss_zero": 12.452933025360107, "frac_recovered": 0.9737131774425507, "frac_alive": 0.7230902910232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cae7229a90784a1f2b8cd1d1f7939f633997952c77b272675e2815a2bae58c59
3
+ size 84964240
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 4608,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.035,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11",
16
+ "steps": "4882"
17
+ },
18
+ "buffer": {
19
+ "d_submodule": 2304,
20
+ "io": "out",
21
+ "n_ctxs": 2000,
22
+ "ctx_len": 128,
23
+ "refresh_batch_size": 24,
24
+ "out_batch_size": 4096,
25
+ "device": "cuda:0"
26
+ }
27
+ }
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 73.07725524902344, "l1_loss": 347.9136077880859, "l0": 103.04583511352538, "frac_variance_explained": 0.7512376010417938, "cossim": 0.8572899758815765, "l2_ratio": 0.8055591583251953, "relative_reconstruction_bias": 0.966018682718277, "loss_original": 2.4483999013900757, "loss_reconstructed": 3.2463842153549196, "loss_zero": 12.452933025360107, "frac_recovered": 0.9202560782432556, "frac_alive": 0.2736545205116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57c6496b429688a9b48745b0e45c555118bde43217a8f06c9d76ccb37762b0ac
3
+ size 84964240
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 4608,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.035,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11",
16
+ "steps": "9764"
17
+ },
18
+ "buffer": {
19
+ "d_submodule": 2304,
20
+ "io": "out",
21
+ "n_ctxs": 2000,
22
+ "ctx_len": 128,
23
+ "refresh_batch_size": 24,
24
+ "out_batch_size": 4096,
25
+ "device": "cuda:0"
26
+ }
27
+ }
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_1_step_9764/eval_results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"l2_loss": 66.39950981140137, "l1_loss": 364.6046905517578, "l0": 141.72083892822266, "frac_variance_explained": 0.6755688190460205, "cossim": 0.8846192836761475, "l2_ratio": 0.8318478167057037, "relative_reconstruction_bias": 0.9467559456825256, "loss_original": 2.4483999013900757, "loss_reconstructed": 2.855544424057007, "loss_zero": 12.452933025360107, "frac_recovered": 0.9593764483928681, "frac_alive": 0.5112847089767456, "hyperparameters": {"n_inputs": 250, "context_length": 128}}
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c4174f2b10df5afac120f58a012b3e15fcc2a9afa2d9937340977b82e7dfd84
3
+ size 84964152
gemma-2-2b_sweep_standard_ctx128_ef2_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "AutoEncoder",
4
+ "trainer_class": "StandardTrainer",
5
+ "activation_dim": 2304,
6
+ "dict_size": 4608,
7
+ "lr": 0.0003,
8
+ "l1_penalty": 0.04,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda:0",
12
+ "layer": 11,
13
+ "lm_name": "google/gemma-2-2b",
14
+ "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11",
15
+ "submodule_name": "resid_post_layer_11",
16
+ "steps": "0"
17
+ },
18
+ "buffer": {
19
+ "d_submodule": 2304,
20
+ "io": "out",
21
+ "n_ctxs": 2000,
22
+ "ctx_len": 128,
23
+ "refresh_batch_size": 24,
24
+ "out_batch_size": 4096,
25
+ "device": "cuda:0"
26
+ }
27
+ }