diff --git a/resid_post_layer_11_checkpoints/trainer_0_step_0/ae.pt b/resid_post_layer_11_checkpoints/trainer_0_step_0/ae.pt deleted file mode 100644 index 2b8843ef9a81156e1d039f133a688428bf0b35cd..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_0_step_0/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863 -size 339823416 diff --git a/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json b/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json deleted file mode 100644 index b8ef35097e2904943d56130b6e84abfa1336d5ce..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "0", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json b/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json deleted file mode 100644 index a55c3aa7c1e16047462c033381afa9c936e7c40a..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 144.1739944458008, "l1_loss": 113.89709167480468, "l0": 20.0, "frac_variance_explained": 0.06153666377067566, "cossim": 0.285680028796196, "l2_ratio": 0.1798807665705681, "relative_reconstruction_bias": 0.6311139702796936, "loss_original": 2.4489264488220215, "loss_reconstructed": 15.055834197998047, "loss_zero": 12.452933025360107, "frac_recovered": -0.26015533953905107, "frac_alive": 0.1353081613779068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_0_step_19528/ae.pt b/resid_post_layer_11_checkpoints/trainer_0_step_19528/ae.pt deleted file mode 100644 index 4de40c3619ab7ecefdb05b0f83bd79c1102ca1fd..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_0_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9f56e92af04268566c7ff342c4e7210babc785a03db17bd8eaa38e3442012ce9 -size 339823704 diff --git a/resid_post_layer_11_checkpoints/trainer_0_step_19528/config.json b/resid_post_layer_11_checkpoints/trainer_0_step_19528/config.json deleted file mode 100644 index ea28cd4c1a2c13a61ca56fd3ed1b196138f24422..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_0_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_0_step_19528/eval_results.json b/resid_post_layer_11_checkpoints/trainer_0_step_19528/eval_results.json deleted file mode 100644 index 2b1920c5edba20d00ae73be9686438b59aadfbc6..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_0_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 63.222343826293944, "l1_loss": 284.57915954589845, "l0": 19.99583339691162, "frac_variance_explained": 0.7308643460273743, "cossim": 0.8872040271759033, "l2_ratio": 0.8889125108718872, "relative_reconstruction_bias": 1.0020551800727844, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.7907193899154663, "loss_zero": 12.452933025360107, "frac_recovered": 0.9659082174301148, "frac_alive": 0.1557074636220932, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_0_step_29292/ae.pt b/resid_post_layer_11_checkpoints/trainer_0_step_29292/ae.pt deleted file mode 100644 index 1e89e3b691fcc4f7475aae85c91c3188429e9bc6..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_0_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a5a1d888a0537e9383cc3a9d7da73404627105eedb05a2901ea9b5d8c79679bf -size 339823704 diff --git a/resid_post_layer_11_checkpoints/trainer_0_step_29292/config.json b/resid_post_layer_11_checkpoints/trainer_0_step_29292/config.json deleted file mode 100644 index 26bf7267158e6a40088c5805ef4749142ca90bf8..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_0_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_0_step_29292/eval_results.json b/resid_post_layer_11_checkpoints/trainer_0_step_29292/eval_results.json deleted file mode 100644 index 4a489d1b2612f3f23946bbff250124826b659da0..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_0_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 63.71059875488281, "l1_loss": 272.7416564941406, "l0": 20.0, "frac_variance_explained": 0.7049950003623963, "cossim": 0.8891237080097198, "l2_ratio": 0.8906138241291046, "relative_reconstruction_bias": 1.0008544504642487, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.785463571548462, "loss_zero": 12.452933025360107, "frac_recovered": 0.9664257526397705, "frac_alive": 0.1563585102558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_0_step_4882/ae.pt b/resid_post_layer_11_checkpoints/trainer_0_step_4882/ae.pt deleted file mode 100644 index e881e5d43f273a42084072b6da9371aa6abdede6..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_0_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bb73459d9ba27070497a75047dc056f86559bd5d41a888abb6cbebb52718ffed -size 339823504 diff --git a/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json b/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json deleted file mode 100644 index f30ebe0eb2b3ae9fe54a41b65af55cc82dddbac0..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json b/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json deleted file mode 100644 index 02f3af74fc0abaddf04f0f4aeb0380b49971e0ff..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 67.39788055419922, "l1_loss": 348.3453887939453, "l0": 20.0, "frac_variance_explained": 0.7809036612510681, "cossim": 0.8779726147651672, "l2_ratio": 0.8785419166088104, "relative_reconstruction_bias": 1.000831699371338, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.875354194641113, "loss_zero": 12.452933025360107, "frac_recovered": 0.9574480593204499, "frac_alive": 0.1525065153837204, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_0_step_9764/ae.pt b/resid_post_layer_11_checkpoints/trainer_0_step_9764/ae.pt deleted file mode 100644 index 7113befee71a6ece15f6b8de9bc557c94eebf368..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_0_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3ce91e1320f81e78c9f7f2fd837f151c9fd488709b4f9aada2c4c18e38e7d585 -size 339823504 diff --git a/resid_post_layer_11_checkpoints/trainer_0_step_9764/config.json b/resid_post_layer_11_checkpoints/trainer_0_step_9764/config.json deleted file mode 100644 index ff243bd8a979ba3684100625fafb897f5743386c..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_0_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_0_step_9764/eval_results.json b/resid_post_layer_11_checkpoints/trainer_0_step_9764/eval_results.json deleted file mode 100644 index a6a0f5137ca1400cfbcde546a430bb62b52ce725..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_0_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 66.6925880432129, "l1_loss": 304.12717895507814, "l0": 20.0, "frac_variance_explained": 0.7223313331604004, "cossim": 0.8877674520015717, "l2_ratio": 0.8910203695297241, "relative_reconstruction_bias": 1.0043164610862731, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.8143189668655397, "loss_zero": 12.452933025360107, "frac_recovered": 0.9635527789592743, "frac_alive": 0.1507703959941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_1_step_0/ae.pt b/resid_post_layer_11_checkpoints/trainer_1_step_0/ae.pt deleted file mode 100644 index 2b8843ef9a81156e1d039f133a688428bf0b35cd..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_1_step_0/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863 -size 339823416 diff --git a/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json b/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json deleted file mode 100644 index ed1a043b76a442a087c5b6dddb0d55d13f21a943..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "0", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json b/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json deleted file mode 100644 index 845a28fd23b778e770b6946269baf468cee5d600..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 146.67851715087892, "l1_loss": 225.0533248901367, "l0": 40.0, "frac_variance_explained": 0.10613001585006714, "cossim": 0.37147045135498047, "l2_ratio": 0.2499557614326477, "relative_reconstruction_bias": 0.670610225200653, "loss_original": 2.4489264488220215, "loss_reconstructed": 13.592330741882325, "loss_zero": 12.452933025360107, "frac_recovered": -0.1137192726135254, "frac_alive": 0.2194553017616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_1_step_19528/ae.pt b/resid_post_layer_11_checkpoints/trainer_1_step_19528/ae.pt deleted file mode 100644 index faf25e636d3dc8094758000e8e77d7a3caf2626f..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_1_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9d443898512cabf167f64076cb6ce357da7b374f711e79c43b4c6c5834406617 -size 339823704 diff --git a/resid_post_layer_11_checkpoints/trainer_1_step_19528/config.json b/resid_post_layer_11_checkpoints/trainer_1_step_19528/config.json deleted file mode 100644 index 8642cf3d69189561e812446c1759d1c0cbad1e9e..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_1_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_1_step_19528/eval_results.json b/resid_post_layer_11_checkpoints/trainer_1_step_19528/eval_results.json deleted file mode 100644 index 2111c937725c76aca0166ff2e4d575c8f0841e00..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_1_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 55.811133575439456, "l1_loss": 451.9648834228516, "l0": 40.0, "frac_variance_explained": 0.820844042301178, "cossim": 0.9156029880046844, "l2_ratio": 0.9176408350467682, "relative_reconstruction_bias": 1.0015530705451965, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.618153429031372, "loss_zero": 12.452933025360107, "frac_recovered": 0.9831358790397644, "frac_alive": 0.2994249165058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_1_step_29292/ae.pt b/resid_post_layer_11_checkpoints/trainer_1_step_29292/ae.pt deleted file mode 100644 index f9362d639c05a6e72e8d4d27236b4e7071667f30..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_1_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5eed5e6b2f32121b1976aaa8492cce7320da5abca20185e3d43c17daea9a225b -size 339823704 diff --git a/resid_post_layer_11_checkpoints/trainer_1_step_29292/config.json b/resid_post_layer_11_checkpoints/trainer_1_step_29292/config.json deleted file mode 100644 index 6280cfa6e6e9442c986bf8b7e645c640572c428f..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_1_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_1_step_29292/eval_results.json b/resid_post_layer_11_checkpoints/trainer_1_step_29292/eval_results.json deleted file mode 100644 index 593b6118ddd611291d6f875db6365615c8d6415f..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_1_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 55.91380500793457, "l1_loss": 427.1486541748047, "l0": 39.983334350585935, "frac_variance_explained": 0.8604467451572418, "cossim": 0.9129667043685913, "l2_ratio": 0.9151833534240723, "relative_reconstruction_bias": 1.0012677431106567, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6114421129226684, "loss_zero": 12.452933025360107, "frac_recovered": 0.9838047802448273, "frac_alive": 0.29443359375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_1_step_4882/ae.pt b/resid_post_layer_11_checkpoints/trainer_1_step_4882/ae.pt deleted file mode 100644 index 95a9b4510d80f4a0263b85cc80655278bede0619..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_1_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:499307aaa2265a6de8ec06ed3ce6364ca443c6ec7597187a8d0f154e86a1ed0f -size 339823504 diff --git a/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json b/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json deleted file mode 100644 index ada689038d856b84b170c83257ea0c7eded38d7f..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json b/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json deleted file mode 100644 index 58b6ec10c1450df894fbcfe30c7fe12d781a59e5..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 59.28143844604492, "l1_loss": 393.00905456542966, "l0": 40.0, "frac_variance_explained": 0.7567337930202485, "cossim": 0.9030908882617951, "l2_ratio": 0.9034847617149353, "relative_reconstruction_bias": 0.9998072028160095, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.678276252746582, "loss_zero": 12.452933025360107, "frac_recovered": 0.9771327614784241, "frac_alive": 0.2982313334941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_1_step_9764/ae.pt b/resid_post_layer_11_checkpoints/trainer_1_step_9764/ae.pt deleted file mode 100644 index 81c3cf79023798a7353108292e59149c64eae96c..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_1_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a2c7b989a53a1687048ca1175380bb26a0a17e1a51bb994937c36929faa3aaf7 -size 339823504 diff --git a/resid_post_layer_11_checkpoints/trainer_1_step_9764/config.json b/resid_post_layer_11_checkpoints/trainer_1_step_9764/config.json deleted file mode 100644 index 4122b2accfe338bb060e82db4c0bc476068789cd..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_1_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_1_step_9764/eval_results.json b/resid_post_layer_11_checkpoints/trainer_1_step_9764/eval_results.json deleted file mode 100644 index 137e1224ede6081a735d87a8893542898f504198..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_1_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 57.661796188354494, "l1_loss": 467.5274963378906, "l0": 40.0, "frac_variance_explained": 0.8447021842002869, "cossim": 0.9108584702014924, "l2_ratio": 0.9144052922725677, "relative_reconstruction_bias": 1.003432297706604, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.624210572242737, "loss_zero": 12.452933025360107, "frac_recovered": 0.9825274705886841, "frac_alive": 0.2957899272441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_2_step_0/ae.pt b/resid_post_layer_11_checkpoints/trainer_2_step_0/ae.pt deleted file mode 100644 index 2b8843ef9a81156e1d039f133a688428bf0b35cd..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_2_step_0/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863 -size 339823416 diff --git a/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json b/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json deleted file mode 100644 index 97f0c98b5fa3d457b648a69048a8f4285367a7d8..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "0", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_2_step_0/eval_results.json b/resid_post_layer_11_checkpoints/trainer_2_step_0/eval_results.json deleted file mode 100644 index 6044ba15b06ebd0a70428b0df7ab7ec3c6a66fd0..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_2_step_0/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 137.5836051940918, "l1_loss": 414.7282440185547, "l0": 80.0, "frac_variance_explained": 0.17110393047332764, "cossim": 0.4680758684873581, "l2_ratio": 0.34816921055316924, "relative_reconstruction_bias": 0.7378275513648986, "loss_original": 2.4489264488220215, "loss_reconstructed": 14.558675193786621, "loss_zero": 12.452933025360107, "frac_recovered": -0.2103082224726677, "frac_alive": 0.3181966245174408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_2_step_19528/ae.pt b/resid_post_layer_11_checkpoints/trainer_2_step_19528/ae.pt deleted file mode 100644 index ea15722d0fb7ffcb09973e01eb0bf9b3ae5c91b8..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_2_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e25c4eab862d23187a5b98b7ed83d77d3e2b3188b59d300d535208c5d34aedf7 -size 339823704 diff --git a/resid_post_layer_11_checkpoints/trainer_2_step_19528/config.json b/resid_post_layer_11_checkpoints/trainer_2_step_19528/config.json deleted file mode 100644 index 03e9c4db094e330f009b4d9b7c90292786b1558c..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_2_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_2_step_19528/eval_results.json b/resid_post_layer_11_checkpoints/trainer_2_step_19528/eval_results.json deleted file mode 100644 index 340cdc6fa7e9c8c7bc4981991938944a95096959..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_2_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 52.232007598876955, "l1_loss": 543.6768280029297, "l0": 79.9, "frac_variance_explained": 0.8336090505123138, "cossim": 0.9266528010368347, "l2_ratio": 0.9269256889820099, "relative_reconstruction_bias": 1.0003283321857452, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5528225183486937, "loss_zero": 12.452933025360107, "frac_recovered": 0.9896542429924011, "frac_alive": 0.46240234375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_2_step_29292/ae.pt b/resid_post_layer_11_checkpoints/trainer_2_step_29292/ae.pt deleted file mode 100644 index 8e8c7de8dc1b52ba15927959784430a10eb16149..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_2_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:aae8444f307b4f0275f1e88a9ee35b83003813811a3d8ba90ad8eb0fb861b798 -size 339823704 diff --git a/resid_post_layer_11_checkpoints/trainer_2_step_29292/config.json b/resid_post_layer_11_checkpoints/trainer_2_step_29292/config.json deleted file mode 100644 index 727ef9a180a142db778fa0df8a048dc208603739..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_2_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_2_step_29292/eval_results.json b/resid_post_layer_11_checkpoints/trainer_2_step_29292/eval_results.json deleted file mode 100644 index 8997f42f7d279e1851ad8e31e5e20bb91d0ffd18..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_2_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 51.525339126586914, "l1_loss": 510.0916351318359, "l0": 80.0, "frac_variance_explained": 0.8004042208194733, "cossim": 0.928622841835022, "l2_ratio": 0.9312313497066498, "relative_reconstruction_bias": 1.0022627532482147, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5504817724227906, "loss_zero": 12.452933025360107, "frac_recovered": 0.9898871839046478, "frac_alive": 0.4539930522441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_2_step_4882/ae.pt b/resid_post_layer_11_checkpoints/trainer_2_step_4882/ae.pt deleted file mode 100644 index 34a53430b7f71bf8b025dcde2ca11a648feca56b..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_2_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:278bc3f69f3e86c9998fd15f4c04da6c2d50f1ce93c1a3ddf7c401570e19915c -size 339823504 diff --git a/resid_post_layer_11_checkpoints/trainer_2_step_4882/config.json b/resid_post_layer_11_checkpoints/trainer_2_step_4882/config.json deleted file mode 100644 index f1d636a04a744e7967f70d7e43b15dab8fc6b2c3..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_2_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_2_step_4882/eval_results.json b/resid_post_layer_11_checkpoints/trainer_2_step_4882/eval_results.json deleted file mode 100644 index b5e8097bb67f163666b8d3356e4eff7c040019cb..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_2_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 56.42006950378418, "l1_loss": 607.1793884277344, "l0": 80.0, "frac_variance_explained": 0.8453537464141846, "cossim": 0.9180462002754212, "l2_ratio": 0.9192540943622589, "relative_reconstruction_bias": 1.0001774728298187, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5884765863418577, "loss_zero": 12.452933025360107, "frac_recovered": 0.9860971808433533, "frac_alive": 0.4753689169883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_2_step_9764/ae.pt b/resid_post_layer_11_checkpoints/trainer_2_step_9764/ae.pt deleted file mode 100644 index 55d3f04f5c2a65ec372fee8af51510baf623c7f4..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_2_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:09277bb07f753cc705b46903843df2ccff8845b6887d0ed9a3bf12dab4305d09 -size 339823504 diff --git a/resid_post_layer_11_checkpoints/trainer_2_step_9764/config.json b/resid_post_layer_11_checkpoints/trainer_2_step_9764/config.json deleted file mode 100644 index 93d2c5c00cda17d38613ebdd9fe7b031b787b1f7..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_2_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_2_step_9764/eval_results.json b/resid_post_layer_11_checkpoints/trainer_2_step_9764/eval_results.json deleted file mode 100644 index a21b936880af76966c24219a49fbb13ec3c4a299..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_2_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 52.48485107421875, "l1_loss": 528.1922485351563, "l0": 80.0, "frac_variance_explained": 0.808493971824646, "cossim": 0.9282886624336243, "l2_ratio": 0.9306568443775177, "relative_reconstruction_bias": 1.0026726007461548, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.56360719203949, "loss_zero": 12.452933025360107, "frac_recovered": 0.988573682308197, "frac_alive": 0.4756944477558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_3_step_0/ae.pt b/resid_post_layer_11_checkpoints/trainer_3_step_0/ae.pt deleted file mode 100644 index 2b8843ef9a81156e1d039f133a688428bf0b35cd..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_3_step_0/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863 -size 339823416 diff --git a/resid_post_layer_11_checkpoints/trainer_3_step_0/config.json b/resid_post_layer_11_checkpoints/trainer_3_step_0/config.json deleted file mode 100644 index 7d497b813772b73bbb6f799b08670bdbba4da520..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_3_step_0/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "0", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_3_step_0/eval_results.json b/resid_post_layer_11_checkpoints/trainer_3_step_0/eval_results.json deleted file mode 100644 index c42aafaf8b8f38c1d3f55bba570870cd4c4bf6b6..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_3_step_0/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 131.11762619018555, "l1_loss": 788.7969665527344, "l0": 160.0, "frac_variance_explained": 0.25940428376197816, "cossim": 0.5706947863101959, "l2_ratio": 0.49342564642429354, "relative_reconstruction_bias": 0.849498838186264, "loss_original": 2.4489264488220215, "loss_reconstructed": 10.614991569519043, "loss_zero": 12.452933025360107, "frac_recovered": 0.18391464054584503, "frac_alive": 0.4411349892616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_3_step_19528/ae.pt b/resid_post_layer_11_checkpoints/trainer_3_step_19528/ae.pt deleted file mode 100644 index 908a6166e7677f973430ef8a71a129f63520a42a..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_3_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:01f748938c4f7810260a5082db07606c1dc8e6e8e10deb0d989f8e118c5773ab -size 339823704 diff --git a/resid_post_layer_11_checkpoints/trainer_3_step_19528/config.json b/resid_post_layer_11_checkpoints/trainer_3_step_19528/config.json deleted file mode 100644 index f37c3c35c36f347a15d2315489fbacccb764430d..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_3_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_3_step_19528/eval_results.json b/resid_post_layer_11_checkpoints/trainer_3_step_19528/eval_results.json deleted file mode 100644 index 74eef0058803f27c4fdd688d2406a5b05be7b06f..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_3_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 48.276147079467776, "l1_loss": 764.6186645507812, "l0": 160.0, "frac_variance_explained": 0.8476401448249817, "cossim": 0.9392698645591736, "l2_ratio": 0.9418351173400878, "relative_reconstruction_bias": 1.0018192648887634, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.516468143463135, "loss_zero": 12.452933025360107, "frac_recovered": 0.9932795226573944, "frac_alive": 0.6029188632965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_3_step_29292/ae.pt b/resid_post_layer_11_checkpoints/trainer_3_step_29292/ae.pt deleted file mode 100644 index 218fea8333205e946329d874934f8d71358f0846..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_3_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e372a1b16244d3752576eeb5db4d0b61c39b9e7547ac19276955a840e5858d94 -size 339823704 diff --git a/resid_post_layer_11_checkpoints/trainer_3_step_29292/config.json b/resid_post_layer_11_checkpoints/trainer_3_step_29292/config.json deleted file mode 100644 index 7e2cbcfcae3119b699e8ed2d6337d89b24fb7c24..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_3_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_3_step_29292/eval_results.json b/resid_post_layer_11_checkpoints/trainer_3_step_29292/eval_results.json deleted file mode 100644 index 25e4b989bc3a47bea182b50a6786c5ca70030f87..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_3_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 47.30290908813477, "l1_loss": 785.4107971191406, "l0": 160.0, "frac_variance_explained": 0.8710289716720581, "cossim": 0.9411798179149627, "l2_ratio": 0.9417654693126678, "relative_reconstruction_bias": 1.0005487978458405, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.512762355804443, "loss_zero": 12.452933025360107, "frac_recovered": 0.9936471462249756, "frac_alive": 0.6195204257965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_3_step_4882/ae.pt b/resid_post_layer_11_checkpoints/trainer_3_step_4882/ae.pt deleted file mode 100644 index dd55a675467d7ee03167b92c129acfcc6e71d28d..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_3_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:486007eeb6accfbd124d2c0399b9becddbd8f22cbd6f48f68ae7e546cb5247de -size 339823504 diff --git a/resid_post_layer_11_checkpoints/trainer_3_step_4882/config.json b/resid_post_layer_11_checkpoints/trainer_3_step_4882/config.json deleted file mode 100644 index 4eaadecf9e60347014cfaa6e72aec5651fb0b141..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_3_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_3_step_4882/eval_results.json b/resid_post_layer_11_checkpoints/trainer_3_step_4882/eval_results.json deleted file mode 100644 index 92f3a3abaf6152f9e94f223a2fae2f798167d333..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_3_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 49.686083221435545, "l1_loss": 879.2001892089844, "l0": 160.0, "frac_variance_explained": 0.8819718539714814, "cossim": 0.9358995020389557, "l2_ratio": 0.9381371676921845, "relative_reconstruction_bias": 1.0012032091617584, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5353710651397705, "loss_zero": 12.452933025360107, "frac_recovered": 0.9913994312286377, "frac_alive": 0.6832682490348816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_3_step_9764/ae.pt b/resid_post_layer_11_checkpoints/trainer_3_step_9764/ae.pt deleted file mode 100644 index 620c6698311221cb28ad8036a19d586ab80e014b..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_3_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0c887871346532bd99ef6817684159e524641a2311d899d4951ad3d0c88a73da -size 339823504 diff --git a/resid_post_layer_11_checkpoints/trainer_3_step_9764/config.json b/resid_post_layer_11_checkpoints/trainer_3_step_9764/config.json deleted file mode 100644 index 6fb4c55872ddcb14fe37513ab527bf0eb9e38ca7..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_3_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_3_step_9764/eval_results.json b/resid_post_layer_11_checkpoints/trainer_3_step_9764/eval_results.json deleted file mode 100644 index 605c371162d6b6d122444bed2caa62756c1c2825..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_3_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 48.30603218078613, "l1_loss": 777.0953186035156, "l0": 159.9625, "frac_variance_explained": 0.8656655848026276, "cossim": 0.9392187178134919, "l2_ratio": 0.9420062899589539, "relative_reconstruction_bias": 1.002146726846695, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.519579267501831, "loss_zero": 12.452933025360107, "frac_recovered": 0.9929662108421325, "frac_alive": 0.6369357705116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_4_step_0/ae.pt b/resid_post_layer_11_checkpoints/trainer_4_step_0/ae.pt deleted file mode 100644 index 2b8843ef9a81156e1d039f133a688428bf0b35cd..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_4_step_0/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863 -size 339823416 diff --git a/resid_post_layer_11_checkpoints/trainer_4_step_0/config.json b/resid_post_layer_11_checkpoints/trainer_4_step_0/config.json deleted file mode 100644 index 06c4936b9ad190edb4fd7b080175b954279bf994..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_4_step_0/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "0", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_4_step_0/eval_results.json b/resid_post_layer_11_checkpoints/trainer_4_step_0/eval_results.json deleted file mode 100644 index 2e096f5d2e103c73bcd179dbc9e6f8f92d95fff5..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_4_step_0/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 131.5750930786133, "l1_loss": 1586.4803344726563, "l0": 320.0, "frac_variance_explained": 0.35132617354393003, "cossim": 0.6678441643714905, "l2_ratio": 0.7069510042667388, "relative_reconstruction_bias": 1.0263331294059754, "loss_original": 2.4489264488220215, "loss_reconstructed": 6.194353532791138, "loss_zero": 12.452933025360107, "frac_recovered": 0.6258051276206971, "frac_alive": 0.5712890625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_4_step_19528/ae.pt b/resid_post_layer_11_checkpoints/trainer_4_step_19528/ae.pt deleted file mode 100644 index c706b5ac0fe97ac6932a9312cf0003b18b057f1e..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_4_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:30d61a836e43cd142f00fcacf2270e305f4f039adabe054ddd2ae8f59b1959ca -size 339823704 diff --git a/resid_post_layer_11_checkpoints/trainer_4_step_19528/config.json b/resid_post_layer_11_checkpoints/trainer_4_step_19528/config.json deleted file mode 100644 index 299ba797af00fd375ed487dc65ac176200429c35..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_4_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_4_step_19528/eval_results.json b/resid_post_layer_11_checkpoints/trainer_4_step_19528/eval_results.json deleted file mode 100644 index db9ec2d8b9b8b1ee4225832db2bf3387fa042205..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_4_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 41.05187835693359, "l1_loss": 1132.3312622070312, "l0": 320.0, "frac_variance_explained": 0.8801694273948669, "cossim": 0.9543100357055664, "l2_ratio": 0.9556897640228271, "relative_reconstruction_bias": 1.00105140209198, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.491763734817505, "loss_zero": 12.452933025360107, "frac_recovered": 0.9957401871681213, "frac_alive": 0.7314995527267456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_4_step_29292/ae.pt b/resid_post_layer_11_checkpoints/trainer_4_step_29292/ae.pt deleted file mode 100644 index 950376d684126d0a6f024127ebd3c76c4b434c12..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_4_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0fe9593ac8a93f7f52d0f64fefd0190e5411f41cad3acf803d2cddadcf5fa7b5 -size 339823704 diff --git a/resid_post_layer_11_checkpoints/trainer_4_step_29292/config.json b/resid_post_layer_11_checkpoints/trainer_4_step_29292/config.json deleted file mode 100644 index 68c22a392783439db9250c3939b443410355e6b2..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_4_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_4_step_29292/eval_results.json b/resid_post_layer_11_checkpoints/trainer_4_step_29292/eval_results.json deleted file mode 100644 index 5d3839200cb4565ff118af29b1af0a275879db88..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_4_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 40.974717712402345, "l1_loss": 1218.4020202636718, "l0": 320.0, "frac_variance_explained": 0.9071006178855896, "cossim": 0.9544732809066773, "l2_ratio": 0.955464094877243, "relative_reconstruction_bias": 1.0005109429359436, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4892306327819824, "loss_zero": 12.452933025360107, "frac_recovered": 0.995994257926941, "frac_alive": 0.6979166865348816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_4_step_4882/ae.pt b/resid_post_layer_11_checkpoints/trainer_4_step_4882/ae.pt deleted file mode 100644 index 262b5afb4be5ef593a603a59f30fdae899d939c4..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_4_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:03feb1a6bf329d1cf113f4d4461aa499a8082b9641be50690257273af9cace93 -size 339823504 diff --git a/resid_post_layer_11_checkpoints/trainer_4_step_4882/config.json b/resid_post_layer_11_checkpoints/trainer_4_step_4882/config.json deleted file mode 100644 index eff325a20fd7c50fa0ecc5a215fc6c9eda320a0e..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_4_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_4_step_4882/eval_results.json b/resid_post_layer_11_checkpoints/trainer_4_step_4882/eval_results.json deleted file mode 100644 index f0be5efdfbebdd37674a36c3506a008a8b70c185..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_4_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 46.41660919189453, "l1_loss": 982.6541625976563, "l0": 320.0, "frac_variance_explained": 0.844070029258728, "cossim": 0.9450825750827789, "l2_ratio": 0.9474761605262756, "relative_reconstruction_bias": 1.0017620146274566, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5012677669525147, "loss_zero": 12.452933025360107, "frac_recovered": 0.9947916507720947, "frac_alive": 0.86083984375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_4_step_9764/ae.pt b/resid_post_layer_11_checkpoints/trainer_4_step_9764/ae.pt deleted file mode 100644 index 3932b3bba2f04972a9ce9ef90f14113df1c2bab3..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_4_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ae15e3134cd87df7aaa2e029a4d5d09a1f8e0200e246194939014c2e6f44c5c9 -size 339823504 diff --git a/resid_post_layer_11_checkpoints/trainer_4_step_9764/config.json b/resid_post_layer_11_checkpoints/trainer_4_step_9764/config.json deleted file mode 100644 index 9ad4150060d5633f2bf26cffab03edeaf306bd7e..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_4_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_4_step_9764/eval_results.json b/resid_post_layer_11_checkpoints/trainer_4_step_9764/eval_results.json deleted file mode 100644 index 24090c7e1416f6ffdafe2766b7ff2f13ac10eebd..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_4_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 44.553976440429686, "l1_loss": 1079.3361083984375, "l0": 319.9458343505859, "frac_variance_explained": 0.8690729081630707, "cossim": 0.9506974220275879, "l2_ratio": 0.950929456949234, "relative_reconstruction_bias": 0.9997771561145783, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.494476556777954, "loss_zero": 12.452933025360107, "frac_recovered": 0.9954676687717438, "frac_alive": 0.7751736044883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_5_step_0/ae.pt b/resid_post_layer_11_checkpoints/trainer_5_step_0/ae.pt deleted file mode 100644 index 2b8843ef9a81156e1d039f133a688428bf0b35cd..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_5_step_0/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863 -size 339823416 diff --git a/resid_post_layer_11_checkpoints/trainer_5_step_0/config.json b/resid_post_layer_11_checkpoints/trainer_5_step_0/config.json deleted file mode 100644 index 312779ffae9874b112068781310c54791ea19c19..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_5_step_0/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "0", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_5_step_0/eval_results.json b/resid_post_layer_11_checkpoints/trainer_5_step_0/eval_results.json deleted file mode 100644 index b55973063dc53a17224137ada4d4710cd5bbc421..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_5_step_0/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 111.73392791748047, "l1_loss": 2608.7587646484376, "l0": 640.0, "frac_variance_explained": 0.35072124004364014, "cossim": 0.7543251395225525, "l2_ratio": 1.026421320438385, "relative_reconstruction_bias": 1.3424260139465332, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.9057929515838623, "loss_zero": 12.452933025360107, "frac_recovered": 0.8545086026191712, "frac_alive": 0.7488606572151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_5_step_19528/ae.pt b/resid_post_layer_11_checkpoints/trainer_5_step_19528/ae.pt deleted file mode 100644 index d95b6361cef9d5c5362d205e3830d6f701602c89..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_5_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:536f3f3dfcfeb2b40993f22bbc18007f1d9d126b1e353725a5d7ee8e8ad17022 -size 339823704 diff --git a/resid_post_layer_11_checkpoints/trainer_5_step_19528/config.json b/resid_post_layer_11_checkpoints/trainer_5_step_19528/config.json deleted file mode 100644 index aba8119567e6983262708a33cf4e735dd69784c2..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_5_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_5_step_19528/eval_results.json b/resid_post_layer_11_checkpoints/trainer_5_step_19528/eval_results.json deleted file mode 100644 index 40518c07ec7c45470aab0ffec46dbb11c9ae8b02..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_5_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 34.61446113586426, "l1_loss": 2412.594775390625, "l0": 640.0, "frac_variance_explained": 0.9390624403953552, "cossim": 0.9699587464332581, "l2_ratio": 0.9699285745620727, "relative_reconstruction_bias": 0.9990258693695069, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.470791292190552, "loss_zero": 12.452933025360107, "frac_recovered": 0.9978296816349029, "frac_alive": 0.6563042402267456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_5_step_29292/ae.pt b/resid_post_layer_11_checkpoints/trainer_5_step_29292/ae.pt deleted file mode 100644 index ba6957cbb7ad594d9372053ee018d1088dcfb584..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_5_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5b4ee1c73c134015ac7d9e7cc8222d61987a6e91299257a5a88121a6911a879d -size 339823704 diff --git a/resid_post_layer_11_checkpoints/trainer_5_step_29292/config.json b/resid_post_layer_11_checkpoints/trainer_5_step_29292/config.json deleted file mode 100644 index 5f7ed73e9144d04f10badf528953ffd783872634..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_5_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_5_step_29292/eval_results.json b/resid_post_layer_11_checkpoints/trainer_5_step_29292/eval_results.json deleted file mode 100644 index c0f507836ca493425d8e1d866f71bc05af63fc99..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_5_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 34.37075271606445, "l1_loss": 2463.444091796875, "l0": 639.3208374023437, "frac_variance_explained": 0.9328433871269226, "cossim": 0.9713874399662018, "l2_ratio": 0.9715998351573945, "relative_reconstruction_bias": 0.9999610126018524, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4686097383499144, "loss_zero": 12.452933025360107, "frac_recovered": 0.9980440974235535, "frac_alive": 0.630967915058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_5_step_4882/ae.pt b/resid_post_layer_11_checkpoints/trainer_5_step_4882/ae.pt deleted file mode 100644 index 8e42d05cdd654a66a2914845a54dc163d434092e..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_5_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:64c479ce8aada5fe7623ddba03b954dded6f10be84ecfc283c737ea259629ec1 -size 339823504 diff --git a/resid_post_layer_11_checkpoints/trainer_5_step_4882/config.json b/resid_post_layer_11_checkpoints/trainer_5_step_4882/config.json deleted file mode 100644 index e177e2775341977e290f95a91df37f1b209df09f..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_5_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_5_step_4882/eval_results.json b/resid_post_layer_11_checkpoints/trainer_5_step_4882/eval_results.json deleted file mode 100644 index ea1423c55399431a6fda5a7e4ed793ae2506842e..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_5_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 38.02740745544433, "l1_loss": 2213.3583374023438, "l0": 639.7375, "frac_variance_explained": 0.9239685833454132, "cossim": 0.9633342325687408, "l2_ratio": 0.9649060130119324, "relative_reconstruction_bias": 1.0006114959716796, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4730945587158204, "loss_zero": 12.452933025360107, "frac_recovered": 0.9975976288318634, "frac_alive": 0.8318684697151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_5_step_9764/ae.pt b/resid_post_layer_11_checkpoints/trainer_5_step_9764/ae.pt deleted file mode 100644 index 788127044cf2dd8414510ede54be2a471238362a..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_5_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:420b9708c7ed85a9b3c4fabe8e6c858414cd9e0a17446fe0870356d8ba9d2aac -size 339823504 diff --git a/resid_post_layer_11_checkpoints/trainer_5_step_9764/config.json b/resid_post_layer_11_checkpoints/trainer_5_step_9764/config.json deleted file mode 100644 index 99090ca090b21563ac484c9eb86693a36378ba46..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_5_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 11, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_11", - "submodule_name": "resid_post_layer_11" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_11_checkpoints/trainer_5_step_9764/eval_results.json b/resid_post_layer_11_checkpoints/trainer_5_step_9764/eval_results.json deleted file mode 100644 index df2e6b9092afe24a2b62479083fa5743605d438a..0000000000000000000000000000000000000000 --- a/resid_post_layer_11_checkpoints/trainer_5_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 36.282081604003906, "l1_loss": 2348.363134765625, "l0": 640.0, "frac_variance_explained": 0.928521353006363, "cossim": 0.967055720090866, "l2_ratio": 0.9679634690284729, "relative_reconstruction_bias": 1.001582407951355, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.472600722312927, "loss_zero": 12.452933025360107, "frac_recovered": 0.99765043258667, "frac_alive": 0.7485893964767456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15/trainer_0/ae.pt b/resid_post_layer_15/trainer_0/ae.pt deleted file mode 100644 index 639d998f2a959d008e2a59df972b53ce3f7c108c..0000000000000000000000000000000000000000 --- a/resid_post_layer_15/trainer_0/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:50b9028ee870dd0cdf23c30ea625fe6b6a0cc03370e85500e69350fe39891cca -size 339823400 diff --git a/resid_post_layer_15/trainer_0/config.json b/resid_post_layer_15/trainer_0/config.json deleted file mode 100644 index 8d40c30e6f32643ed74d4b894e8587795e460e81..0000000000000000000000000000000000000000 --- a/resid_post_layer_15/trainer_0/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": 48828, - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15/trainer_0/eval_results.json b/resid_post_layer_15/trainer_0/eval_results.json deleted file mode 100644 index 432125c8ab74d9e69067ea765f4cc09e4e41096c..0000000000000000000000000000000000000000 --- a/resid_post_layer_15/trainer_0/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 82.05952911376953, "l1_loss": 387.8919372558594, "l0": 20.0, "frac_variance_explained": 0.7454514384269715, "cossim": 0.9057712197303772, "l2_ratio": 0.9036650419235229, "relative_reconstruction_bias": 0.9980986058712006, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.8554311752319337, "loss_zero": 12.452933025360107, "frac_recovered": 0.9594433963298797, "frac_alive": 0.1574435830116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15/trainer_1/ae.pt b/resid_post_layer_15/trainer_1/ae.pt deleted file mode 100644 index a74f0f160cd3b97e9b66c23ad71616f28740f13c..0000000000000000000000000000000000000000 --- a/resid_post_layer_15/trainer_1/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:db2cba42dad3f8fe04134874e0db36512b4030c64c7d0e8ed702882079c03b11 -size 339823400 diff --git a/resid_post_layer_15/trainer_1/config.json b/resid_post_layer_15/trainer_1/config.json deleted file mode 100644 index 799390566c2f1ae55f4186ea89cf46b811b540e7..0000000000000000000000000000000000000000 --- a/resid_post_layer_15/trainer_1/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": 48828, - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15/trainer_1/eval_results.json b/resid_post_layer_15/trainer_1/eval_results.json deleted file mode 100644 index a00c800a3b1c4c3a8304422b09b9959614354940..0000000000000000000000000000000000000000 --- a/resid_post_layer_15/trainer_1/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 73.52041473388672, "l1_loss": 528.9073120117188, "l0": 40.0, "frac_variance_explained": 0.7958128571510314, "cossim": 0.922706949710846, "l2_ratio": 0.9236413538455963, "relative_reconstruction_bias": 1.0000072419643402, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6429500341415406, "loss_zero": 12.452933025360107, "frac_recovered": 0.980656909942627, "frac_alive": 0.293511301279068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15/trainer_2/ae.pt b/resid_post_layer_15/trainer_2/ae.pt deleted file mode 100644 index bb49191c4ee7e6e413cbd6b7bdda37aec173d2d5..0000000000000000000000000000000000000000 --- a/resid_post_layer_15/trainer_2/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:23356391caa5794b242fe6802a172a2b76cfbe40a692536b370e47a6741bf9a4 -size 339823400 diff --git a/resid_post_layer_15/trainer_2/config.json b/resid_post_layer_15/trainer_2/config.json deleted file mode 100644 index 343e98ff257e6c669e770a76685cfd8257100a5b..0000000000000000000000000000000000000000 --- a/resid_post_layer_15/trainer_2/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": 48828, - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15/trainer_2/eval_results.json b/resid_post_layer_15/trainer_2/eval_results.json deleted file mode 100644 index 13a2a04828e8b8ee4c07c65ee7b47947e12711ea..0000000000000000000000000000000000000000 --- a/resid_post_layer_15/trainer_2/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 63.84604949951172, "l1_loss": 781.8043701171875, "l0": 80.0, "frac_variance_explained": 0.886254632472992, "cossim": 0.9405465126037598, "l2_ratio": 0.9406228601932526, "relative_reconstruction_bias": 0.999057823419571, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5650960922241213, "loss_zero": 12.452933025360107, "frac_recovered": 0.988433837890625, "frac_alive": 0.4686957597732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15/trainer_3/ae.pt b/resid_post_layer_15/trainer_3/ae.pt deleted file mode 100644 index f37176cf702a1b306950d3dbc886f5bd22bb24a1..0000000000000000000000000000000000000000 --- a/resid_post_layer_15/trainer_3/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:610c23270e3a3b929a1be6cdcc7e523ed0101322f10f57c7a6bce5144768fdd2 -size 339823400 diff --git a/resid_post_layer_15/trainer_3/config.json b/resid_post_layer_15/trainer_3/config.json deleted file mode 100644 index fc4a3f63891f56258b6a2dc8e824ce60be3016e2..0000000000000000000000000000000000000000 --- a/resid_post_layer_15/trainer_3/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": 48828, - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15/trainer_3/eval_results.json b/resid_post_layer_15/trainer_3/eval_results.json deleted file mode 100644 index cb8c93f12ffda9052d70fb8ed506987447aaa5b2..0000000000000000000000000000000000000000 --- a/resid_post_layer_15/trainer_3/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 56.946068572998044, "l1_loss": 1005.3937927246094, "l0": 160.0, "frac_variance_explained": 0.8639537990093231, "cossim": 0.9545037567615509, "l2_ratio": 0.9538737773895264, "relative_reconstruction_bias": 0.9995295643806458, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.524174451828003, "loss_zero": 12.452933025360107, "frac_recovered": 0.9925116181373597, "frac_alive": 0.6183810830116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15/trainer_4/ae.pt b/resid_post_layer_15/trainer_4/ae.pt deleted file mode 100644 index a00de0f9da5ff1224d35eb0f1c06aa8c13162191..0000000000000000000000000000000000000000 --- a/resid_post_layer_15/trainer_4/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:42d3c8b256a0541ce9baeed6e80da94de368f6969e75b40f14514f1d622b2196 -size 339823400 diff --git a/resid_post_layer_15/trainer_4/config.json b/resid_post_layer_15/trainer_4/config.json deleted file mode 100644 index 810b6ce14c9c3a781c98938f3f0c02be972126db..0000000000000000000000000000000000000000 --- a/resid_post_layer_15/trainer_4/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": 48828, - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15/trainer_4/eval_results.json b/resid_post_layer_15/trainer_4/eval_results.json deleted file mode 100644 index 9b0b11927aa84970987fdd54bdfdd57fdc9c2959..0000000000000000000000000000000000000000 --- a/resid_post_layer_15/trainer_4/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 49.31732063293457, "l1_loss": 1596.6747680664062, "l0": 320.0, "frac_variance_explained": 0.9155429899692535, "cossim": 0.9657738864421844, "l2_ratio": 0.9659751355648041, "relative_reconstruction_bias": 0.9998931109905242, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.494556260108948, "loss_zero": 12.452933025360107, "frac_recovered": 0.9954632163047791, "frac_alive": 0.7400173544883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15/trainer_5/ae.pt b/resid_post_layer_15/trainer_5/ae.pt deleted file mode 100644 index 7367484c4a0ccf6bef8764d67bc0ba3321ad4995..0000000000000000000000000000000000000000 --- a/resid_post_layer_15/trainer_5/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:67becd11e239a4d90b1b22fbc4280346cd3b9eb5fb02841543ec02c3150424cd -size 339823400 diff --git a/resid_post_layer_15/trainer_5/config.json b/resid_post_layer_15/trainer_5/config.json deleted file mode 100644 index cbb4035903196bf0b07225ca1dd920b855f8c11e..0000000000000000000000000000000000000000 --- a/resid_post_layer_15/trainer_5/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": 48828, - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15/trainer_5/eval_results.json b/resid_post_layer_15/trainer_5/eval_results.json deleted file mode 100644 index 8bece77dd812d0a7e354529364ef6cb82cf4c932..0000000000000000000000000000000000000000 --- a/resid_post_layer_15/trainer_5/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 41.20635986328125, "l1_loss": 3198.0086669921875, "l0": 640.0, "frac_variance_explained": 0.9310864806175232, "cossim": 0.9767521739006042, "l2_ratio": 0.9770789384841919, "relative_reconstruction_bias": 1.0002823233604432, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4734957695007322, "loss_zero": 12.452933025360107, "frac_recovered": 0.997559267282486, "frac_alive": 0.7078993320465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_0_step_0/ae.pt b/resid_post_layer_15_checkpoints/trainer_0_step_0/ae.pt deleted file mode 100644 index 2b8843ef9a81156e1d039f133a688428bf0b35cd..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_0_step_0/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863 -size 339823416 diff --git a/resid_post_layer_15_checkpoints/trainer_0_step_0/config.json b/resid_post_layer_15_checkpoints/trainer_0_step_0/config.json deleted file mode 100644 index 65c624ea3e7a807dbe026a8ecec3ff1fd990cf0a..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_0_step_0/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "0", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_0_step_0/eval_results.json b/resid_post_layer_15_checkpoints/trainer_0_step_0/eval_results.json deleted file mode 100644 index f8063faa8847316c9f63ce8cfc9bb09467518f83..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_0_step_0/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 217.9843536376953, "l1_loss": 167.01827087402344, "l0": 20.0, "frac_variance_explained": 0.05807469487190246, "cossim": 0.28542253077030183, "l2_ratio": 0.17943892478942872, "relative_reconstruction_bias": 0.6283090710639954, "loss_original": 2.4489264488220215, "loss_reconstructed": 14.690529346466064, "loss_zero": 12.452933025360107, "frac_recovered": -0.22370559722185135, "frac_alive": 0.1353081613779068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_0_step_19528/ae.pt b/resid_post_layer_15_checkpoints/trainer_0_step_19528/ae.pt deleted file mode 100644 index 559414d753dbd898ecb6231f05d83da541acb672..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_0_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0866dae2f0ed5a782c5653936b9c84b77e2fb0806b8d4ad20e9aeae78c1450ef -size 339823704 diff --git a/resid_post_layer_15_checkpoints/trainer_0_step_19528/config.json b/resid_post_layer_15_checkpoints/trainer_0_step_19528/config.json deleted file mode 100644 index 350d49a9f44daed67a9ec1a99b714d7854376c18..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_0_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_0_step_19528/eval_results.json b/resid_post_layer_15_checkpoints/trainer_0_step_19528/eval_results.json deleted file mode 100644 index 156e4cb19f367c77f6f2208654be03f1488d6171..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_0_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 82.89589004516601, "l1_loss": 387.73180236816404, "l0": 19.99583339691162, "frac_variance_explained": 0.7404085159301758, "cossim": 0.9042332589626312, "l2_ratio": 0.9021734893321991, "relative_reconstruction_bias": 0.9979009211063385, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.874876618385315, "loss_zero": 12.452933025360107, "frac_recovered": 0.9574995279312134, "frac_alive": 0.1548936665058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_0_step_29292/ae.pt b/resid_post_layer_15_checkpoints/trainer_0_step_29292/ae.pt deleted file mode 100644 index 10c5dc89c423ff318d2b83beeff7329d8659d57b..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_0_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:66958d1d13df6247baa74a58110308c943fc651fcf69cab67873a0ae012acc18 -size 339823704 diff --git a/resid_post_layer_15_checkpoints/trainer_0_step_29292/config.json b/resid_post_layer_15_checkpoints/trainer_0_step_29292/config.json deleted file mode 100644 index 1b778bcf2a3d591bd39d1768c9f830f642ade32c..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_0_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_0_step_29292/eval_results.json b/resid_post_layer_15_checkpoints/trainer_0_step_29292/eval_results.json deleted file mode 100644 index 212279316df942e12947ac2af011a8f97aef8cea..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_0_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 82.90822525024414, "l1_loss": 392.2499267578125, "l0": 19.99583339691162, "frac_variance_explained": 0.735715925693512, "cossim": 0.903123289346695, "l2_ratio": 0.9033664643764496, "relative_reconstruction_bias": 1.0007203817367554, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.8645946979522705, "loss_zero": 12.452933025360107, "frac_recovered": 0.9585281014442444, "frac_alive": 0.1569010466337204, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_0_step_4882/ae.pt b/resid_post_layer_15_checkpoints/trainer_0_step_4882/ae.pt deleted file mode 100644 index 682898b637c69de5e3cb07ab3036321effff820a..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_0_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b25e50ff77be32f6a22dfd34c2ea74a828b500968aa9838bbb757d7e56597459 -size 339823504 diff --git a/resid_post_layer_15_checkpoints/trainer_0_step_4882/config.json b/resid_post_layer_15_checkpoints/trainer_0_step_4882/config.json deleted file mode 100644 index c229983f0fd58ddb65659d723482fc751a5f2c3d..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_0_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_0_step_4882/eval_results.json b/resid_post_layer_15_checkpoints/trainer_0_step_4882/eval_results.json deleted file mode 100644 index b92ceddf51c0e9bee267d2cde2a12e5dbfa0a3c3..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_0_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 87.38898391723633, "l1_loss": 437.2335723876953, "l0": 20.0, "frac_variance_explained": 0.7685397148132325, "cossim": 0.8913267970085144, "l2_ratio": 0.8944155275821686, "relative_reconstruction_bias": 1.001406443119049, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.9732311964035034, "loss_zero": 12.452933025360107, "frac_recovered": 0.9476731896400452, "frac_alive": 0.1499565988779068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_0_step_9764/ae.pt b/resid_post_layer_15_checkpoints/trainer_0_step_9764/ae.pt deleted file mode 100644 index 94d078dfa7c56c2dcd25579caf84ab62712cece9..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_0_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:212cf600cbed48d0ada27307e3f4b91043969acdffc04d89b87d87957d909915 -size 339823504 diff --git a/resid_post_layer_15_checkpoints/trainer_0_step_9764/config.json b/resid_post_layer_15_checkpoints/trainer_0_step_9764/config.json deleted file mode 100644 index 6823a309b6fcfce430953904a3a9bad2e3819d90..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_0_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_0_step_9764/eval_results.json b/resid_post_layer_15_checkpoints/trainer_0_step_9764/eval_results.json deleted file mode 100644 index d5955a10d98ac930840a317e2d3c25529ebdf415..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_0_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 81.98058166503907, "l1_loss": 392.0604583740234, "l0": 20.0, "frac_variance_explained": 0.741108912229538, "cossim": 0.8997536063194275, "l2_ratio": 0.9000672399997711, "relative_reconstruction_bias": 1.0006576836109162, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.9072537422180176, "loss_zero": 12.452933025360107, "frac_recovered": 0.9542666256427765, "frac_alive": 0.1556532084941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_1_step_0/ae.pt b/resid_post_layer_15_checkpoints/trainer_1_step_0/ae.pt deleted file mode 100644 index 2b8843ef9a81156e1d039f133a688428bf0b35cd..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_1_step_0/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863 -size 339823416 diff --git a/resid_post_layer_15_checkpoints/trainer_1_step_0/config.json b/resid_post_layer_15_checkpoints/trainer_1_step_0/config.json deleted file mode 100644 index 082c15a181fbf4daa7b8cf7a6d269663ff93486d..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_1_step_0/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "0", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_1_step_0/eval_results.json b/resid_post_layer_15_checkpoints/trainer_1_step_0/eval_results.json deleted file mode 100644 index c5419792413196045a3842cd0a827ab0adb7e250..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_1_step_0/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 194.64895782470703, "l1_loss": 298.46790466308596, "l0": 40.0, "frac_variance_explained": 0.10361332297325135, "cossim": 0.37167030572891235, "l2_ratio": 0.2493060812354088, "relative_reconstruction_bias": 0.6704857051372528, "loss_original": 2.4489264488220215, "loss_reconstructed": 14.783217716217042, "loss_zero": 12.452933025360107, "frac_recovered": -0.23288672268390656, "frac_alive": 0.2071397602558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_1_step_19528/ae.pt b/resid_post_layer_15_checkpoints/trainer_1_step_19528/ae.pt deleted file mode 100644 index bf2334b202e71e59f8bca8a9dac75ebe05222ab3..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_1_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:972c767c191528e384e437f3afd8a3b22b93fc213214e1633bb6705878bb7180 -size 339823704 diff --git a/resid_post_layer_15_checkpoints/trainer_1_step_19528/config.json b/resid_post_layer_15_checkpoints/trainer_1_step_19528/config.json deleted file mode 100644 index 8b713eeef258b3e35f0f8b2a60c0963d31211eb6..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_1_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_1_step_19528/eval_results.json b/resid_post_layer_15_checkpoints/trainer_1_step_19528/eval_results.json deleted file mode 100644 index c367dee1541e4c650d12a820694c4529e6fde338..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_1_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 74.34532241821289, "l1_loss": 536.4803894042968, "l0": 39.99583358764649, "frac_variance_explained": 0.7928689062595368, "cossim": 0.9240649998188019, "l2_ratio": 0.9261127531528472, "relative_reconstruction_bias": 1.0024328827857971, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6611002922058105, "loss_zero": 12.452933025360107, "frac_recovered": 0.9788522481918335, "frac_alive": 0.289116770029068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_1_step_29292/ae.pt b/resid_post_layer_15_checkpoints/trainer_1_step_29292/ae.pt deleted file mode 100644 index 5f617ab777b9117a9a7d307b4dafaa2be96902e7..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_1_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3a887797ec28feaea5f2c3435c550d97d930f8acdd65acac35b153fb59e5aa85 -size 339823704 diff --git a/resid_post_layer_15_checkpoints/trainer_1_step_29292/config.json b/resid_post_layer_15_checkpoints/trainer_1_step_29292/config.json deleted file mode 100644 index ffe5ae0b2f470cba787ab0329afa3187e824060e..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_1_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_1_step_29292/eval_results.json b/resid_post_layer_15_checkpoints/trainer_1_step_29292/eval_results.json deleted file mode 100644 index ca256d9918b9514dc1e45e26cb09b29254031a57..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_1_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 71.34622116088867, "l1_loss": 549.1552490234375, "l0": 39.99166717529297, "frac_variance_explained": 0.8392110764980316, "cossim": 0.9306858479976654, "l2_ratio": 0.9335777938365937, "relative_reconstruction_bias": 1.0028102040290832, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.652607035636902, "loss_zero": 12.452933025360107, "frac_recovered": 0.979695725440979, "frac_alive": 0.2953559160232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_1_step_4882/ae.pt b/resid_post_layer_15_checkpoints/trainer_1_step_4882/ae.pt deleted file mode 100644 index efde6813e599faa1a6fc8f03b0a9768c682e5687..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_1_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:99197ab6dabd5bc38dd6c794a2ba3865289aa7aebf83681f0f864d942ba68f1b -size 339823504 diff --git a/resid_post_layer_15_checkpoints/trainer_1_step_4882/config.json b/resid_post_layer_15_checkpoints/trainer_1_step_4882/config.json deleted file mode 100644 index 9d8e3f8a73a15341e8f014e126c892a47fd4c394..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_1_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_1_step_4882/eval_results.json b/resid_post_layer_15_checkpoints/trainer_1_step_4882/eval_results.json deleted file mode 100644 index 14e233ce8cf8a926fbb6b7d70799d8c3da66355c..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_1_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 77.56395111083984, "l1_loss": 508.2853271484375, "l0": 40.0, "frac_variance_explained": 0.7621201872825623, "cossim": 0.9176748812198638, "l2_ratio": 0.9192310273647308, "relative_reconstruction_bias": 1.0023752927780152, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.713756060600281, "loss_zero": 12.452933025360107, "frac_recovered": 0.9735915839672089, "frac_alive": 0.2897135317325592, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_1_step_9764/ae.pt b/resid_post_layer_15_checkpoints/trainer_1_step_9764/ae.pt deleted file mode 100644 index b6366a04095cba4c7a7bf6c8ea8c647644fe29b9..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_1_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:414bc1faa147a3f98dc05e1c8fccb87b8af9f6012fc69a54a4b387a05aab20f0 -size 339823504 diff --git a/resid_post_layer_15_checkpoints/trainer_1_step_9764/config.json b/resid_post_layer_15_checkpoints/trainer_1_step_9764/config.json deleted file mode 100644 index abf76b86473b8114fa13542e00ad539d4848917a..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_1_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_1_step_9764/eval_results.json b/resid_post_layer_15_checkpoints/trainer_1_step_9764/eval_results.json deleted file mode 100644 index 73c9ad1bc7bb4d3e9f51d6096309b4a0ce83e060..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_1_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 75.81767044067382, "l1_loss": 621.8117370605469, "l0": 40.0, "frac_variance_explained": 0.8433825016021729, "cossim": 0.920050710439682, "l2_ratio": 0.9210049569606781, "relative_reconstruction_bias": 1.0010946571826935, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.673509454727173, "loss_zero": 12.452933025360107, "frac_recovered": 0.9776096105575561, "frac_alive": 0.2875434160232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_2_step_0/ae.pt b/resid_post_layer_15_checkpoints/trainer_2_step_0/ae.pt deleted file mode 100644 index 2b8843ef9a81156e1d039f133a688428bf0b35cd..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_2_step_0/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863 -size 339823416 diff --git a/resid_post_layer_15_checkpoints/trainer_2_step_0/config.json b/resid_post_layer_15_checkpoints/trainer_2_step_0/config.json deleted file mode 100644 index 37ed4b577ad5d22d8db065771a92a879a0fb1c96..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_2_step_0/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "0", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_2_step_0/eval_results.json b/resid_post_layer_15_checkpoints/trainer_2_step_0/eval_results.json deleted file mode 100644 index d5a17408c86b060f0af38c3229425cbcb10aa52b..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_2_step_0/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 213.44998168945312, "l1_loss": 623.8083557128906, "l0": 80.0, "frac_variance_explained": 0.16039682626724244, "cossim": 0.4678017109632492, "l2_ratio": 0.34769041538238527, "relative_reconstruction_bias": 0.7274929761886597, "loss_original": 2.4489264488220215, "loss_reconstructed": 13.109274959564209, "loss_zero": 12.452933025360107, "frac_recovered": -0.06557218059897423, "frac_alive": 0.3215060830116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_2_step_19528/ae.pt b/resid_post_layer_15_checkpoints/trainer_2_step_19528/ae.pt deleted file mode 100644 index 4afc1bb420c76f4580024974d641ebc36a150693..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_2_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7b0f864d4b8cb4c2d56b090b9ead81e17336ba865503b2d1c9d60c6002fbb65e -size 339823704 diff --git a/resid_post_layer_15_checkpoints/trainer_2_step_19528/config.json b/resid_post_layer_15_checkpoints/trainer_2_step_19528/config.json deleted file mode 100644 index 3299fe870b7cf6653de67ba831040725a7e4c01a..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_2_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_2_step_19528/eval_results.json b/resid_post_layer_15_checkpoints/trainer_2_step_19528/eval_results.json deleted file mode 100644 index 568ae3e6a9bae8fb98da23569ab48a8dd83d52f6..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_2_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 66.70211791992188, "l1_loss": 793.0866271972657, "l0": 80.0, "frac_variance_explained": 0.870151698589325, "cossim": 0.9401491701602935, "l2_ratio": 0.9408006072044373, "relative_reconstruction_bias": 1.000737911462784, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.575274610519409, "loss_zero": 12.452933025360107, "frac_recovered": 0.9874190568923951, "frac_alive": 0.4680989682674408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_2_step_29292/ae.pt b/resid_post_layer_15_checkpoints/trainer_2_step_29292/ae.pt deleted file mode 100644 index 6f47fa59bf60c5b08b5a577b0ad53ff8f5276c73..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_2_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8073114a4f2a66ecdacf92f63d119ada2b321353b72c492fb2961fc27a426116 -size 339823704 diff --git a/resid_post_layer_15_checkpoints/trainer_2_step_29292/config.json b/resid_post_layer_15_checkpoints/trainer_2_step_29292/config.json deleted file mode 100644 index 58d4f933d1f485cb0ce829bd42238acbd37d9471..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_2_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_2_step_29292/eval_results.json b/resid_post_layer_15_checkpoints/trainer_2_step_29292/eval_results.json deleted file mode 100644 index c69b70f8ce48657b095bc3595225fd37ad2ab10f..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_2_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 64.55712928771973, "l1_loss": 725.3825256347657, "l0": 79.9, "frac_variance_explained": 0.8422860383987427, "cossim": 0.9409180641174316, "l2_ratio": 0.942666745185852, "relative_reconstruction_bias": 1.0019870102405548, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5692342281341554, "loss_zero": 12.452933025360107, "frac_recovered": 0.9880201041698455, "frac_alive": 0.4743381142616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_2_step_4882/ae.pt b/resid_post_layer_15_checkpoints/trainer_2_step_4882/ae.pt deleted file mode 100644 index 0924b5faedcaa3ea32fd60d1e5923a778471ff02..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_2_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cbb1519751de8509e141ead808835e4c5ffe50b796a12efb9f8b3f6319481485 -size 339823504 diff --git a/resid_post_layer_15_checkpoints/trainer_2_step_4882/config.json b/resid_post_layer_15_checkpoints/trainer_2_step_4882/config.json deleted file mode 100644 index d1ba0f2f61718c71a88bd24d656a93af8a1f3e1c..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_2_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_2_step_4882/eval_results.json b/resid_post_layer_15_checkpoints/trainer_2_step_4882/eval_results.json deleted file mode 100644 index 0c9a2a77aa09f081516657896ef0999be0ea0ebb..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_2_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 69.76554336547852, "l1_loss": 857.1462219238281, "l0": 80.0, "frac_variance_explained": 0.8729984104633332, "cossim": 0.9301912546157837, "l2_ratio": 0.931358927488327, "relative_reconstruction_bias": 1.0026703178882599, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.610351014137268, "loss_zero": 12.452933025360107, "frac_recovered": 0.9839148700237275, "frac_alive": 0.4821506142616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_2_step_9764/ae.pt b/resid_post_layer_15_checkpoints/trainer_2_step_9764/ae.pt deleted file mode 100644 index 61b7931d757fd9d41ef8b9710f2953a404e1a115..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_2_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b9a54907b3976f0b3b1487802f0641c24873b8d25716f3ecb70f5052196d6456 -size 339823504 diff --git a/resid_post_layer_15_checkpoints/trainer_2_step_9764/config.json b/resid_post_layer_15_checkpoints/trainer_2_step_9764/config.json deleted file mode 100644 index fd0c461722740f3b5ae423e9b6cd5bb8ef060542..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_2_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_2_step_9764/eval_results.json b/resid_post_layer_15_checkpoints/trainer_2_step_9764/eval_results.json deleted file mode 100644 index 581967c5d7bebf098dcda6ba2a2f22cd0c1d53b5..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_2_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 66.22293586730957, "l1_loss": 923.0601745605469, "l0": 80.0, "frac_variance_explained": 0.900476622581482, "cossim": 0.9411832451820373, "l2_ratio": 0.9404438555240631, "relative_reconstruction_bias": 1.0011389255523682, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.583176279067993, "loss_zero": 12.452933025360107, "frac_recovered": 0.9866274833679199, "frac_alive": 0.466796875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_3_step_0/ae.pt b/resid_post_layer_15_checkpoints/trainer_3_step_0/ae.pt deleted file mode 100644 index 2b8843ef9a81156e1d039f133a688428bf0b35cd..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_3_step_0/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863 -size 339823416 diff --git a/resid_post_layer_15_checkpoints/trainer_3_step_0/config.json b/resid_post_layer_15_checkpoints/trainer_3_step_0/config.json deleted file mode 100644 index 8626192384a1ad31e7e14cdd733b0f0693bf3b59..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_3_step_0/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "0", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_3_step_0/eval_results.json b/resid_post_layer_15_checkpoints/trainer_3_step_0/eval_results.json deleted file mode 100644 index 23c21df6897d809097b2162e62f0f6f5b148b09a..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_3_step_0/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 166.00693054199218, "l1_loss": 1015.9643981933593, "l0": 160.0, "frac_variance_explained": 0.2615611255168915, "cossim": 0.5708441972732544, "l2_ratio": 0.494153892993927, "relative_reconstruction_bias": 0.8625474154949189, "loss_original": 2.4489264488220215, "loss_reconstructed": 8.447930812835693, "loss_zero": 12.452933025360107, "frac_recovered": 0.40063078701496124, "frac_alive": 0.455620676279068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_3_step_19528/ae.pt b/resid_post_layer_15_checkpoints/trainer_3_step_19528/ae.pt deleted file mode 100644 index 653bc60891b772f9945a68bb9e8c5ade043e19dd..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_3_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5de2a467fc29a5a21cb70e0de1688464dfebe0a4f6f7e84fc10cdaa6af65ad3e -size 339823704 diff --git a/resid_post_layer_15_checkpoints/trainer_3_step_19528/config.json b/resid_post_layer_15_checkpoints/trainer_3_step_19528/config.json deleted file mode 100644 index 48b29f21df699ec5534c50d7557625b501b981d0..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_3_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_3_step_19528/eval_results.json b/resid_post_layer_15_checkpoints/trainer_3_step_19528/eval_results.json deleted file mode 100644 index 40b2bbfef875a366226b11bc0c97db382aa38f02..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_3_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 58.65425682067871, "l1_loss": 1061.904736328125, "l0": 160.0, "frac_variance_explained": 0.8731440126895904, "cossim": 0.9517025589942932, "l2_ratio": 0.9524573981761932, "relative_reconstruction_bias": 1.000987672805786, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5311554431915284, "loss_zero": 12.452933025360107, "frac_recovered": 0.9918226838111878, "frac_alive": 0.63623046875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_3_step_29292/ae.pt b/resid_post_layer_15_checkpoints/trainer_3_step_29292/ae.pt deleted file mode 100644 index 60ec042dffcd36cf35e3068f780afcf7d60add68..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_3_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d037d022c63614e8b8aeaa6cf499dab40b39096dea122a686fcb9f338888c2a5 -size 339823704 diff --git a/resid_post_layer_15_checkpoints/trainer_3_step_29292/config.json b/resid_post_layer_15_checkpoints/trainer_3_step_29292/config.json deleted file mode 100644 index 270728f33062c51b34181a13603128f2ffca7658..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_3_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_3_step_29292/eval_results.json b/resid_post_layer_15_checkpoints/trainer_3_step_29292/eval_results.json deleted file mode 100644 index 6d4e36830803e9c19fd33cfcfe9b76e44ce36d53..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_3_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 58.884486770629884, "l1_loss": 1005.7183471679688, "l0": 159.75, "frac_variance_explained": 0.8559818863868713, "cossim": 0.9523160696029663, "l2_ratio": 0.9533852934837341, "relative_reconstruction_bias": 1.0005861222743988, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.527032709121704, "loss_zero": 12.452933025360107, "frac_recovered": 0.9922284483909607, "frac_alive": 0.6035698652267456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_3_step_4882/ae.pt b/resid_post_layer_15_checkpoints/trainer_3_step_4882/ae.pt deleted file mode 100644 index f3e731b0ac331e01435874d3e85cd5c882813e24..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_3_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0dc1dbe41d9b82c765a24e7bdeca47ac7757f853fe0e8d107e9050c15fd596e9 -size 339823504 diff --git a/resid_post_layer_15_checkpoints/trainer_3_step_4882/config.json b/resid_post_layer_15_checkpoints/trainer_3_step_4882/config.json deleted file mode 100644 index 371879e047083e9de7693ff14081a487c3d32440..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_3_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_3_step_4882/eval_results.json b/resid_post_layer_15_checkpoints/trainer_3_step_4882/eval_results.json deleted file mode 100644 index b44a42841ed122c169e39c3ffa1683c0fcbd83a6..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_3_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 63.86768913269043, "l1_loss": 1022.4768981933594, "l0": 160.0, "frac_variance_explained": 0.8641524732112884, "cossim": 0.9425764679908752, "l2_ratio": 0.9433953881263732, "relative_reconstruction_bias": 1.0005350947380065, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5517275810241697, "loss_zero": 12.452933025360107, "frac_recovered": 0.9897631585597992, "frac_alive": 0.7082248330116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_3_step_9764/ae.pt b/resid_post_layer_15_checkpoints/trainer_3_step_9764/ae.pt deleted file mode 100644 index 2bf08e368bbd212f24c49e3303a69aa993bacab7..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_3_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:12ea2f5917aa93875f1eacfe397156413cee4d8f130bafbff93654a8a0f0cab9 -size 339823504 diff --git a/resid_post_layer_15_checkpoints/trainer_3_step_9764/config.json b/resid_post_layer_15_checkpoints/trainer_3_step_9764/config.json deleted file mode 100644 index 75bedc485fddb9f3262863395b6f1c4f700ff1f2..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_3_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_3_step_9764/eval_results.json b/resid_post_layer_15_checkpoints/trainer_3_step_9764/eval_results.json deleted file mode 100644 index 9907b1041dfdc76f6060c53051eb8c02a032b2a2..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_3_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 60.64662590026855, "l1_loss": 980.9309875488282, "l0": 160.0, "frac_variance_explained": 0.8536317765712738, "cossim": 0.9488173604011536, "l2_ratio": 0.9503636538982392, "relative_reconstruction_bias": 1.0021162509918213, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.536962103843689, "loss_zero": 12.452933025360107, "frac_recovered": 0.9912424206733703, "frac_alive": 0.6449110507965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_4_step_0/ae.pt b/resid_post_layer_15_checkpoints/trainer_4_step_0/ae.pt deleted file mode 100644 index 2b8843ef9a81156e1d039f133a688428bf0b35cd..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_4_step_0/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863 -size 339823416 diff --git a/resid_post_layer_15_checkpoints/trainer_4_step_0/config.json b/resid_post_layer_15_checkpoints/trainer_4_step_0/config.json deleted file mode 100644 index 8f836fdf8fbd99beb8f316dffa13bef30d985bcc..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_4_step_0/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "0", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_4_step_0/eval_results.json b/resid_post_layer_15_checkpoints/trainer_4_step_0/eval_results.json deleted file mode 100644 index 488dd36a437463c426e0c0170704a7ace97a966f..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_4_step_0/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 159.18578643798827, "l1_loss": 1954.7937622070312, "l0": 320.0, "frac_variance_explained": 0.3460752248764038, "cossim": 0.6690242469310761, "l2_ratio": 0.7075321555137635, "relative_reconstruction_bias": 1.0470044791698456, "loss_original": 2.4489264488220215, "loss_reconstructed": 5.50926251411438, "loss_zero": 12.452933025360107, "frac_recovered": 0.69437575340271, "frac_alive": 0.6080729365348816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_4_step_19528/ae.pt b/resid_post_layer_15_checkpoints/trainer_4_step_19528/ae.pt deleted file mode 100644 index f08bc06b4a343c68ec1fe008fffff8e1e5ee3aa1..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_4_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:da5d0c0af25b3eec0cb924db456feb3f4790a15b3a93074e7521957f4059496e -size 339823704 diff --git a/resid_post_layer_15_checkpoints/trainer_4_step_19528/config.json b/resid_post_layer_15_checkpoints/trainer_4_step_19528/config.json deleted file mode 100644 index e6e2a0c694595e7a4f79ccdad608665ead15ec50..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_4_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_4_step_19528/eval_results.json b/resid_post_layer_15_checkpoints/trainer_4_step_19528/eval_results.json deleted file mode 100644 index ad83c4081722766d0b4bdb7e0628d844eae289bb..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_4_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 51.75811920166016, "l1_loss": 1565.4125244140625, "l0": 319.5833343505859, "frac_variance_explained": 0.9095131993293762, "cossim": 0.9622929751873016, "l2_ratio": 0.9627440869808197, "relative_reconstruction_bias": 0.999713146686554, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.497648334503174, "loss_zero": 12.452933025360107, "frac_recovered": 0.9951550543308259, "frac_alive": 0.7630751132965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_4_step_29292/ae.pt b/resid_post_layer_15_checkpoints/trainer_4_step_29292/ae.pt deleted file mode 100644 index 2beb7dc4d09d2f2c15d9fd189740949c6681f0c2..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_4_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:828cf4cc2d42bd017e50159400baf82febcb101f1a6edd8b1b5bada7ccfcc121 -size 339823704 diff --git a/resid_post_layer_15_checkpoints/trainer_4_step_29292/config.json b/resid_post_layer_15_checkpoints/trainer_4_step_29292/config.json deleted file mode 100644 index 4a3c663504f9a72c6a7c2689a66a5dd267494669..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_4_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_4_step_29292/eval_results.json b/resid_post_layer_15_checkpoints/trainer_4_step_29292/eval_results.json deleted file mode 100644 index 69754577dcdedccf96c37ac95b504a3d3f7dece5..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_4_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 51.95920066833496, "l1_loss": 1560.7103271484375, "l0": 320.0, "frac_variance_explained": 0.9020806074142456, "cossim": 0.9633390665054321, "l2_ratio": 0.9633306562900543, "relative_reconstruction_bias": 1.0002520322799682, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.495920014381409, "loss_zero": 12.452933025360107, "frac_recovered": 0.9953308701515198, "frac_alive": 0.7234700322151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_4_step_4882/ae.pt b/resid_post_layer_15_checkpoints/trainer_4_step_4882/ae.pt deleted file mode 100644 index 1a732a5d79b9ebbbd1949e4dc81f20a53e417530..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_4_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0b8568c8f93f278b9bd71fbbca337541db7f2fc377d6c20ce405f2191a5bc1b4 -size 339823504 diff --git a/resid_post_layer_15_checkpoints/trainer_4_step_4882/config.json b/resid_post_layer_15_checkpoints/trainer_4_step_4882/config.json deleted file mode 100644 index 1d7d16966e81bcdabb0eceeccfca148ee3662604..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_4_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_4_step_4882/eval_results.json b/resid_post_layer_15_checkpoints/trainer_4_step_4882/eval_results.json deleted file mode 100644 index 00afbf9f297c5c2faadd33b41d5dfae0dd572459..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_4_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 57.49758071899414, "l1_loss": 1233.705810546875, "l0": 320.0, "frac_variance_explained": 0.8589035868644714, "cossim": 0.9540941476821899, "l2_ratio": 0.9550706088542938, "relative_reconstruction_bias": 1.001019501686096, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5227089881896974, "loss_zero": 12.452933025360107, "frac_recovered": 0.9926598310470581, "frac_alive": 0.8812391757965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_4_step_9764/ae.pt b/resid_post_layer_15_checkpoints/trainer_4_step_9764/ae.pt deleted file mode 100644 index 6110ee7202d778f4812a0552e2e09b2d944e2e82..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_4_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e869f3166aaa395a0a1c72ee5c075db312ffefd96c5bc79b40ff47ddb8125ee5 -size 339823504 diff --git a/resid_post_layer_15_checkpoints/trainer_4_step_9764/config.json b/resid_post_layer_15_checkpoints/trainer_4_step_9764/config.json deleted file mode 100644 index 9043515e154628ab859cd8fd995389ee54d41c80..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_4_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_4_step_9764/eval_results.json b/resid_post_layer_15_checkpoints/trainer_4_step_9764/eval_results.json deleted file mode 100644 index e4e4e62adf8fd0a810064e072b93c4b55f323862..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_4_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 54.3418586730957, "l1_loss": 1605.89306640625, "l0": 320.0, "frac_variance_explained": 0.9077378273010254, "cossim": 0.959366899728775, "l2_ratio": 0.9596228897571564, "relative_reconstruction_bias": 1.0003112256526947, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.502269673347473, "loss_zero": 12.452933025360107, "frac_recovered": 0.9946961760520935, "frac_alive": 0.8001301884651184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_5_step_0/ae.pt b/resid_post_layer_15_checkpoints/trainer_5_step_0/ae.pt deleted file mode 100644 index 2b8843ef9a81156e1d039f133a688428bf0b35cd..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_5_step_0/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863 -size 339823416 diff --git a/resid_post_layer_15_checkpoints/trainer_5_step_0/config.json b/resid_post_layer_15_checkpoints/trainer_5_step_0/config.json deleted file mode 100644 index 0d848c7cd1dffbab7192dc37f74b8dea767af582..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_5_step_0/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "0", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_5_step_0/eval_results.json b/resid_post_layer_15_checkpoints/trainer_5_step_0/eval_results.json deleted file mode 100644 index 0ef49c04d3c460fd4a3257cfbe9b0318d128f286..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_5_step_0/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 163.25163879394532, "l1_loss": 3762.0339111328126, "l0": 640.0, "frac_variance_explained": 0.3636908054351807, "cossim": 0.7548954904079437, "l2_ratio": 1.0260148525238038, "relative_reconstruction_bias": 1.3213130354881286, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.8931538105010985, "loss_zero": 12.452933025360107, "frac_recovered": 0.8558285892009735, "frac_alive": 0.7422417402267456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_5_step_19528/ae.pt b/resid_post_layer_15_checkpoints/trainer_5_step_19528/ae.pt deleted file mode 100644 index 0c50242d053f00bd6db03cc4d3614abcdfd02d9e..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_5_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cf852c185d40b7f1fa3512efb7f273c3f5e19bedd79628987ec516d333b74341 -size 339823704 diff --git a/resid_post_layer_15_checkpoints/trainer_5_step_19528/config.json b/resid_post_layer_15_checkpoints/trainer_5_step_19528/config.json deleted file mode 100644 index 2261264e639ff9139c7d68b907529c15f2b26cd6..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_5_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_5_step_19528/eval_results.json b/resid_post_layer_15_checkpoints/trainer_5_step_19528/eval_results.json deleted file mode 100644 index 8d53ffca1807d9b2780fde98fa5ec883eaed81a5..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_5_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 43.27035827636719, "l1_loss": 3213.1107421875, "l0": 639.9125, "frac_variance_explained": 0.9495538651943207, "cossim": 0.9743721067905426, "l2_ratio": 0.9743786692619324, "relative_reconstruction_bias": 0.9983949840068818, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.47742965221405, "loss_zero": 12.452933025360107, "frac_recovered": 0.9971720099449157, "frac_alive": 0.7562934160232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_5_step_29292/ae.pt b/resid_post_layer_15_checkpoints/trainer_5_step_29292/ae.pt deleted file mode 100644 index 7b6fd7ff85e22bdd224aa546a73e5b65bfac9b76..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_5_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0b495bb5534c23c6f9df51ea8df665c87a5d97cb0bacceb5da2065ec37bcefd0 -size 339823704 diff --git a/resid_post_layer_15_checkpoints/trainer_5_step_29292/config.json b/resid_post_layer_15_checkpoints/trainer_5_step_29292/config.json deleted file mode 100644 index 70911636e8ea45b22f36cd93d726ad89523d00d1..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_5_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_5_step_29292/eval_results.json b/resid_post_layer_15_checkpoints/trainer_5_step_29292/eval_results.json deleted file mode 100644 index d87adbbf6e2da35ab8bf19c8b8ec61ded65b9f94..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_5_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 41.98759498596191, "l1_loss": 3107.3690185546875, "l0": 639.3791748046875, "frac_variance_explained": 0.9326578259468079, "cossim": 0.9757527709007263, "l2_ratio": 0.9768386065959931, "relative_reconstruction_bias": 1.0004763901233673, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.47405526638031, "loss_zero": 12.452933025360107, "frac_recovered": 0.9975033581256867, "frac_alive": 0.733018696308136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_5_step_4882/ae.pt b/resid_post_layer_15_checkpoints/trainer_5_step_4882/ae.pt deleted file mode 100644 index 371c4cff08d3e9bc17344a4d1a10e18baa08ec2c..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_5_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c79b5a2cff21b3950da197f0d736b0766d36b08084cf6fe5790c21bf60150169 -size 339823504 diff --git a/resid_post_layer_15_checkpoints/trainer_5_step_4882/config.json b/resid_post_layer_15_checkpoints/trainer_5_step_4882/config.json deleted file mode 100644 index 5a2672d9a3a9ad4de399da3f3fa5a8ea66761dd1..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_5_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_5_step_4882/eval_results.json b/resid_post_layer_15_checkpoints/trainer_5_step_4882/eval_results.json deleted file mode 100644 index 6fbd031051ac6d286bcf78829d8594b14cdadec3..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_5_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 46.9846851348877, "l1_loss": 2576.3406005859374, "l0": 639.8166687011719, "frac_variance_explained": 0.9145628452301026, "cossim": 0.9695311069488526, "l2_ratio": 0.9708778202533722, "relative_reconstruction_bias": 1.0014521181583405, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4830697774887085, "loss_zero": 12.452933025360107, "frac_recovered": 0.9966086566448211, "frac_alive": 0.8044704794883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_5_step_9764/ae.pt b/resid_post_layer_15_checkpoints/trainer_5_step_9764/ae.pt deleted file mode 100644 index 282b5f5443b94c0f4d853fafcfa6c73e1adfe094..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_5_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:15a1cb1455a38b7f1af436122139de76aa4e28a26ae48dc3f4fc353192fe7caf -size 339823504 diff --git a/resid_post_layer_15_checkpoints/trainer_5_step_9764/config.json b/resid_post_layer_15_checkpoints/trainer_5_step_9764/config.json deleted file mode 100644 index 6e5971fbd503ab2966b79d0ac6bab76adedc09ec..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_5_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 15, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_15", - "submodule_name": "resid_post_layer_15" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_15_checkpoints/trainer_5_step_9764/eval_results.json b/resid_post_layer_15_checkpoints/trainer_5_step_9764/eval_results.json deleted file mode 100644 index ad19027d2b6869c73b054f845159eb7a94ad0c23..0000000000000000000000000000000000000000 --- a/resid_post_layer_15_checkpoints/trainer_5_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 45.278759765625, "l1_loss": 2868.8764404296876, "l0": 639.9916687011719, "frac_variance_explained": 0.9181702017784119, "cossim": 0.9717496931552887, "l2_ratio": 0.9726868629455566, "relative_reconstruction_bias": 1.0011424362659453, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.482337164878845, "loss_zero": 12.452933025360107, "frac_recovered": 0.9966827273368836, "frac_alive": 0.7840169072151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19/trainer_0/ae.pt b/resid_post_layer_19/trainer_0/ae.pt deleted file mode 100644 index 2f121a52f3405cc1cad376012282d89968952807..0000000000000000000000000000000000000000 --- a/resid_post_layer_19/trainer_0/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5f04e90893b43dae213f83f8d5f4de0f9e78ab4c653c72aee2934fffd933f165 -size 339823400 diff --git a/resid_post_layer_19/trainer_0/config.json b/resid_post_layer_19/trainer_0/config.json deleted file mode 100644 index a7f1a19f8cc3a8806909b838faf11629805a6ca1..0000000000000000000000000000000000000000 --- a/resid_post_layer_19/trainer_0/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": 48828, - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19/trainer_0/eval_results.json b/resid_post_layer_19/trainer_0/eval_results.json deleted file mode 100644 index 2aededac48575199b02f4d4e59853cadd153bcf2..0000000000000000000000000000000000000000 --- a/resid_post_layer_19/trainer_0/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 125.56976852416992, "l1_loss": 637.6160888671875, "l0": 20.0, "frac_variance_explained": 0.7849961221218109, "cossim": 0.8981874704360961, "l2_ratio": 0.8985657155513763, "relative_reconstruction_bias": 0.9984611392021179, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.8372591972351073, "loss_zero": 12.452933025360107, "frac_recovered": 0.9612571775913239, "frac_alive": 0.1710612028837204, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19/trainer_1/ae.pt b/resid_post_layer_19/trainer_1/ae.pt deleted file mode 100644 index 688bdb15a41b132b51904099acf3a7d5fab058b5..0000000000000000000000000000000000000000 --- a/resid_post_layer_19/trainer_1/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b312965fa1a1d0087e7821d44295e39ba9f5164b3ff9302d72b1558f5988a321 -size 339823400 diff --git a/resid_post_layer_19/trainer_1/config.json b/resid_post_layer_19/trainer_1/config.json deleted file mode 100644 index cc704f0f25fc3f265722916ef4d51692f9976fac..0000000000000000000000000000000000000000 --- a/resid_post_layer_19/trainer_1/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": 48828, - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19/trainer_1/eval_results.json b/resid_post_layer_19/trainer_1/eval_results.json deleted file mode 100644 index beff2994bd86060c9d03f3295f8049dd1491b9b0..0000000000000000000000000000000000000000 --- a/resid_post_layer_19/trainer_1/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 110.1037582397461, "l1_loss": 789.6739196777344, "l0": 40.0, "frac_variance_explained": 0.7854969263076782, "cossim": 0.9232691109180451, "l2_ratio": 0.9242902994155884, "relative_reconstruction_bias": 1.0012201130390168, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6683587312698362, "loss_zero": 12.452933025360107, "frac_recovered": 0.9781295597553253, "frac_alive": 0.3113064169883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19/trainer_2/ae.pt b/resid_post_layer_19/trainer_2/ae.pt deleted file mode 100644 index 8075f1800ffd6e1b117c51a864cd5b3230d977bc..0000000000000000000000000000000000000000 --- a/resid_post_layer_19/trainer_2/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:94dfc357983f7cffc8780b47791706cd93a4164cf877a8d7b2c5a6917fb3cfdf -size 339823400 diff --git a/resid_post_layer_19/trainer_2/config.json b/resid_post_layer_19/trainer_2/config.json deleted file mode 100644 index 3f4b007e6b0b8d232fdbdf3b7504ba0020bb8e43..0000000000000000000000000000000000000000 --- a/resid_post_layer_19/trainer_2/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": 48828, - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19/trainer_2/eval_results.json b/resid_post_layer_19/trainer_2/eval_results.json deleted file mode 100644 index 0f72596903ec89b6ff70b949134484801357743c..0000000000000000000000000000000000000000 --- a/resid_post_layer_19/trainer_2/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 98.38441390991211, "l1_loss": 1133.5070922851562, "l0": 80.0, "frac_variance_explained": 0.8572348475456237, "cossim": 0.9394023716449738, "l2_ratio": 0.9390686810016632, "relative_reconstruction_bias": 1.0001055598258972, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5952465295791627, "loss_zero": 12.452933025360107, "frac_recovered": 0.9854330003261567, "frac_alive": 0.4769965410232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19/trainer_3/ae.pt b/resid_post_layer_19/trainer_3/ae.pt deleted file mode 100644 index 9f5d82235f5882348a71b2ac4d36f7e9aa0def7f..0000000000000000000000000000000000000000 --- a/resid_post_layer_19/trainer_3/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:18669400794bd3724560456c6fba4d9ad1169eccf8f622c2c6e90837d6eeb914 -size 339823400 diff --git a/resid_post_layer_19/trainer_3/config.json b/resid_post_layer_19/trainer_3/config.json deleted file mode 100644 index f30c8f88a09a9fbc1172521b4995c3107ebc97bf..0000000000000000000000000000000000000000 --- a/resid_post_layer_19/trainer_3/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": 48828, - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19/trainer_3/eval_results.json b/resid_post_layer_19/trainer_3/eval_results.json deleted file mode 100644 index 73f3bd08506f234d21d558d135069d8d4b54a891..0000000000000000000000000000000000000000 --- a/resid_post_layer_19/trainer_3/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 90.80431671142578, "l1_loss": 1587.93564453125, "l0": 160.0, "frac_variance_explained": 0.8658691287040711, "cossim": 0.9485506474971771, "l2_ratio": 0.9495301187038422, "relative_reconstruction_bias": 1.0008091807365418, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.550182414054871, "loss_zero": 12.452933025360107, "frac_recovered": 0.9899285018444062, "frac_alive": 0.6395399570465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19/trainer_4/ae.pt b/resid_post_layer_19/trainer_4/ae.pt deleted file mode 100644 index f0e62ace0353c776a40b2a07b673edbcc86637bc..0000000000000000000000000000000000000000 --- a/resid_post_layer_19/trainer_4/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d28ee09ec1fe88caecdd5600d4b31d2948f99621d20e322ff0158588ec5ff49e -size 339823400 diff --git a/resid_post_layer_19/trainer_4/config.json b/resid_post_layer_19/trainer_4/config.json deleted file mode 100644 index ebe940f7a6fe2fa4b23901dd15a8a353d494dc6d..0000000000000000000000000000000000000000 --- a/resid_post_layer_19/trainer_4/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": 48828, - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19/trainer_4/eval_results.json b/resid_post_layer_19/trainer_4/eval_results.json deleted file mode 100644 index 7c71f008d6cf1cdf3e4a90c0efebf7f01228c0df..0000000000000000000000000000000000000000 --- a/resid_post_layer_19/trainer_4/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 78.99913330078125, "l1_loss": 2518.846826171875, "l0": 320.0, "frac_variance_explained": 0.8984488129615784, "cossim": 0.9610357165336609, "l2_ratio": 0.96125727891922, "relative_reconstruction_bias": 1.000020968914032, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.51603102684021, "loss_zero": 12.452933025360107, "frac_recovered": 0.9933296144008636, "frac_alive": 0.7503255009651184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19/trainer_5/ae.pt b/resid_post_layer_19/trainer_5/ae.pt deleted file mode 100644 index 0e160b7924e1f8703936be89e419e068e9df3e26..0000000000000000000000000000000000000000 --- a/resid_post_layer_19/trainer_5/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:290fcc81fe88fd3f1638fd7d9a8935b3e94072474970932ae3d9ffa53f840f32 -size 339823400 diff --git a/resid_post_layer_19/trainer_5/config.json b/resid_post_layer_19/trainer_5/config.json deleted file mode 100644 index 3aa5f96e57ad8bc39d9f73ed122dd8a1d9dbc9d9..0000000000000000000000000000000000000000 --- a/resid_post_layer_19/trainer_5/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": 48828, - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19/trainer_5/eval_results.json b/resid_post_layer_19/trainer_5/eval_results.json deleted file mode 100644 index c5ff3493f7d77e7d9345a3c8aa3e29aa02ff5d05..0000000000000000000000000000000000000000 --- a/resid_post_layer_19/trainer_5/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 65.1626049041748, "l1_loss": 4910.604833984375, "l0": 640.0, "frac_variance_explained": 0.9424506366252899, "cossim": 0.9730470478534698, "l2_ratio": 0.9734025001525879, "relative_reconstruction_bias": 0.9997419714927673, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4924126625061036, "loss_zero": 12.452933025360107, "frac_recovered": 0.9956821620464325, "frac_alive": 0.7953559160232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_0_step_0/ae.pt b/resid_post_layer_19_checkpoints/trainer_0_step_0/ae.pt deleted file mode 100644 index 2b8843ef9a81156e1d039f133a688428bf0b35cd..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_0_step_0/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863 -size 339823416 diff --git a/resid_post_layer_19_checkpoints/trainer_0_step_0/config.json b/resid_post_layer_19_checkpoints/trainer_0_step_0/config.json deleted file mode 100644 index cc189e44344b86b25de2a2ca647fd91bd28be143..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_0_step_0/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "0", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_0_step_0/eval_results.json b/resid_post_layer_19_checkpoints/trainer_0_step_0/eval_results.json deleted file mode 100644 index 2eed4f6616c06e52c729ef46e67c186641e32dc2..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_0_step_0/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 325.32696838378905, "l1_loss": 254.30257873535157, "l0": 20.0, "frac_variance_explained": 0.0569246768951416, "cossim": 0.28904485106468203, "l2_ratio": 0.18153051435947418, "relative_reconstruction_bias": 0.6281856417655944, "loss_original": 2.4489264488220215, "loss_reconstructed": 13.573721599578857, "loss_zero": 12.452933025360107, "frac_recovered": -0.11210165843367577, "frac_alive": 0.1335178017616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_0_step_19528/ae.pt b/resid_post_layer_19_checkpoints/trainer_0_step_19528/ae.pt deleted file mode 100644 index 279526de8874f0e4743cec27728bb2641adb263c..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_0_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0795bc1df303f613402934d1355e3e2839d874c736f0296615d58549a323ef20 -size 339823704 diff --git a/resid_post_layer_19_checkpoints/trainer_0_step_19528/config.json b/resid_post_layer_19_checkpoints/trainer_0_step_19528/config.json deleted file mode 100644 index 84ff878880b123f7bca626b7a6df5371e2c82e53..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_0_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_0_step_19528/eval_results.json b/resid_post_layer_19_checkpoints/trainer_0_step_19528/eval_results.json deleted file mode 100644 index 11b1472e3c2d56b71de72bf5382246c8526e912d..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_0_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 125.60377655029296, "l1_loss": 658.4857788085938, "l0": 20.0, "frac_variance_explained": 0.7844563603401185, "cossim": 0.897296804189682, "l2_ratio": 0.9025928020477295, "relative_reconstruction_bias": 1.0041317164897918, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.856287217140198, "loss_zero": 12.452933025360107, "frac_recovered": 0.9593625724315643, "frac_alive": 0.1681315153837204, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_0_step_29292/ae.pt b/resid_post_layer_19_checkpoints/trainer_0_step_29292/ae.pt deleted file mode 100644 index 2b9fbf2c0d3171a7d46565691cb9b6278e4f22a5..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_0_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:23aa1c3f19bfc4e162d310243e72794d0c207ea94702a6acf88fc6fce852f4de -size 339823704 diff --git a/resid_post_layer_19_checkpoints/trainer_0_step_29292/config.json b/resid_post_layer_19_checkpoints/trainer_0_step_29292/config.json deleted file mode 100644 index 3a403712fdcbee9ff1c46025ee905c56231298e7..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_0_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_0_step_29292/eval_results.json b/resid_post_layer_19_checkpoints/trainer_0_step_29292/eval_results.json deleted file mode 100644 index af78c3d3c0fa7f624fc13720343985022ea45320..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_0_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 127.23475341796875, "l1_loss": 601.5059997558594, "l0": 19.99583339691162, "frac_variance_explained": 0.7421157479286193, "cossim": 0.8986818313598632, "l2_ratio": 0.8988159477710724, "relative_reconstruction_bias": 0.9995011687278748, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.8491142988204956, "loss_zero": 12.452933025360107, "frac_recovered": 0.960075843334198, "frac_alive": 0.173611119389534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_0_step_4882/ae.pt b/resid_post_layer_19_checkpoints/trainer_0_step_4882/ae.pt deleted file mode 100644 index 49883e98e8e641ff11620e0e25e6212edb9f567e..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_0_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dc313cda54ab50b79b2538084a8c59bfb4d6179ddd1cf2a61fceb9fabea1de85 -size 339823504 diff --git a/resid_post_layer_19_checkpoints/trainer_0_step_4882/config.json b/resid_post_layer_19_checkpoints/trainer_0_step_4882/config.json deleted file mode 100644 index becc0e12de4c42bad3cefafe5015399395f0c9fe..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_0_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_0_step_4882/eval_results.json b/resid_post_layer_19_checkpoints/trainer_0_step_4882/eval_results.json deleted file mode 100644 index 891e6e4277764f282fb58c0caa6b29c464b77236..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_0_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 131.4759094238281, "l1_loss": 612.5915161132813, "l0": 20.0, "frac_variance_explained": 0.7235639810562133, "cossim": 0.8935853183269501, "l2_ratio": 0.8934765100479126, "relative_reconstruction_bias": 1.000877857208252, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.9022894859313966, "loss_zero": 12.452933025360107, "frac_recovered": 0.9547624230384827, "frac_alive": 0.1648762971162796, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_0_step_9764/ae.pt b/resid_post_layer_19_checkpoints/trainer_0_step_9764/ae.pt deleted file mode 100644 index 4a4c5cd145cb7bca54cee8c373e9af50cad3d3f2..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_0_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f76e4e2a5bf32fc67e82887a087f6188553e82fb2ad7157b3f2bfdec8d436349 -size 339823504 diff --git a/resid_post_layer_19_checkpoints/trainer_0_step_9764/config.json b/resid_post_layer_19_checkpoints/trainer_0_step_9764/config.json deleted file mode 100644 index 86db6670c31bf2276fe749fc6deff1214c2ea9ae..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_0_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_0_step_9764/eval_results.json b/resid_post_layer_19_checkpoints/trainer_0_step_9764/eval_results.json deleted file mode 100644 index 0e99d442614bc47972d9e9cfb34bbd674526992a..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_0_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 125.47390823364258, "l1_loss": 696.7124084472656, "l0": 20.0, "frac_variance_explained": 0.8148535013198852, "cossim": 0.9014196455478668, "l2_ratio": 0.9026202023029327, "relative_reconstruction_bias": 1.0012827217578888, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.871774458885193, "loss_zero": 12.452933025360107, "frac_recovered": 0.9578073680400848, "frac_alive": 0.1685112863779068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_1_step_0/ae.pt b/resid_post_layer_19_checkpoints/trainer_1_step_0/ae.pt deleted file mode 100644 index 2b8843ef9a81156e1d039f133a688428bf0b35cd..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_1_step_0/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863 -size 339823416 diff --git a/resid_post_layer_19_checkpoints/trainer_1_step_0/config.json b/resid_post_layer_19_checkpoints/trainer_1_step_0/config.json deleted file mode 100644 index 19a6ff7ed8bc077b8948abb0a449eb3e73622f4a..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_1_step_0/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "0", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_1_step_0/eval_results.json b/resid_post_layer_19_checkpoints/trainer_1_step_0/eval_results.json deleted file mode 100644 index 623b01e58ef493f2c6783567f1cd742772741a71..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_1_step_0/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 291.17974548339845, "l1_loss": 452.9191162109375, "l0": 40.0, "frac_variance_explained": 0.10459646582603455, "cossim": 0.37412114143371583, "l2_ratio": 0.2523693323135376, "relative_reconstruction_bias": 0.6730610311031342, "loss_original": 2.4489264488220215, "loss_reconstructed": 12.759316158294677, "loss_zero": 12.452933025360107, "frac_recovered": -0.030534679733682422, "frac_alive": 0.2155490517616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_1_step_19528/ae.pt b/resid_post_layer_19_checkpoints/trainer_1_step_19528/ae.pt deleted file mode 100644 index fb77c3a9c9a38fd75d38685629b09cf69e0beb14..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_1_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:56b2e4cb7d6d39904b4dffd9bfe19400c0358361501627f662ab56edfe607aa6 -size 339823704 diff --git a/resid_post_layer_19_checkpoints/trainer_1_step_19528/config.json b/resid_post_layer_19_checkpoints/trainer_1_step_19528/config.json deleted file mode 100644 index 6fb42674f704fa52761049054f91d6f087baae4c..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_1_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_1_step_19528/eval_results.json b/resid_post_layer_19_checkpoints/trainer_1_step_19528/eval_results.json deleted file mode 100644 index 2c1fe99300980b161985d89f31855b62681f30ca..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_1_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 111.32142944335938, "l1_loss": 808.7847717285156, "l0": 40.0, "frac_variance_explained": 0.7986537694931031, "cossim": 0.9199103713035583, "l2_ratio": 0.9218922436237336, "relative_reconstruction_bias": 1.001532244682312, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6824830293655397, "loss_zero": 12.452933025360107, "frac_recovered": 0.9767161905765533, "frac_alive": 0.3111436665058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_1_step_29292/ae.pt b/resid_post_layer_19_checkpoints/trainer_1_step_29292/ae.pt deleted file mode 100644 index 48d29ce2afc21bb29610a11676534e822adf507b..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_1_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:09262442e9a99045a64fd9dcca1cb848b1978389e778c49295830032f5a3bf8a -size 339823704 diff --git a/resid_post_layer_19_checkpoints/trainer_1_step_29292/config.json b/resid_post_layer_19_checkpoints/trainer_1_step_29292/config.json deleted file mode 100644 index a81ec9ffd0b7e605fafa00e7d2f8228504a85e8c..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_1_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_1_step_29292/eval_results.json b/resid_post_layer_19_checkpoints/trainer_1_step_29292/eval_results.json deleted file mode 100644 index 79432dc8c73e54eaec23741f62e8c904b3f7d556..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_1_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 109.28896865844726, "l1_loss": 861.9073913574218, "l0": 40.0, "frac_variance_explained": 0.8541500627994537, "cossim": 0.9224141240119934, "l2_ratio": 0.9242090940475464, "relative_reconstruction_bias": 1.001121735572815, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.672131633758545, "loss_zero": 12.452933025360107, "frac_recovered": 0.977747130393982, "frac_alive": 0.3065863847732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_1_step_4882/ae.pt b/resid_post_layer_19_checkpoints/trainer_1_step_4882/ae.pt deleted file mode 100644 index 2fb8de5d6d968ef2432e45fca93bb6414b0f2e8f..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_1_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:475a01bf42171972958dbb790c5d992ab881f522e23041035a3ebaf40172158a -size 339823504 diff --git a/resid_post_layer_19_checkpoints/trainer_1_step_4882/config.json b/resid_post_layer_19_checkpoints/trainer_1_step_4882/config.json deleted file mode 100644 index 0a1e47484b7478447d3ddc2f59c97b972575cf90..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_1_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_1_step_4882/eval_results.json b/resid_post_layer_19_checkpoints/trainer_1_step_4882/eval_results.json deleted file mode 100644 index 640368fd5d43a64262e97b9b801ef79f42dd70c3..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_1_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 120.10947036743164, "l1_loss": 823.4704406738281, "l0": 40.0, "frac_variance_explained": 0.7695347607135773, "cossim": 0.9084151089191437, "l2_ratio": 0.9094584107398986, "relative_reconstruction_bias": 1.0002698957920075, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.720379114151001, "loss_zero": 12.452933025360107, "frac_recovered": 0.9729329884052277, "frac_alive": 0.3050672709941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_1_step_9764/ae.pt b/resid_post_layer_19_checkpoints/trainer_1_step_9764/ae.pt deleted file mode 100644 index bfb3eb50ff7d1df2c25b4e18004f3c98a984ff14..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_1_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8e4af3abfa5abe345fa340a56f121220f65d492625fd5f640400e0301a30120e -size 339823504 diff --git a/resid_post_layer_19_checkpoints/trainer_1_step_9764/config.json b/resid_post_layer_19_checkpoints/trainer_1_step_9764/config.json deleted file mode 100644 index ea29810f7f6e5da6f80d7febe244ac074491d77b..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_1_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_1_step_9764/eval_results.json b/resid_post_layer_19_checkpoints/trainer_1_step_9764/eval_results.json deleted file mode 100644 index c4b23db294b2138926e99a3d908d3c2d627f3a0d..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_1_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 113.54811477661133, "l1_loss": 853.2123229980468, "l0": 40.0, "frac_variance_explained": 0.8128659963607788, "cossim": 0.9165951013565063, "l2_ratio": 0.9161898851394653, "relative_reconstruction_bias": 0.9994335174560547, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6936707973480223, "loss_zero": 12.452933025360107, "frac_recovered": 0.9755988657474518, "frac_alive": 0.3001302182674408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_2_step_0/ae.pt b/resid_post_layer_19_checkpoints/trainer_2_step_0/ae.pt deleted file mode 100644 index 2b8843ef9a81156e1d039f133a688428bf0b35cd..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_2_step_0/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863 -size 339823416 diff --git a/resid_post_layer_19_checkpoints/trainer_2_step_0/config.json b/resid_post_layer_19_checkpoints/trainer_2_step_0/config.json deleted file mode 100644 index e47eb7937f1a5b7e3b9b49285bb01feb802e8fe4..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_2_step_0/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "0", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_2_step_0/eval_results.json b/resid_post_layer_19_checkpoints/trainer_2_step_0/eval_results.json deleted file mode 100644 index 7adca99baaa4cd50b89f003de2f3dfa4b0814637..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_2_step_0/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 296.4805191040039, "l1_loss": 888.6040588378906, "l0": 80.0, "frac_variance_explained": 0.16352716088294983, "cossim": 0.47093939781188965, "l2_ratio": 0.3511354446411133, "relative_reconstruction_bias": 0.7378839135169983, "loss_original": 2.4489264488220215, "loss_reconstructed": 11.214914798736572, "loss_zero": 12.452933025360107, "frac_recovered": 0.12416270188987255, "frac_alive": 0.3341471254825592, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_2_step_19528/ae.pt b/resid_post_layer_19_checkpoints/trainer_2_step_19528/ae.pt deleted file mode 100644 index 021fc6382954e9ccdc3b60a8dfa2d3fab7f61692..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_2_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3845fd6a8dec642eaed104e1b91d9e83a63c8f04b2023b4aa6b89047fe20ae6a -size 339823704 diff --git a/resid_post_layer_19_checkpoints/trainer_2_step_19528/config.json b/resid_post_layer_19_checkpoints/trainer_2_step_19528/config.json deleted file mode 100644 index 8b18a1db7d4f19b4946a3bd91097ef467448911c..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_2_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_2_step_19528/eval_results.json b/resid_post_layer_19_checkpoints/trainer_2_step_19528/eval_results.json deleted file mode 100644 index adf0788b7c94c50f52df64416f085412dfb4305c..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_2_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 103.5383804321289, "l1_loss": 1107.8344848632812, "l0": 80.0, "frac_variance_explained": 0.8265561103820801, "cossim": 0.9326160907745361, "l2_ratio": 0.9331199288368225, "relative_reconstruction_bias": 1.000600242614746, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.602700686454773, "loss_zero": 12.452933025360107, "frac_recovered": 0.9846857130527497, "frac_alive": 0.4645182192325592, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_2_step_29292/ae.pt b/resid_post_layer_19_checkpoints/trainer_2_step_29292/ae.pt deleted file mode 100644 index 72bcd28a79b7a36bea045acc8beadbdb604ebe39..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_2_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:293d64928ed6883551dea0969db5733e884af7064f237aa9cc908f0ad3d93e70 -size 339823704 diff --git a/resid_post_layer_19_checkpoints/trainer_2_step_29292/config.json b/resid_post_layer_19_checkpoints/trainer_2_step_29292/config.json deleted file mode 100644 index 3035a34dcaef49fc324cec42ad646beb5f250d09..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_2_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_2_step_29292/eval_results.json b/resid_post_layer_19_checkpoints/trainer_2_step_29292/eval_results.json deleted file mode 100644 index 969c92e6842ba00b720dbbb372895107671e9b53..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_2_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 101.87953567504883, "l1_loss": 1135.567333984375, "l0": 80.0, "frac_variance_explained": 0.8610418558120727, "cossim": 0.9316335439682006, "l2_ratio": 0.9330399096012115, "relative_reconstruction_bias": 1.0011163651943207, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6009809970855713, "loss_zero": 12.452933025360107, "frac_recovered": 0.9848611414432525, "frac_alive": 0.4721137285232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_2_step_4882/ae.pt b/resid_post_layer_19_checkpoints/trainer_2_step_4882/ae.pt deleted file mode 100644 index 70723653f5d8a2e84dc5cef2091aa26b4e508ba1..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_2_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:39d33a7dadd33fbf4a90d6d58ff465bc3b5db245935a48fd6dea1b86b3bf43ef -size 339823504 diff --git a/resid_post_layer_19_checkpoints/trainer_2_step_4882/config.json b/resid_post_layer_19_checkpoints/trainer_2_step_4882/config.json deleted file mode 100644 index 9038130ae8d45ff8ba03ec9f81bd05d33efab4d2..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_2_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_2_step_4882/eval_results.json b/resid_post_layer_19_checkpoints/trainer_2_step_4882/eval_results.json deleted file mode 100644 index ab7b944f76577f091ef69ab047ca0034c8871414..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_2_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 106.97534942626953, "l1_loss": 1321.9621337890626, "l0": 80.0, "frac_variance_explained": 0.8818290293216705, "cossim": 0.9320721805095673, "l2_ratio": 0.9306482017040253, "relative_reconstruction_bias": 1.0000323891639709, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.634953498840332, "loss_zero": 12.452933025360107, "frac_recovered": 0.9814700245857239, "frac_alive": 0.4689127504825592, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_2_step_9764/ae.pt b/resid_post_layer_19_checkpoints/trainer_2_step_9764/ae.pt deleted file mode 100644 index f2f10d059ed9d780cc312ad0021fe70474b54eb6..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_2_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fc82f7793b188fa07aba5e8951d99dde01db55cfb79bb63bb0168fb27b88eba6 -size 339823504 diff --git a/resid_post_layer_19_checkpoints/trainer_2_step_9764/config.json b/resid_post_layer_19_checkpoints/trainer_2_step_9764/config.json deleted file mode 100644 index 2f64d85f4a4a9eba656e59408f66c463b0fa9346..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_2_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_2_step_9764/eval_results.json b/resid_post_layer_19_checkpoints/trainer_2_step_9764/eval_results.json deleted file mode 100644 index 358256dc315c97e70b0ec819be9b184e1201f1bb..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_2_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 104.74476852416993, "l1_loss": 1089.078857421875, "l0": 80.0, "frac_variance_explained": 0.80969318151474, "cossim": 0.9345323204994201, "l2_ratio": 0.9369295358657836, "relative_reconstruction_bias": 1.0023408353328704, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6124815225601195, "loss_zero": 12.452933025360107, "frac_recovered": 0.9837063908576965, "frac_alive": 0.4520399272441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_3_step_0/ae.pt b/resid_post_layer_19_checkpoints/trainer_3_step_0/ae.pt deleted file mode 100644 index 2b8843ef9a81156e1d039f133a688428bf0b35cd..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_3_step_0/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863 -size 339823416 diff --git a/resid_post_layer_19_checkpoints/trainer_3_step_0/config.json b/resid_post_layer_19_checkpoints/trainer_3_step_0/config.json deleted file mode 100644 index 49337ad19cbeecf4e0d60f57609138c7f7d44873..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_3_step_0/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "0", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_3_step_0/eval_results.json b/resid_post_layer_19_checkpoints/trainer_3_step_0/eval_results.json deleted file mode 100644 index 4297844cfd01e1e19f1aa3d55a35639c32c2e8ff..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_3_step_0/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 249.4814208984375, "l1_loss": 1557.7815795898437, "l0": 160.0, "frac_variance_explained": 0.2693453192710876, "cossim": 0.5758010566234588, "l2_ratio": 0.5003698885440826, "relative_reconstruction_bias": 0.8689843118190765, "loss_original": 2.4489264488220215, "loss_reconstructed": 6.904741621017456, "loss_zero": 12.452933025360107, "frac_recovered": 0.5548302710056305, "frac_alive": 0.4581163227558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_3_step_19528/ae.pt b/resid_post_layer_19_checkpoints/trainer_3_step_19528/ae.pt deleted file mode 100644 index 4392eb36712edd10834bd64b1321b7a869098af9..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_3_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5c486b6f756df20448e407f3c5869f60fae9ebeeb0d4337ed978ac408683a6aa -size 339823704 diff --git a/resid_post_layer_19_checkpoints/trainer_3_step_19528/config.json b/resid_post_layer_19_checkpoints/trainer_3_step_19528/config.json deleted file mode 100644 index d4b329592d1fce8a94c947d140f5dc6f0db38197..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_3_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_3_step_19528/eval_results.json b/resid_post_layer_19_checkpoints/trainer_3_step_19528/eval_results.json deleted file mode 100644 index 36e9f87e5b43611ee6f705238750fe32a14917f3..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_3_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 91.7035026550293, "l1_loss": 1663.0046997070312, "l0": 160.0, "frac_variance_explained": 0.9000596463680267, "cossim": 0.9463433861732483, "l2_ratio": 0.9473640978336334, "relative_reconstruction_bias": 1.0002830386161805, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.560414028167725, "loss_zero": 12.452933025360107, "frac_recovered": 0.9889058411121369, "frac_alive": 0.6365559697151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_3_step_29292/ae.pt b/resid_post_layer_19_checkpoints/trainer_3_step_29292/ae.pt deleted file mode 100644 index 7d19e9675f7a587d00a3253c87509cb4533838ff..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_3_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2c1a97b87d4e45d57e66a4e4e2c1e13510f3608183c0bf74d3a05d5a70efb810 -size 339823704 diff --git a/resid_post_layer_19_checkpoints/trainer_3_step_29292/config.json b/resid_post_layer_19_checkpoints/trainer_3_step_29292/config.json deleted file mode 100644 index 2dd230ed1820af759087a4287a2c9c6d8c3a76cd..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_3_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_3_step_29292/eval_results.json b/resid_post_layer_19_checkpoints/trainer_3_step_29292/eval_results.json deleted file mode 100644 index 69c7a5fc93e20d1151073684d642c6f192326451..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_3_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 90.6346321105957, "l1_loss": 1634.7385131835938, "l0": 160.0, "frac_variance_explained": 0.8902020215988159, "cossim": 0.9480599403381348, "l2_ratio": 0.948556911945343, "relative_reconstruction_bias": 1.0000574469566346, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5537137746810914, "loss_zero": 12.452933025360107, "frac_recovered": 0.9895727157592773, "frac_alive": 0.6273871660232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_3_step_4882/ae.pt b/resid_post_layer_19_checkpoints/trainer_3_step_4882/ae.pt deleted file mode 100644 index b64182f6349465aac6d94b3f9d288be8def81a68..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_3_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fc3f48e30292452bc4b2bb715b459a98a6f8ce4782df8e84fc2d0d50320e18ca -size 339823504 diff --git a/resid_post_layer_19_checkpoints/trainer_3_step_4882/config.json b/resid_post_layer_19_checkpoints/trainer_3_step_4882/config.json deleted file mode 100644 index cf314aa77c0fc30f8b86b2543fd2ec989a69bb80..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_3_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_3_step_4882/eval_results.json b/resid_post_layer_19_checkpoints/trainer_3_step_4882/eval_results.json deleted file mode 100644 index 0963d49f93e2b326df1fc7535f55fd65d32c6b9d..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_3_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 96.5266212463379, "l1_loss": 1513.4279052734375, "l0": 160.0, "frac_variance_explained": 0.8468407332897187, "cossim": 0.9394637823104859, "l2_ratio": 0.9399228096008301, "relative_reconstruction_bias": 1.0007961332798003, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.583479952812195, "loss_zero": 12.452933025360107, "frac_recovered": 0.9866007685661315, "frac_alive": 0.7013888955116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_3_step_9764/ae.pt b/resid_post_layer_19_checkpoints/trainer_3_step_9764/ae.pt deleted file mode 100644 index 777013b50cdb032398b90c696d0366a6ca8cbd4e..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_3_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b264220e43ef640c76af6bb80a0f964b1a368f6411bf4618c8288d3f9e1fe13e -size 339823504 diff --git a/resid_post_layer_19_checkpoints/trainer_3_step_9764/config.json b/resid_post_layer_19_checkpoints/trainer_3_step_9764/config.json deleted file mode 100644 index 68d5c69e45edfbf11daccf9ca0cacd3497e98ab8..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_3_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_3_step_9764/eval_results.json b/resid_post_layer_19_checkpoints/trainer_3_step_9764/eval_results.json deleted file mode 100644 index 3c35b66695d803ab296f3c508154d7b6cd53c422..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_3_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 93.82733917236328, "l1_loss": 1625.241357421875, "l0": 160.0, "frac_variance_explained": 0.8716640174388885, "cossim": 0.9456298291683197, "l2_ratio": 0.944654256105423, "relative_reconstruction_bias": 0.9988911032676697, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5651297330856324, "loss_zero": 12.452933025360107, "frac_recovered": 0.9884312570095062, "frac_alive": 0.6468641757965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_4_step_0/ae.pt b/resid_post_layer_19_checkpoints/trainer_4_step_0/ae.pt deleted file mode 100644 index 2b8843ef9a81156e1d039f133a688428bf0b35cd..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_4_step_0/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863 -size 339823416 diff --git a/resid_post_layer_19_checkpoints/trainer_4_step_0/config.json b/resid_post_layer_19_checkpoints/trainer_4_step_0/config.json deleted file mode 100644 index 3e45f6a4f4c39a23377131bac277674e4576d2eb..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_4_step_0/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "0", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_4_step_0/eval_results.json b/resid_post_layer_19_checkpoints/trainer_4_step_0/eval_results.json deleted file mode 100644 index 349e692a8aeab7f20d5d607dd6ea38bbf1595765..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_4_step_0/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 224.30277557373046, "l1_loss": 2840.585400390625, "l0": 320.0, "frac_variance_explained": 0.35540305376052855, "cossim": 0.6738833487033844, "l2_ratio": 0.7177993714809418, "relative_reconstruction_bias": 1.0651414752006532, "loss_original": 2.4489264488220215, "loss_reconstructed": 4.629237842559815, "loss_zero": 12.452933025360107, "frac_recovered": 0.782224440574646, "frac_alive": 0.611436665058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_4_step_19528/ae.pt b/resid_post_layer_19_checkpoints/trainer_4_step_19528/ae.pt deleted file mode 100644 index dc7c3d6831c6f14784b74e08abcff228678de4d6..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_4_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e013d911580da5fa930ba1a4a25a4d341694920a120ea3149ecbcc322e059a7d -size 339823704 diff --git a/resid_post_layer_19_checkpoints/trainer_4_step_19528/config.json b/resid_post_layer_19_checkpoints/trainer_4_step_19528/config.json deleted file mode 100644 index 238bb82b236b01d84bf1a5b39ce38ca7e7f546ce..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_4_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_4_step_19528/eval_results.json b/resid_post_layer_19_checkpoints/trainer_4_step_19528/eval_results.json deleted file mode 100644 index 80b9e786cb26f52c3f9cdf7a95a895b2b6fe0aa5..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_4_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 80.71283187866212, "l1_loss": 2646.23154296875, "l0": 320.0, "frac_variance_explained": 0.9224247574806214, "cossim": 0.9600512623786926, "l2_ratio": 0.9615144371986389, "relative_reconstruction_bias": 1.0007874190807342, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5228251695632933, "loss_zero": 12.452933025360107, "frac_recovered": 0.9926568508148194, "frac_alive": 0.7572699785232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_4_step_29292/ae.pt b/resid_post_layer_19_checkpoints/trainer_4_step_29292/ae.pt deleted file mode 100644 index e9ddf92462f3de236dc23b3588ebff7b4b64be98..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_4_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc839bec30e0e84fbd64e8886828f65770e7433261e274514881123c6e628697 -size 339823704 diff --git a/resid_post_layer_19_checkpoints/trainer_4_step_29292/config.json b/resid_post_layer_19_checkpoints/trainer_4_step_29292/config.json deleted file mode 100644 index bca7a6516587bb926415ae653764ccdf130feb3b..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_4_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_4_step_29292/eval_results.json b/resid_post_layer_19_checkpoints/trainer_4_step_29292/eval_results.json deleted file mode 100644 index fd76171dfea82ca5728a545d67f57b92f506377a..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_4_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 78.85371398925781, "l1_loss": 2574.3860595703127, "l0": 320.0, "frac_variance_explained": 0.9240821838378906, "cossim": 0.9596679151058197, "l2_ratio": 0.9616780996322631, "relative_reconstruction_bias": 0.9999951899051667, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.520471453666687, "loss_zero": 12.452933025360107, "frac_recovered": 0.9928891241550446, "frac_alive": 0.770128071308136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_4_step_4882/ae.pt b/resid_post_layer_19_checkpoints/trainer_4_step_4882/ae.pt deleted file mode 100644 index 36c36c1fe1df8931874e3192f331054b0f362d28..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_4_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:19f9f6e416cb08656235133e139d870a612d18b9960597e3e7521bd195c6356d -size 339823504 diff --git a/resid_post_layer_19_checkpoints/trainer_4_step_4882/config.json b/resid_post_layer_19_checkpoints/trainer_4_step_4882/config.json deleted file mode 100644 index f54888f75155a944836265b59a5237bcaf9ed543..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_4_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_4_step_4882/eval_results.json b/resid_post_layer_19_checkpoints/trainer_4_step_4882/eval_results.json deleted file mode 100644 index 2b33a4be7e3fbf25167fdc4c63fd53423fa77ba7..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_4_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 94.2914825439453, "l1_loss": 2336.6464233398438, "l0": 320.0, "frac_variance_explained": 0.902019590139389, "cossim": 0.9459708452224731, "l2_ratio": 0.9458859682083129, "relative_reconstruction_bias": 0.9999112665653229, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5462760448455812, "loss_zero": 12.452933025360107, "frac_recovered": 0.9903128445148468, "frac_alive": 0.905381977558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_4_step_9764/ae.pt b/resid_post_layer_19_checkpoints/trainer_4_step_9764/ae.pt deleted file mode 100644 index e6e51192b4a8b849be83498d29fd1ec5b52a047f..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_4_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d8bcc0ce8ce459befe11146fb978b1714cc23cb496df3db5a2595dca848aae3c -size 339823504 diff --git a/resid_post_layer_19_checkpoints/trainer_4_step_9764/config.json b/resid_post_layer_19_checkpoints/trainer_4_step_9764/config.json deleted file mode 100644 index fa4cc2696f8b497a43f6531d76238436e4668100..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_4_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_4_step_9764/eval_results.json b/resid_post_layer_19_checkpoints/trainer_4_step_9764/eval_results.json deleted file mode 100644 index 73e08657f7285b6790ca072433596b9a5b86df6a..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_4_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 84.83213348388672, "l1_loss": 2498.9994140625, "l0": 320.0, "frac_variance_explained": 0.8912880837917327, "cossim": 0.9524414777755738, "l2_ratio": 0.9530382812023163, "relative_reconstruction_bias": 1.0004355311393738, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5275760650634767, "loss_zero": 12.452933025360107, "frac_recovered": 0.9921787083148956, "frac_alive": 0.809190571308136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_5_step_0/ae.pt b/resid_post_layer_19_checkpoints/trainer_5_step_0/ae.pt deleted file mode 100644 index 2b8843ef9a81156e1d039f133a688428bf0b35cd..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_5_step_0/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863 -size 339823416 diff --git a/resid_post_layer_19_checkpoints/trainer_5_step_0/config.json b/resid_post_layer_19_checkpoints/trainer_5_step_0/config.json deleted file mode 100644 index ab43fa8a3b1dcc8a9302e233d6c95ea41da7b3d7..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_5_step_0/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "0", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_5_step_0/eval_results.json b/resid_post_layer_19_checkpoints/trainer_5_step_0/eval_results.json deleted file mode 100644 index 4aa416c1e78f1bf3d5db083c39385e1e16e0eb83..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_5_step_0/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 221.45138397216797, "l1_loss": 5213.847412109375, "l0": 640.0, "frac_variance_explained": 0.34842745065689085, "cossim": 0.7575249195098877, "l2_ratio": 1.03416006565094, "relative_reconstruction_bias": 1.354014503955841, "loss_original": 2.4489264488220215, "loss_reconstructed": 3.551916003227234, "loss_zero": 12.452933025360107, "frac_recovered": 0.8898488104343414, "frac_alive": 0.7508680820465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_5_step_19528/ae.pt b/resid_post_layer_19_checkpoints/trainer_5_step_19528/ae.pt deleted file mode 100644 index 7bfb2096b15faef80ba3e8d6ee486db3b7e9ec75..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_5_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:48530770b62ba4820e9fb09223636e206b307007f190a9985f756540bb8d8d9f -size 339823704 diff --git a/resid_post_layer_19_checkpoints/trainer_5_step_19528/config.json b/resid_post_layer_19_checkpoints/trainer_5_step_19528/config.json deleted file mode 100644 index c2851528a9c70533ad4fa0649f6b439e227b8fd8..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_5_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_5_step_19528/eval_results.json b/resid_post_layer_19_checkpoints/trainer_5_step_19528/eval_results.json deleted file mode 100644 index b4a51967df6ecd877a21d011f5e628ac8ca10b75..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_5_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 71.01775970458985, "l1_loss": 4621.40478515625, "l0": 640.0, "frac_variance_explained": 0.9090996205806732, "cossim": 0.9679088115692138, "l2_ratio": 0.9691721737384796, "relative_reconstruction_bias": 1.0014350891113282, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4963836431503297, "loss_zero": 12.452933025360107, "frac_recovered": 0.9952901721000671, "frac_alive": 0.777452290058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_5_step_29292/ae.pt b/resid_post_layer_19_checkpoints/trainer_5_step_29292/ae.pt deleted file mode 100644 index f434e68d27781addfb46367a6530ae825caa7915..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_5_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:de04b664740de016fdea58466e0fd0fc3598ec5db45a028fe27050ac5ea9a30c -size 339823704 diff --git a/resid_post_layer_19_checkpoints/trainer_5_step_29292/config.json b/resid_post_layer_19_checkpoints/trainer_5_step_29292/config.json deleted file mode 100644 index 72cec99e3e65cb90b7f3daa9aa1601f20e822a8b..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_5_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_5_step_29292/eval_results.json b/resid_post_layer_19_checkpoints/trainer_5_step_29292/eval_results.json deleted file mode 100644 index 57a47042d8d9db18a6ea81cb636c167a7f40b0e7..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_5_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 67.18106384277344, "l1_loss": 4894.515576171875, "l0": 640.0, "frac_variance_explained": 0.9415188729763031, "cossim": 0.9716631412506104, "l2_ratio": 0.9726373791694641, "relative_reconstruction_bias": 1.000336092710495, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.493886113166809, "loss_zero": 12.452933025360107, "frac_recovered": 0.9955352962017059, "frac_alive": 0.7851020097732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_5_step_4882/ae.pt b/resid_post_layer_19_checkpoints/trainer_5_step_4882/ae.pt deleted file mode 100644 index 236fde04af83a7a07847035bf149206e221060f2..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_5_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0aca2b9adbb2594e4b9b34115ac4d422ef8459e990aa7c761adce9a76daebe1c -size 339823504 diff --git a/resid_post_layer_19_checkpoints/trainer_5_step_4882/config.json b/resid_post_layer_19_checkpoints/trainer_5_step_4882/config.json deleted file mode 100644 index d6c8b898f40d1b1a696ff15736654e1a9f67948f..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_5_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_5_step_4882/eval_results.json b/resid_post_layer_19_checkpoints/trainer_5_step_4882/eval_results.json deleted file mode 100644 index 2783a263707c4fba65782370af9bff3f99ce68dc..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_5_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 75.81502151489258, "l1_loss": 3782.3845703125, "l0": 640.0, "frac_variance_explained": 0.8975415527820587, "cossim": 0.9643395662307739, "l2_ratio": 0.9644364833831787, "relative_reconstruction_bias": 1.0002473175525666, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5044758319854736, "loss_zero": 12.452933025360107, "frac_recovered": 0.9944850146770478, "frac_alive": 0.8695204257965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_5_step_9764/ae.pt b/resid_post_layer_19_checkpoints/trainer_5_step_9764/ae.pt deleted file mode 100644 index d8b935d30213d13757b09196d273ecfb9a4ffd30..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_5_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c034c15a0c90df5968676a8966e6dcaf037a46a4f5eebac6162f05faffcff3a3 -size 339823504 diff --git a/resid_post_layer_19_checkpoints/trainer_5_step_9764/config.json b/resid_post_layer_19_checkpoints/trainer_5_step_9764/config.json deleted file mode 100644 index 3bc8bcc51f325d78805b6a0b5dadf5795a98f312..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_5_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 19, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_19", - "submodule_name": "resid_post_layer_19" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_19_checkpoints/trainer_5_step_9764/eval_results.json b/resid_post_layer_19_checkpoints/trainer_5_step_9764/eval_results.json deleted file mode 100644 index 1bfae202920b7aa8c66910e0104ee967fb71def8..0000000000000000000000000000000000000000 --- a/resid_post_layer_19_checkpoints/trainer_5_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 70.46529960632324, "l1_loss": 4459.463427734375, "l0": 640.0, "frac_variance_explained": 0.9259386122226715, "cossim": 0.9691527128219605, "l2_ratio": 0.9700005769729614, "relative_reconstruction_bias": 1.000119686126709, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4980183124542235, "loss_zero": 12.452933025360107, "frac_recovered": 0.9951267421245575, "frac_alive": 0.8415256142616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3/trainer_0/ae.pt b/resid_post_layer_3/trainer_0/ae.pt deleted file mode 100644 index e25473d54591e8d372f58ee9c48ba7e27eb397aa..0000000000000000000000000000000000000000 --- a/resid_post_layer_3/trainer_0/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1783285be4188c921c9b1372920d4fb2fce0a7794b4eb2d64ead7230e4d3e175 -size 339823400 diff --git a/resid_post_layer_3/trainer_0/config.json b/resid_post_layer_3/trainer_0/config.json deleted file mode 100644 index a6bdd17a52442401b071273f5cf859757586d939..0000000000000000000000000000000000000000 --- a/resid_post_layer_3/trainer_0/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": 48828, - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3/trainer_0/eval_results.json b/resid_post_layer_3/trainer_0/eval_results.json deleted file mode 100644 index 7035638f51dc0e2a599bcf16835c62b7ea6a3932..0000000000000000000000000000000000000000 --- a/resid_post_layer_3/trainer_0/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 28.266572570800783, "l1_loss": 177.4275115966797, "l0": 20.0, "frac_variance_explained": 0.8498155057430268, "cossim": 0.9323096394538879, "l2_ratio": 0.9319331288337708, "relative_reconstruction_bias": 0.9994702100753784, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6027263402938843, "loss_zero": 12.452933025360107, "frac_recovered": 0.984678465127945, "frac_alive": 0.1553819477558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3/trainer_1/ae.pt b/resid_post_layer_3/trainer_1/ae.pt deleted file mode 100644 index f7a61e078dcb18762c5301d71cbd330038a0b5ef..0000000000000000000000000000000000000000 --- a/resid_post_layer_3/trainer_1/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f66c3176ca5528168722a37816d103246e79e0daac72c90822292e5bc42ae837 -size 339823400 diff --git a/resid_post_layer_3/trainer_1/config.json b/resid_post_layer_3/trainer_1/config.json deleted file mode 100644 index 9d469874e47ba5c351b7d556f3d0437746bd300f..0000000000000000000000000000000000000000 --- a/resid_post_layer_3/trainer_1/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": 48828, - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3/trainer_1/eval_results.json b/resid_post_layer_3/trainer_1/eval_results.json deleted file mode 100644 index ce83f5633c8cd58cfee4924089e5592916a421e3..0000000000000000000000000000000000000000 --- a/resid_post_layer_3/trainer_1/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 27.170896148681642, "l1_loss": 237.9600036621094, "l0": 40.0, "frac_variance_explained": 0.830749922990799, "cossim": 0.9341044247150421, "l2_ratio": 0.9342280626296997, "relative_reconstruction_bias": 0.9999212563037873, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.568475842475891, "loss_zero": 12.452933025360107, "frac_recovered": 0.9880999863147736, "frac_alive": 0.2572157084941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3/trainer_2/ae.pt b/resid_post_layer_3/trainer_2/ae.pt deleted file mode 100644 index 5981a9187d017251131185411397bdcffb9f568f..0000000000000000000000000000000000000000 --- a/resid_post_layer_3/trainer_2/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e6b25562935e27a2a83234f01e5d00ff6c3cc57b21219319d4bfe5cf2f3b7332 -size 339823400 diff --git a/resid_post_layer_3/trainer_2/config.json b/resid_post_layer_3/trainer_2/config.json deleted file mode 100644 index c1ca2e0e1ab44d79228daa7260d60839ded5723a..0000000000000000000000000000000000000000 --- a/resid_post_layer_3/trainer_2/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": 48828, - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3/trainer_2/eval_results.json b/resid_post_layer_3/trainer_2/eval_results.json deleted file mode 100644 index d0ffecdcd806689e4c107c3bce57d7c80c3d46e2..0000000000000000000000000000000000000000 --- a/resid_post_layer_3/trainer_2/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 25.94360065460205, "l1_loss": 327.79884033203126, "l0": 80.0, "frac_variance_explained": 0.8436449348926545, "cossim": 0.9419341802597045, "l2_ratio": 0.9421893358230591, "relative_reconstruction_bias": 1.000232994556427, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.544915461540222, "loss_zero": 12.452933025360107, "frac_recovered": 0.9904540121555329, "frac_alive": 0.4140625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3/trainer_3/ae.pt b/resid_post_layer_3/trainer_3/ae.pt deleted file mode 100644 index 51035fcca688b795afe4b20bdb67ae53914d05b1..0000000000000000000000000000000000000000 --- a/resid_post_layer_3/trainer_3/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2e9b91ee7a42c10b8517ecaf0daec27c5fcd924f13a3efc6cf2ed275af058d44 -size 339823400 diff --git a/resid_post_layer_3/trainer_3/config.json b/resid_post_layer_3/trainer_3/config.json deleted file mode 100644 index 56d0b26ed0f69dd8359724abd8fcec537ec742e8..0000000000000000000000000000000000000000 --- a/resid_post_layer_3/trainer_3/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": 48828, - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3/trainer_3/eval_results.json b/resid_post_layer_3/trainer_3/eval_results.json deleted file mode 100644 index 1c9d7ee59937187c2bba495eb7b442a6b1f69cb3..0000000000000000000000000000000000000000 --- a/resid_post_layer_3/trainer_3/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 23.478511238098143, "l1_loss": 456.1298492431641, "l0": 160.0, "frac_variance_explained": 0.8591484308242798, "cossim": 0.9525353133678436, "l2_ratio": 0.9534881770610809, "relative_reconstruction_bias": 1.0014338195323944, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.521990919113159, "loss_zero": 12.452933025360107, "frac_recovered": 0.992733633518219, "frac_alive": 0.609971821308136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3/trainer_4/ae.pt b/resid_post_layer_3/trainer_4/ae.pt deleted file mode 100644 index 9bc1d65435a0b2a7f324420c5ed862b519282f15..0000000000000000000000000000000000000000 --- a/resid_post_layer_3/trainer_4/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5bce6a26ff7b79cccbf42b30f0545fdf663f0ef3cfbbdabc4635d0c6c1d79d69 -size 339823400 diff --git a/resid_post_layer_3/trainer_4/config.json b/resid_post_layer_3/trainer_4/config.json deleted file mode 100644 index 9b0da10c7c18453193b35c1f4d66618a09dc228b..0000000000000000000000000000000000000000 --- a/resid_post_layer_3/trainer_4/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": 48828, - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3/trainer_4/eval_results.json b/resid_post_layer_3/trainer_4/eval_results.json deleted file mode 100644 index cd0b65d0938929606930697894ad3ef923f425e0..0000000000000000000000000000000000000000 --- a/resid_post_layer_3/trainer_4/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 22.321846961975098, "l1_loss": 799.7821960449219, "l0": 320.0, "frac_variance_explained": 0.901763665676117, "cossim": 0.9601259887218475, "l2_ratio": 0.9598486363887787, "relative_reconstruction_bias": 0.9990402400493622, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.496252703666687, "loss_zero": 12.452933025360107, "frac_recovered": 0.9952966928482055, "frac_alive": 0.8234049677848816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3/trainer_5/ae.pt b/resid_post_layer_3/trainer_5/ae.pt deleted file mode 100644 index c76a9ffc3509fe980fca13b48de552b0993f3110..0000000000000000000000000000000000000000 --- a/resid_post_layer_3/trainer_5/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dae83954f0acd9df7094785461ffe9370145533575b00780f3afe66166fdee62 -size 339823400 diff --git a/resid_post_layer_3/trainer_5/config.json b/resid_post_layer_3/trainer_5/config.json deleted file mode 100644 index 3a2f9fb3e5d9e8c2ccf02ebdb1ccede0d7f83d6f..0000000000000000000000000000000000000000 --- a/resid_post_layer_3/trainer_5/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": 48828, - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3/trainer_5/eval_results.json b/resid_post_layer_3/trainer_5/eval_results.json deleted file mode 100644 index 3b57cf48bd4ccb8983d93f791f214f1ccd35ae70..0000000000000000000000000000000000000000 --- a/resid_post_layer_3/trainer_5/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 19.113201332092284, "l1_loss": 1533.4474975585938, "l0": 640.0, "frac_variance_explained": 0.9071434438228607, "cossim": 0.9702927589416503, "l2_ratio": 0.9707047760486602, "relative_reconstruction_bias": 1.0007278263568877, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4749300956726072, "loss_zero": 12.452933025360107, "frac_recovered": 0.9974163830280304, "frac_alive": 0.895616352558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_0_step_0/ae.pt b/resid_post_layer_3_checkpoints/trainer_0_step_0/ae.pt deleted file mode 100644 index 2b8843ef9a81156e1d039f133a688428bf0b35cd..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_0_step_0/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53aaf9c27601b0a7b69997278058508353e77952278e2d9382de60d03f6a8863 -size 339823416 diff --git a/resid_post_layer_3_checkpoints/trainer_0_step_0/config.json b/resid_post_layer_3_checkpoints/trainer_0_step_0/config.json deleted file mode 100644 index e7b4ac626e1ff9f7839c550c0fedf0fd2538c1fa..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_0_step_0/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "0", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_0_step_0/eval_results.json b/resid_post_layer_3_checkpoints/trainer_0_step_0/eval_results.json deleted file mode 100644 index dcd4560f5f84ba432abb75e13bd63bdd32c7a55c..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_0_step_0/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 81.11176300048828, "l1_loss": 66.31471557617188, "l0": 20.0, "frac_variance_explained": 0.06356927156448364, "cossim": 0.2942200303077698, "l2_ratio": 0.18611131459474564, "relative_reconstruction_bias": 0.632641339302063, "loss_original": 2.4489264488220215, "loss_reconstructed": 12.665856552124023, "loss_zero": 12.452933025360107, "frac_recovered": -0.021439347753766925, "frac_alive": 0.1322699636220932, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_0_step_19528/ae.pt b/resid_post_layer_3_checkpoints/trainer_0_step_19528/ae.pt deleted file mode 100644 index 0940471c6c3fc91c7d102de2d9e9ba342cfdabff..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_0_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:25f5ca6d6ca221534179e1e347bacd1f1855048bfc409b89756b1a703d1a3815 -size 339823704 diff --git a/resid_post_layer_3_checkpoints/trainer_0_step_19528/config.json b/resid_post_layer_3_checkpoints/trainer_0_step_19528/config.json deleted file mode 100644 index 655d8fd5350c7612fc468dd85ce57119f201ed22..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_0_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_0_step_19528/eval_results.json b/resid_post_layer_3_checkpoints/trainer_0_step_19528/eval_results.json deleted file mode 100644 index 35d4021da46f45c95b1655f2f36cb4509f4d16ed..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_0_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 26.953345108032227, "l1_loss": 171.34053955078124, "l0": 20.0, "frac_variance_explained": 0.8236174821853638, "cossim": 0.934092503786087, "l2_ratio": 0.9346385538578034, "relative_reconstruction_bias": 1.0010874211788177, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.613414168357849, "loss_zero": 12.452933025360107, "frac_recovered": 0.9836098670959472, "frac_alive": 0.1463758647441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_0_step_29292/ae.pt b/resid_post_layer_3_checkpoints/trainer_0_step_29292/ae.pt deleted file mode 100644 index 2361d9bb384394c201062a11ea39a90b6236a15a..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_0_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cd65bd704c1974c724c90fd165795f8e54927d9ae3644f4fe067379f21b06314 -size 339823704 diff --git a/resid_post_layer_3_checkpoints/trainer_0_step_29292/config.json b/resid_post_layer_3_checkpoints/trainer_0_step_29292/config.json deleted file mode 100644 index da666a1c251d4e84ddf083deab18365a55d6afcd..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_0_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_0_step_29292/eval_results.json b/resid_post_layer_3_checkpoints/trainer_0_step_29292/eval_results.json deleted file mode 100644 index f9085b96b35ed74b03be31a16a0f73aaccee4b57..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_0_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 28.598044776916502, "l1_loss": 169.38932189941406, "l0": 20.0, "frac_variance_explained": 0.8030181944370269, "cossim": 0.9267681956291198, "l2_ratio": 0.9272215604782105, "relative_reconstruction_bias": 1.0012859225273132, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6079941034317016, "loss_zero": 12.452933025360107, "frac_recovered": 0.984152227640152, "frac_alive": 0.1520182341337204, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_0_step_4882/ae.pt b/resid_post_layer_3_checkpoints/trainer_0_step_4882/ae.pt deleted file mode 100644 index d5851d634203353f5a8857f8a9fb2425080a920a..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_0_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4d8191818262e15a82be30423fda17808f54d0d955db062f7d8a7d6fb890d7b6 -size 339823504 diff --git a/resid_post_layer_3_checkpoints/trainer_0_step_4882/config.json b/resid_post_layer_3_checkpoints/trainer_0_step_4882/config.json deleted file mode 100644 index e79b30d0815aba8308f9602f98f4d87aae6a1405..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_0_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_0_step_4882/eval_results.json b/resid_post_layer_3_checkpoints/trainer_0_step_4882/eval_results.json deleted file mode 100644 index ce64741cb8f85c343b733c77f6bed7f337ac71f9..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_0_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 31.8795015335083, "l1_loss": 167.21665802001954, "l0": 20.0, "frac_variance_explained": 0.7408388495445252, "cossim": 0.9123399138450623, "l2_ratio": 0.9136622846126556, "relative_reconstruction_bias": 1.001996898651123, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6352880716323854, "loss_zero": 12.452933025360107, "frac_recovered": 0.9814215242862702, "frac_alive": 0.1571723073720932, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_0_step_9764/ae.pt b/resid_post_layer_3_checkpoints/trainer_0_step_9764/ae.pt deleted file mode 100644 index 8735ae4c0d1701a3263cab48e1463421381ef39b..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_0_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cd8220baa2fa5e551b7c53a623446be4d58eab9c8756747cd0a9f92fb27bfdb7 -size 339823504 diff --git a/resid_post_layer_3_checkpoints/trainer_0_step_9764/config.json b/resid_post_layer_3_checkpoints/trainer_0_step_9764/config.json deleted file mode 100644 index 2eaa3c0d1de4e54a30527af510d7454c49729ed0..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_0_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_0_step_9764/eval_results.json b/resid_post_layer_3_checkpoints/trainer_0_step_9764/eval_results.json deleted file mode 100644 index 90f86f3c8dd7f17e3aed93d9428f12e25b4b1b49..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_0_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 30.28138885498047, "l1_loss": 179.02954864501953, "l0": 20.0, "frac_variance_explained": 0.794158810377121, "cossim": 0.9210076034069061, "l2_ratio": 0.9214390397071839, "relative_reconstruction_bias": 1.0008983969688416, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6215444803237915, "loss_zero": 12.452933025360107, "frac_recovered": 0.9828002631664277, "frac_alive": 0.1518012136220932, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_1_step_19528/ae.pt b/resid_post_layer_3_checkpoints/trainer_1_step_19528/ae.pt deleted file mode 100644 index 1d3cd17799ec967d96ada109c88a1f760f7b0d7d..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_1_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:45b60e92fc788b346b1b9655a298f6ae3e73181d8247bd955ebee2e22d2c4720 -size 339823704 diff --git a/resid_post_layer_3_checkpoints/trainer_1_step_19528/config.json b/resid_post_layer_3_checkpoints/trainer_1_step_19528/config.json deleted file mode 100644 index 4fc0e6d6109389ee7879091d1360cab9c3393a87..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_1_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_1_step_19528/eval_results.json b/resid_post_layer_3_checkpoints/trainer_1_step_19528/eval_results.json deleted file mode 100644 index ba096668803c2fa8f5c65e05f77ed3eab6c2865f..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_1_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 29.35189151763916, "l1_loss": 242.6250015258789, "l0": 40.0, "frac_variance_explained": 0.8169487476348877, "cossim": 0.9274865686893463, "l2_ratio": 0.9275311231613159, "relative_reconstruction_bias": 1.000645935535431, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.575002074241638, "loss_zero": 12.452933025360107, "frac_recovered": 0.9874506533145905, "frac_alive": 0.2630208432674408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_1_step_29292/ae.pt b/resid_post_layer_3_checkpoints/trainer_1_step_29292/ae.pt deleted file mode 100644 index c185178abe6711ce810bf0e261b4efe35cbc6288..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_1_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ce38ad24c15d822e29c152cf705bf309b45b8498d833b47597e21c85deff4934 -size 339823704 diff --git a/resid_post_layer_3_checkpoints/trainer_1_step_29292/config.json b/resid_post_layer_3_checkpoints/trainer_1_step_29292/config.json deleted file mode 100644 index 477bb8c2bf855610aaca42ce777ff639b6b3b05d..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_1_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_1_step_29292/eval_results.json b/resid_post_layer_3_checkpoints/trainer_1_step_29292/eval_results.json deleted file mode 100644 index 00e438b029f2a0cd0bfab7f4edb9e42a2e190bc0..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_1_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 26.325938987731934, "l1_loss": 228.18090057373047, "l0": 40.0, "frac_variance_explained": 0.8482047379016876, "cossim": 0.9400634229183197, "l2_ratio": 0.9401220500469207, "relative_reconstruction_bias": 1.0000902473926545, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.569880652427673, "loss_zero": 12.452933025360107, "frac_recovered": 0.9879599630832672, "frac_alive": 0.25244140625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_1_step_4882/ae.pt b/resid_post_layer_3_checkpoints/trainer_1_step_4882/ae.pt deleted file mode 100644 index b30f6f212337232957aace178a9660865ff07eb8..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_1_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:549873644eae478d64a552a002d66c3afff8082537f34a79e794c9681d78a908 -size 339823504 diff --git a/resid_post_layer_3_checkpoints/trainer_1_step_4882/config.json b/resid_post_layer_3_checkpoints/trainer_1_step_4882/config.json deleted file mode 100644 index 7e35951ec998f7775dc41decdb891333d1d7d5ea..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_1_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_1_step_4882/eval_results.json b/resid_post_layer_3_checkpoints/trainer_1_step_4882/eval_results.json deleted file mode 100644 index 37c19d41d5ede62175b266e0289b89fb597c1ae4..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_1_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 30.247645950317384, "l1_loss": 254.92621002197265, "l0": 40.0, "frac_variance_explained": 0.8046451151371002, "cossim": 0.9242092549800873, "l2_ratio": 0.9232668936252594, "relative_reconstruction_bias": 1.0001021921634674, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5943933486938477, "loss_zero": 12.452933025360107, "frac_recovered": 0.9855135440826416, "frac_alive": 0.2854275107383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_1_step_9764/ae.pt b/resid_post_layer_3_checkpoints/trainer_1_step_9764/ae.pt deleted file mode 100644 index 1df7cd2dbba2be883f68c5be10fa6f1f97b057a8..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_1_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:052bfdd260134c7b944868ddd50dc8f8ef8f4ebb6603069b1f422bf62879660d -size 339823504 diff --git a/resid_post_layer_3_checkpoints/trainer_1_step_9764/config.json b/resid_post_layer_3_checkpoints/trainer_1_step_9764/config.json deleted file mode 100644 index a6acf3573caf6c0e7def53bc1823f03137bfeb7f..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_1_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_1_step_9764/eval_results.json b/resid_post_layer_3_checkpoints/trainer_1_step_9764/eval_results.json deleted file mode 100644 index 626e25e329a2a84aaf8b6bebcfd823e9ce1e8739..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_1_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 30.165078926086426, "l1_loss": 254.00455780029296, "l0": 40.0, "frac_variance_explained": 0.8110504627227784, "cossim": 0.9237140834331512, "l2_ratio": 0.9251842975616456, "relative_reconstruction_bias": 1.001541006565094, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5785513877868653, "loss_zero": 12.452933025360107, "frac_recovered": 0.9870951175689697, "frac_alive": 0.2688802182674408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_2_step_19528/ae.pt b/resid_post_layer_3_checkpoints/trainer_2_step_19528/ae.pt deleted file mode 100644 index 0e5f4cae9616b0a666f2767299ed3eca94eb9b51..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_2_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:66a6a144bdba9f20c71bae8d573ec10b77dc5e4ca0c1d71feb1cb33679c3fcdc -size 339823704 diff --git a/resid_post_layer_3_checkpoints/trainer_2_step_19528/config.json b/resid_post_layer_3_checkpoints/trainer_2_step_19528/config.json deleted file mode 100644 index a24b053c09e4326850807f144eba6635f67d30ac..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_2_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_2_step_19528/eval_results.json b/resid_post_layer_3_checkpoints/trainer_2_step_19528/eval_results.json deleted file mode 100644 index dd94151f66fbeb06d35cfc964f7b7442e4f249e1..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_2_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 27.827689170837402, "l1_loss": 345.77353820800784, "l0": 80.0, "frac_variance_explained": 0.834139883518219, "cossim": 0.9360387206077576, "l2_ratio": 0.937719202041626, "relative_reconstruction_bias": 1.001767522096634, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5493752717971803, "loss_zero": 12.452933025360107, "frac_recovered": 0.9900059461593628, "frac_alive": 0.4269748330116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_2_step_29292/ae.pt b/resid_post_layer_3_checkpoints/trainer_2_step_29292/ae.pt deleted file mode 100644 index 40cf7dd50113d73f565eb3e0a174a921eb07885b..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_2_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:00b6841084d208f16900d9f7a47fe0ab5f6064f5fb3253a5d5c5aaabae11e587 -size 339823704 diff --git a/resid_post_layer_3_checkpoints/trainer_2_step_29292/config.json b/resid_post_layer_3_checkpoints/trainer_2_step_29292/config.json deleted file mode 100644 index 163f03d2192d573af2aeb21d0e50a091ad72171c..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_2_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_2_step_29292/eval_results.json b/resid_post_layer_3_checkpoints/trainer_2_step_29292/eval_results.json deleted file mode 100644 index 75e7f6ea47a185c35ee91ef3d614c3cab73b560e..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_2_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 26.360229301452637, "l1_loss": 316.6804168701172, "l0": 80.0, "frac_variance_explained": 0.8179186403751373, "cossim": 0.9384184181690216, "l2_ratio": 0.9385398983955383, "relative_reconstruction_bias": 1.0003150522708892, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.545375347137451, "loss_zero": 12.452933025360107, "frac_recovered": 0.9904079556465148, "frac_alive": 0.408257395029068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_2_step_4882/ae.pt b/resid_post_layer_3_checkpoints/trainer_2_step_4882/ae.pt deleted file mode 100644 index a939222708d62e05779f9411601c5f7d05fd03de..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_2_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d059462360a75d52f51be4470da82560cee2f83925c730a6b9fc81a908818113 -size 339823504 diff --git a/resid_post_layer_3_checkpoints/trainer_2_step_4882/config.json b/resid_post_layer_3_checkpoints/trainer_2_step_4882/config.json deleted file mode 100644 index c80c90c4c73ee69c54d812aa1cb06fbc79f62c81..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_2_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_2_step_4882/eval_results.json b/resid_post_layer_3_checkpoints/trainer_2_step_4882/eval_results.json deleted file mode 100644 index ce58d3853f8ffceb02189544c505eda239c5d063..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_2_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 28.21126365661621, "l1_loss": 341.08843688964845, "l0": 80.0, "frac_variance_explained": 0.8251140594482422, "cossim": 0.9305853009223938, "l2_ratio": 0.9317696034908295, "relative_reconstruction_bias": 1.0008663594722749, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.570930075645447, "loss_zero": 12.452933025360107, "frac_recovered": 0.9878564000129699, "frac_alive": 0.4659830629825592, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_2_step_9764/ae.pt b/resid_post_layer_3_checkpoints/trainer_2_step_9764/ae.pt deleted file mode 100644 index d5054a1a898416a1e7a66c9cc52b9df847b2f66e..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_2_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f03c354fbf49857a4481e823d445b181345d46d1d4086ed6f61aee403add80e2 -size 339823504 diff --git a/resid_post_layer_3_checkpoints/trainer_2_step_9764/config.json b/resid_post_layer_3_checkpoints/trainer_2_step_9764/config.json deleted file mode 100644 index d026fcfc6ef0b5fcc8cd9032c6489ef016fa6c82..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_2_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_2_step_9764/eval_results.json b/resid_post_layer_3_checkpoints/trainer_2_step_9764/eval_results.json deleted file mode 100644 index 8bf2a1842d27b559d0dc7628817914ae22b30acc..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_2_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 26.508971977233887, "l1_loss": 328.47056884765624, "l0": 80.0, "frac_variance_explained": 0.8329806983470917, "cossim": 0.9393463492393493, "l2_ratio": 0.9382982671260833, "relative_reconstruction_bias": 0.9995725989341736, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5586145877838136, "loss_zero": 12.452933025360107, "frac_recovered": 0.9890822887420654, "frac_alive": 0.4383138120174408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_3_step_19528/ae.pt b/resid_post_layer_3_checkpoints/trainer_3_step_19528/ae.pt deleted file mode 100644 index 9e9372913980c3301b53b62d1c97a5e2fa7b9cb2..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_3_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5a22f30e488d413ff894eb1d4ae7e8e9d52e61c37d4870d9e8e34a2de03bb8d8 -size 339823704 diff --git a/resid_post_layer_3_checkpoints/trainer_3_step_19528/config.json b/resid_post_layer_3_checkpoints/trainer_3_step_19528/config.json deleted file mode 100644 index 049c3bff99549142013ae0dbf0ab017db1fb2e49..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_3_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_3_step_19528/eval_results.json b/resid_post_layer_3_checkpoints/trainer_3_step_19528/eval_results.json deleted file mode 100644 index 56bf74947b4ddc11c4ff9fd17e81909aaca61504..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_3_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 24.973313903808595, "l1_loss": 465.1881103515625, "l0": 160.0, "frac_variance_explained": 0.849942284822464, "cossim": 0.9479575037956238, "l2_ratio": 0.9482971668243408, "relative_reconstruction_bias": 1.0010927855968474, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.525223898887634, "loss_zero": 12.452933025360107, "frac_recovered": 0.9924123585224152, "frac_alive": 0.6433376669883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_3_step_29292/ae.pt b/resid_post_layer_3_checkpoints/trainer_3_step_29292/ae.pt deleted file mode 100644 index 7bb36aafe3fe50e07f48137a0c644d7c7bedf5b3..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_3_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:53d2f4551f742547ffa64c8724c5affd7c23c137aa71fc447d400c33bb0f437c -size 339823704 diff --git a/resid_post_layer_3_checkpoints/trainer_3_step_29292/config.json b/resid_post_layer_3_checkpoints/trainer_3_step_29292/config.json deleted file mode 100644 index 606c340815c66b8622845180b77d76af1b2b0342..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_3_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_3_step_29292/eval_results.json b/resid_post_layer_3_checkpoints/trainer_3_step_29292/eval_results.json deleted file mode 100644 index 3cca04a00da9080a44e8870178addde6dd19d001..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_3_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 23.815764045715333, "l1_loss": 457.1536590576172, "l0": 160.0, "frac_variance_explained": 0.866757619380951, "cossim": 0.950747811794281, "l2_ratio": 0.9520208537578583, "relative_reconstruction_bias": 1.0018658936023712, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.520562839508057, "loss_zero": 12.452933025360107, "frac_recovered": 0.9928742706775665, "frac_alive": 0.5978190302848816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_3_step_4882/ae.pt b/resid_post_layer_3_checkpoints/trainer_3_step_4882/ae.pt deleted file mode 100644 index 9984bedf25add2fc049df547fee53cb387e3b8d1..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_3_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1495469e8fcc1b44001cb33210ff62ad58f5d7a8f7b6397b3553a126a39c0754 -size 339823504 diff --git a/resid_post_layer_3_checkpoints/trainer_3_step_4882/config.json b/resid_post_layer_3_checkpoints/trainer_3_step_4882/config.json deleted file mode 100644 index 20287d723cf61918283e886b290136d03cbcc8b3..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_3_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_3_step_4882/eval_results.json b/resid_post_layer_3_checkpoints/trainer_3_step_4882/eval_results.json deleted file mode 100644 index 1c845c2e9d06a6098299ef4e0462eb34b81221e6..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_3_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 27.5632266998291, "l1_loss": 470.2460906982422, "l0": 160.0, "frac_variance_explained": 0.8464842736721039, "cossim": 0.9401612520217896, "l2_ratio": 0.940293037891388, "relative_reconstruction_bias": 1.0004801511764527, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5428218841552734, "loss_zero": 12.452933025360107, "frac_recovered": 0.9906556606292725, "frac_alive": 0.7394748330116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_3_step_9764/ae.pt b/resid_post_layer_3_checkpoints/trainer_3_step_9764/ae.pt deleted file mode 100644 index 5b3ae2762c80d6b7800661ce852c58c7ec5096c5..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_3_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3f825291082fb3bb4c8d4889f68867134f49b460e39e145690228ddd9c4946a9 -size 339823504 diff --git a/resid_post_layer_3_checkpoints/trainer_3_step_9764/config.json b/resid_post_layer_3_checkpoints/trainer_3_step_9764/config.json deleted file mode 100644 index dfbb3eb61e46e1bde614cf580f7cbdbfd5100946..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_3_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_3_step_9764/eval_results.json b/resid_post_layer_3_checkpoints/trainer_3_step_9764/eval_results.json deleted file mode 100644 index 485ad51e292191cb65256cf1633d51e5bb03e409..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_3_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 26.196580696105958, "l1_loss": 475.3064727783203, "l0": 160.0, "frac_variance_explained": 0.8526719510555267, "cossim": 0.9425513565540313, "l2_ratio": 0.9438767254352569, "relative_reconstruction_bias": 1.001193392276764, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5301116943359374, "loss_zero": 12.452933025360107, "frac_recovered": 0.9919238090515137, "frac_alive": 0.683214008808136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_4_step_19528/ae.pt b/resid_post_layer_3_checkpoints/trainer_4_step_19528/ae.pt deleted file mode 100644 index 470dc8b1746e8cb83df9b7de657acb0a9883d7ab..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_4_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:46a60ca692d035c78307b3e341d5c44248937cd5e5934b9db0cc04073f50fa0c -size 339823704 diff --git a/resid_post_layer_3_checkpoints/trainer_4_step_19528/config.json b/resid_post_layer_3_checkpoints/trainer_4_step_19528/config.json deleted file mode 100644 index 05a489c9c5dcc2dbc2894ee6a139dfae0a43402a..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_4_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_4_step_19528/eval_results.json b/resid_post_layer_3_checkpoints/trainer_4_step_19528/eval_results.json deleted file mode 100644 index 70acc9e47df2023fca7bb057d5beb6e94556b495..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_4_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 22.630137252807618, "l1_loss": 761.5822509765625, "l0": 320.0, "frac_variance_explained": 0.9073990225791931, "cossim": 0.9594317018985749, "l2_ratio": 0.9603235721588135, "relative_reconstruction_bias": 1.001180464029312, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5024558305740356, "loss_zero": 12.452933025360107, "frac_recovered": 0.9946776688098907, "frac_alive": 0.8553059697151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_4_step_29292/ae.pt b/resid_post_layer_3_checkpoints/trainer_4_step_29292/ae.pt deleted file mode 100644 index feac2cd1d3e7608a1d99a9cd445e82475a53acf5..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_4_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ce4c1969966076ff5620ec2215ef583832a9e29f4990ada8bf116e6f05d5e83f -size 339823704 diff --git a/resid_post_layer_3_checkpoints/trainer_4_step_29292/config.json b/resid_post_layer_3_checkpoints/trainer_4_step_29292/config.json deleted file mode 100644 index 9871fb1e086b3f5e6995720d083b1fc576502ebb..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_4_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_4_step_29292/eval_results.json b/resid_post_layer_3_checkpoints/trainer_4_step_29292/eval_results.json deleted file mode 100644 index f98b1f4e86a41019151cfcfb158f7efc081c84cc..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_4_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 22.81598815917969, "l1_loss": 760.7758239746094, "l0": 320.0, "frac_variance_explained": 0.8861272156238555, "cossim": 0.9564962923526764, "l2_ratio": 0.958636736869812, "relative_reconstruction_bias": 1.0027027010917664, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.499033737182617, "loss_zero": 12.452933025360107, "frac_recovered": 0.9950167834758759, "frac_alive": 0.8181965947151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_4_step_4882/ae.pt b/resid_post_layer_3_checkpoints/trainer_4_step_4882/ae.pt deleted file mode 100644 index 468d46162839b5684eba64c62d68bf069b830289..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_4_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ac3bef5c53456cf91962e2e2751c69ae92db65df8422a17b9987aafa2accda52 -size 339823504 diff --git a/resid_post_layer_3_checkpoints/trainer_4_step_4882/config.json b/resid_post_layer_3_checkpoints/trainer_4_step_4882/config.json deleted file mode 100644 index 1d0650bd71b2b875ad177e7421aabc85beb8eb95..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_4_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_4_step_4882/eval_results.json b/resid_post_layer_3_checkpoints/trainer_4_step_4882/eval_results.json deleted file mode 100644 index cedbe8407e99f38da7d001e8c0b041e96190cf99..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_4_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 27.296066665649413, "l1_loss": 635.4285339355469, "l0": 320.0, "frac_variance_explained": 0.837058961391449, "cossim": 0.9406275987625122, "l2_ratio": 0.9429730951786042, "relative_reconstruction_bias": 1.0029973804950714, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5147974491119385, "loss_zero": 12.452933025360107, "frac_recovered": 0.9934382557868957, "frac_alive": 0.9421115517616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_4_step_9764/ae.pt b/resid_post_layer_3_checkpoints/trainer_4_step_9764/ae.pt deleted file mode 100644 index 1846c1866182267ad4efa08da16557b37d7363b6..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_4_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b92ec5a5569365a19bbd71bfa2800082baf675d7b955a5ab4c50adb3c8449b9d -size 339823504 diff --git a/resid_post_layer_3_checkpoints/trainer_4_step_9764/config.json b/resid_post_layer_3_checkpoints/trainer_4_step_9764/config.json deleted file mode 100644 index 5038c3a90e8c3a78bcdecd577b1b82a6fb18228d..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_4_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_4_step_9764/eval_results.json b/resid_post_layer_3_checkpoints/trainer_4_step_9764/eval_results.json deleted file mode 100644 index 9b038c8208d6a0eb0201662e2992ca9510dcd1da..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_4_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 22.620394706726074, "l1_loss": 685.0079895019531, "l0": 320.0, "frac_variance_explained": 0.883182042837143, "cossim": 0.9556304156780243, "l2_ratio": 0.955713278055191, "relative_reconstruction_bias": 1.0008978366851806, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.506931781768799, "loss_zero": 12.452933025360107, "frac_recovered": 0.9942240417003632, "frac_alive": 0.8894856572151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_5_step_19528/ae.pt b/resid_post_layer_3_checkpoints/trainer_5_step_19528/ae.pt deleted file mode 100644 index af93d5701ca80cde2e3ec6301e627dd8917b809f..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_5_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b19ca68ced88a9c4720ec3a763aa515bb040a1f4888093c6a4a6ede7945fdef8 -size 339823704 diff --git a/resid_post_layer_3_checkpoints/trainer_5_step_19528/config.json b/resid_post_layer_3_checkpoints/trainer_5_step_19528/config.json deleted file mode 100644 index 413fe479d4379a7143cfa8584c4d83aac92d7c82..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_5_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_5_step_19528/eval_results.json b/resid_post_layer_3_checkpoints/trainer_5_step_19528/eval_results.json deleted file mode 100644 index 43f51daf8c2c4eb4b2b45f1e764700b913aff4bf..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_5_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 20.8123197555542, "l1_loss": 1378.54248046875, "l0": 640.0, "frac_variance_explained": 0.8955779373645782, "cossim": 0.9655032277107238, "l2_ratio": 0.9666714549064637, "relative_reconstruction_bias": 1.0018744826316834, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4798152446746826, "loss_zero": 12.452933025360107, "frac_recovered": 0.9969263732433319, "frac_alive": 0.9377170205116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_5_step_29292/ae.pt b/resid_post_layer_3_checkpoints/trainer_5_step_29292/ae.pt deleted file mode 100644 index 38e4558de9656383b623e08a671204bba6d52ac0..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_5_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ca907c7c2c6c413a7f38b76cc74df88d049459739c33194edab99a9fe862522b -size 339823704 diff --git a/resid_post_layer_3_checkpoints/trainer_5_step_29292/config.json b/resid_post_layer_3_checkpoints/trainer_5_step_29292/config.json deleted file mode 100644 index b84b139677f5cd2d14857f646bf8e9a860dbeee4..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_5_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_5_step_29292/eval_results.json b/resid_post_layer_3_checkpoints/trainer_5_step_29292/eval_results.json deleted file mode 100644 index 2c278408ee4d888abeb4614a9cee5457068f80db..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_5_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 19.91728630065918, "l1_loss": 1506.0241333007812, "l0": 639.4, "frac_variance_explained": 0.924304074048996, "cossim": 0.9687296390533447, "l2_ratio": 0.9696931481361389, "relative_reconstruction_bias": 1.0016383588314057, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4805987596511843, "loss_zero": 12.452933025360107, "frac_recovered": 0.9968513727188111, "frac_alive": 0.914171040058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_5_step_4882/ae.pt b/resid_post_layer_3_checkpoints/trainer_5_step_4882/ae.pt deleted file mode 100644 index 1c73e61556acee1b0082b0f685a1616dd345494d..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_5_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c73325da23bdd84b78e50d01f9a40b9d0a4ed525050315e33be0ff3fcefc912d -size 339823504 diff --git a/resid_post_layer_3_checkpoints/trainer_5_step_4882/config.json b/resid_post_layer_3_checkpoints/trainer_5_step_4882/config.json deleted file mode 100644 index eb53c6b2b1e3e6acdf647a2f1e395938a1079a59..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_5_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_5_step_4882/eval_results.json b/resid_post_layer_3_checkpoints/trainer_5_step_4882/eval_results.json deleted file mode 100644 index faf34b41e70c43e757469ba4882d69e6a30fdf48..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_5_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 22.1976598739624, "l1_loss": 1065.5535888671875, "l0": 640.0, "frac_variance_explained": 0.8896034240722657, "cossim": 0.960835748910904, "l2_ratio": 0.9629217565059662, "relative_reconstruction_bias": 1.002778035402298, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.483345627784729, "loss_zero": 12.452933025360107, "frac_recovered": 0.9965746462345123, "frac_alive": 0.9876301884651184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_5_step_9764/ae.pt b/resid_post_layer_3_checkpoints/trainer_5_step_9764/ae.pt deleted file mode 100644 index 746d151730067213323cc04daed1df0a510da095..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_5_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e1e479a11557720adff9b1961db0b4575ce3a3f3b00835cd77fcaed6dbeb50d7 -size 339823504 diff --git a/resid_post_layer_3_checkpoints/trainer_5_step_9764/config.json b/resid_post_layer_3_checkpoints/trainer_5_step_9764/config.json deleted file mode 100644 index 578b29824432a515fe55affc2a9d2083b811d390..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_5_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 3, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_3", - "submodule_name": "resid_post_layer_3" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_3_checkpoints/trainer_5_step_9764/eval_results.json b/resid_post_layer_3_checkpoints/trainer_5_step_9764/eval_results.json deleted file mode 100644 index e44ec2c742a69b3d561a83edd0b238e98400a7fe..0000000000000000000000000000000000000000 --- a/resid_post_layer_3_checkpoints/trainer_5_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 20.505210876464844, "l1_loss": 1237.8057373046875, "l0": 640.0, "frac_variance_explained": 0.9102000713348388, "cossim": 0.9658762633800506, "l2_ratio": 0.9668752193450928, "relative_reconstruction_bias": 1.0017230033874511, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4834362506866454, "loss_zero": 12.452933025360107, "frac_recovered": 0.9965657532215119, "frac_alive": 0.9698893427848816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7/trainer_0/ae.pt b/resid_post_layer_7/trainer_0/ae.pt deleted file mode 100644 index c4e4eabde2a5083df49c0f9852e7852d47500219..0000000000000000000000000000000000000000 --- a/resid_post_layer_7/trainer_0/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7dd5535509ef758d162d3c87918999d3034248858eaea5c25a29a59e68ca21f5 -size 339823400 diff --git a/resid_post_layer_7/trainer_0/config.json b/resid_post_layer_7/trainer_0/config.json deleted file mode 100644 index 581d92370a3215c2ea34640431e1164fb25a34c1..0000000000000000000000000000000000000000 --- a/resid_post_layer_7/trainer_0/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": 48828, - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7/trainer_0/eval_results.json b/resid_post_layer_7/trainer_0/eval_results.json deleted file mode 100644 index 24959af30af13c75cba4c0a0d74fffa6bab1fc6d..0000000000000000000000000000000000000000 --- a/resid_post_layer_7/trainer_0/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 39.385193252563475, "l1_loss": 269.69310150146487, "l0": 20.0, "frac_variance_explained": 0.8598198533058167, "cossim": 0.9176318526268006, "l2_ratio": 0.9170765697956085, "relative_reconstruction_bias": 0.999529504776001, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.638276529312134, "loss_zero": 12.452933025360107, "frac_recovered": 0.9811272025108337, "frac_alive": 0.1595594584941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7/trainer_1/ae.pt b/resid_post_layer_7/trainer_1/ae.pt deleted file mode 100644 index a31a069fcbb228bb4ee46f07a4de1d9f35d6635a..0000000000000000000000000000000000000000 --- a/resid_post_layer_7/trainer_1/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ad8281937ee0a3544da960a38f02163245c51a6ae85a6546ae4fc36f5581ac7c -size 339823400 diff --git a/resid_post_layer_7/trainer_1/config.json b/resid_post_layer_7/trainer_1/config.json deleted file mode 100644 index 2c26034300e760b863c603375dbd1194edfbf8c6..0000000000000000000000000000000000000000 --- a/resid_post_layer_7/trainer_1/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": 48828, - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7/trainer_1/eval_results.json b/resid_post_layer_7/trainer_1/eval_results.json deleted file mode 100644 index 6b13bf10028e03d0dab2f8333a3b247db91efbf8..0000000000000000000000000000000000000000 --- a/resid_post_layer_7/trainer_1/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 35.6403621673584, "l1_loss": 266.82349090576173, "l0": 40.0, "frac_variance_explained": 0.8049931645393371, "cossim": 0.93520388007164, "l2_ratio": 0.9352702856063843, "relative_reconstruction_bias": 1.0001867175102235, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.551988196372986, "loss_zero": 12.452933025360107, "frac_recovered": 0.9897454440593719, "frac_alive": 0.284722238779068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7/trainer_2/ae.pt b/resid_post_layer_7/trainer_2/ae.pt deleted file mode 100644 index 9d6eb5393c654e3edd71b0bd5850cef8ba47926b..0000000000000000000000000000000000000000 --- a/resid_post_layer_7/trainer_2/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bda1599d4223b2f9c1cbbc8647f2dd7fd815fa11e1a6d11622671728e450deac -size 339823400 diff --git a/resid_post_layer_7/trainer_2/config.json b/resid_post_layer_7/trainer_2/config.json deleted file mode 100644 index 37c4235c7b8741e33bc5f5d977852f99d5d6ff0f..0000000000000000000000000000000000000000 --- a/resid_post_layer_7/trainer_2/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": 48828, - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7/trainer_2/eval_results.json b/resid_post_layer_7/trainer_2/eval_results.json deleted file mode 100644 index 7506913cd7ea8bd49585ca43cdf4e51bd7b7dbf9..0000000000000000000000000000000000000000 --- a/resid_post_layer_7/trainer_2/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 32.920125007629395, "l1_loss": 377.2314727783203, "l0": 80.0, "frac_variance_explained": 0.8427301347255707, "cossim": 0.9465734004974365, "l2_ratio": 0.9462980151176452, "relative_reconstruction_bias": 0.999693489074707, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5190592050552367, "loss_zero": 12.452933025360107, "frac_recovered": 0.9930278241634369, "frac_alive": 0.4312608540058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7/trainer_3/ae.pt b/resid_post_layer_7/trainer_3/ae.pt deleted file mode 100644 index 801d26034648ce7f6689d1370e6b64b7b424925c..0000000000000000000000000000000000000000 --- a/resid_post_layer_7/trainer_3/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cac232020de263a7df8de1cdb14bec48b2f0c4bbc06cf75038e1b03f5a702b04 -size 339823400 diff --git a/resid_post_layer_7/trainer_3/config.json b/resid_post_layer_7/trainer_3/config.json deleted file mode 100644 index ecc5290c7e5e9956ee4da47ff05ddf5822f7427f..0000000000000000000000000000000000000000 --- a/resid_post_layer_7/trainer_3/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": 48828, - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7/trainer_3/eval_results.json b/resid_post_layer_7/trainer_3/eval_results.json deleted file mode 100644 index bb7088505715be9e5f3839a48f390924ef53c661..0000000000000000000000000000000000000000 --- a/resid_post_layer_7/trainer_3/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 31.11774444580078, "l1_loss": 587.7058166503906, "l0": 160.0, "frac_variance_explained": 0.8864160597324371, "cossim": 0.9515470147132874, "l2_ratio": 0.9520631015300751, "relative_reconstruction_bias": 1.0004171848297119, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4948626518249513, "loss_zero": 12.452933025360107, "frac_recovered": 0.9954373061656951, "frac_alive": 0.6174045205116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7/trainer_4/ae.pt b/resid_post_layer_7/trainer_4/ae.pt deleted file mode 100644 index 009599dc083b9b7f620e4c170b85130b29d7e427..0000000000000000000000000000000000000000 --- a/resid_post_layer_7/trainer_4/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:de5eddeb204b87d4a7d417133245c8d677966082898c7691a2590641f5ba9a56 -size 339823400 diff --git a/resid_post_layer_7/trainer_4/config.json b/resid_post_layer_7/trainer_4/config.json deleted file mode 100644 index 3737fe0fb2bd8228009b661a742b5a3d96075d73..0000000000000000000000000000000000000000 --- a/resid_post_layer_7/trainer_4/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": 48828, - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7/trainer_4/eval_results.json b/resid_post_layer_7/trainer_4/eval_results.json deleted file mode 100644 index 355b1bfd1c564a8bd28e411cc000b66e6cfab668..0000000000000000000000000000000000000000 --- a/resid_post_layer_7/trainer_4/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 25.603483200073242, "l1_loss": 887.8896789550781, "l0": 320.0, "frac_variance_explained": 0.9075254380702973, "cossim": 0.9665767192840576, "l2_ratio": 0.966665506362915, "relative_reconstruction_bias": 1.000228750705719, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.477691173553467, "loss_zero": 12.452933025360107, "frac_recovered": 0.9971450984477996, "frac_alive": 0.6903212070465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7/trainer_5/ae.pt b/resid_post_layer_7/trainer_5/ae.pt deleted file mode 100644 index 3ca5d7672734d160cb26d54579aef140bc214a3a..0000000000000000000000000000000000000000 --- a/resid_post_layer_7/trainer_5/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cb35656b2db5524c00244e221221cdb407219c3a4ca8deb29cc976c95bb1c560 -size 339823400 diff --git a/resid_post_layer_7/trainer_5/config.json b/resid_post_layer_7/trainer_5/config.json deleted file mode 100644 index 1ab67a8b4fc797e8eda2c1b258c91f387f78db80..0000000000000000000000000000000000000000 --- a/resid_post_layer_7/trainer_5/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": 48828, - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7/trainer_5/eval_results.json b/resid_post_layer_7/trainer_5/eval_results.json deleted file mode 100644 index 3e98b3017af1fc79e7992218d65611e808bdf73d..0000000000000000000000000000000000000000 --- a/resid_post_layer_7/trainer_5/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 21.750783348083495, "l1_loss": 2076.0646728515626, "l0": 640.0, "frac_variance_explained": 0.9398650825023651, "cossim": 0.9774176478385925, "l2_ratio": 0.9777889370918273, "relative_reconstruction_bias": 1.0005208492279052, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.461538004875183, "loss_zero": 12.452933025360107, "frac_recovered": 0.9987489283084869, "frac_alive": 0.7127278447151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_0_step_19528/ae.pt b/resid_post_layer_7_checkpoints/trainer_0_step_19528/ae.pt deleted file mode 100644 index 2cd92fe8d82fea87f284ff12c09679ef94e832dc..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_0_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:80dbb596467b405448a086168d515354faf2dbc1847d3afa799c795340857e51 -size 339823704 diff --git a/resid_post_layer_7_checkpoints/trainer_0_step_19528/config.json b/resid_post_layer_7_checkpoints/trainer_0_step_19528/config.json deleted file mode 100644 index 2a46539d75d8fa358b79a532c107b705122d2d27..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_0_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_0_step_19528/eval_results.json b/resid_post_layer_7_checkpoints/trainer_0_step_19528/eval_results.json deleted file mode 100644 index 99950f45460b047126a5a6a71ff0cd0b015c244b..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_0_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 40.97785682678223, "l1_loss": 224.113671875, "l0": 20.0, "frac_variance_explained": 0.796733170747757, "cossim": 0.9149265289306641, "l2_ratio": 0.9156940042972564, "relative_reconstruction_bias": 0.9999639749526977, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6481229782104494, "loss_zero": 12.452933025360107, "frac_recovered": 0.9801438331604004, "frac_alive": 0.1609700471162796, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_0_step_29292/ae.pt b/resid_post_layer_7_checkpoints/trainer_0_step_29292/ae.pt deleted file mode 100644 index 172ee5ce5a941e5cefe9df77fdf95eeb4a5e884e..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_0_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3e13c0970a9edbdb70e8b3033b8b861df18b5b59b6a202a83c78891a06992a4f -size 339823704 diff --git a/resid_post_layer_7_checkpoints/trainer_0_step_29292/config.json b/resid_post_layer_7_checkpoints/trainer_0_step_29292/config.json deleted file mode 100644 index c15be8dd8bdd3cb276fb526b5dfae21aa8924669..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_0_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_0_step_29292/eval_results.json b/resid_post_layer_7_checkpoints/trainer_0_step_29292/eval_results.json deleted file mode 100644 index 9e8e25f7ae2128bc18ec646b51f2227801f0ddfb..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_0_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 41.959268951416014, "l1_loss": 214.32296295166014, "l0": 20.0, "frac_variance_explained": 0.7623796999454499, "cossim": 0.908824360370636, "l2_ratio": 0.9102616131305694, "relative_reconstruction_bias": 1.001514995098114, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6436038970947267, "loss_zero": 12.452933025360107, "frac_recovered": 0.980591356754303, "frac_alive": 0.1611870676279068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_0_step_4882/ae.pt b/resid_post_layer_7_checkpoints/trainer_0_step_4882/ae.pt deleted file mode 100644 index b97b4e08b72f60d1ffff8ee7ece7a16f5bbdc3ce..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_0_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6a5a9e11fa9d4d5e87ba7415560de1bad276af4bbacd13ee4ce63668cd0f83df -size 339823504 diff --git a/resid_post_layer_7_checkpoints/trainer_0_step_4882/config.json b/resid_post_layer_7_checkpoints/trainer_0_step_4882/config.json deleted file mode 100644 index 12dfa36d75e25bf017adc94825111a3867b04009..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_0_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_0_step_4882/eval_results.json b/resid_post_layer_7_checkpoints/trainer_0_step_4882/eval_results.json deleted file mode 100644 index 893767de981a4d86d8fd3fd51002dca8437a4c86..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_0_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 43.85188674926758, "l1_loss": 220.95366516113282, "l0": 20.0, "frac_variance_explained": 0.7532911002635956, "cossim": 0.9032427906990051, "l2_ratio": 0.9006875157356262, "relative_reconstruction_bias": 0.9967345774173737, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6938873529434204, "loss_zero": 12.452933025360107, "frac_recovered": 0.9755684912204743, "frac_alive": 0.1538628488779068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_0_step_9764/ae.pt b/resid_post_layer_7_checkpoints/trainer_0_step_9764/ae.pt deleted file mode 100644 index ca1ec50048f9a8c6c3807abd5a168a92f096709b..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_0_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c472169fe268d76426f9ce9fa43da69bb1611c74d6c4e99e2de39fab4e6fa71d -size 339823504 diff --git a/resid_post_layer_7_checkpoints/trainer_0_step_9764/config.json b/resid_post_layer_7_checkpoints/trainer_0_step_9764/config.json deleted file mode 100644 index cf8c36acbf8d2dff1e9acec518fa300dcb78ef6a..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_0_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 20, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_0_step_9764/eval_results.json b/resid_post_layer_7_checkpoints/trainer_0_step_9764/eval_results.json deleted file mode 100644 index 509229230282626da8dcbaf2a003d06e91c47fb5..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_0_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 44.27531471252441, "l1_loss": 227.16014251708984, "l0": 20.0, "frac_variance_explained": 0.7727083802223206, "cossim": 0.9027702331542968, "l2_ratio": 0.9043409764766693, "relative_reconstruction_bias": 1.0013017654418945, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.6606419324874877, "loss_zero": 12.452933025360107, "frac_recovered": 0.9788933575153351, "frac_alive": 0.1574435830116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_1_step_19528/ae.pt b/resid_post_layer_7_checkpoints/trainer_1_step_19528/ae.pt deleted file mode 100644 index 8c40ca022ce7bba589bebce6952f61d92124b068..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_1_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fedd2a6888b06a935a49a996fcfc3711e57cdc58c6fe378224a6c7bff78a4809 -size 339823704 diff --git a/resid_post_layer_7_checkpoints/trainer_1_step_19528/config.json b/resid_post_layer_7_checkpoints/trainer_1_step_19528/config.json deleted file mode 100644 index b74072331f2d9f312d862c90cc5e95a41b0f5c71..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_1_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_1_step_19528/eval_results.json b/resid_post_layer_7_checkpoints/trainer_1_step_19528/eval_results.json deleted file mode 100644 index b22764017c520a38716064263c188e90f535ebac..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_1_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 36.736992263793944, "l1_loss": 305.2062561035156, "l0": 40.0, "frac_variance_explained": 0.8604458928108215, "cossim": 0.9312281787395478, "l2_ratio": 0.9330651462078094, "relative_reconstruction_bias": 1.001002162694931, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.556801986694336, "loss_zero": 12.452933025360107, "frac_recovered": 0.9892597913742065, "frac_alive": 0.28857421875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_1_step_29292/ae.pt b/resid_post_layer_7_checkpoints/trainer_1_step_29292/ae.pt deleted file mode 100644 index 913ad3f324b2c11268bfc23b154ae572b928bfbb..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_1_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ccf390e5488e679acb56688308584bc285597c8915dca6135e010955223bf4e5 -size 339823704 diff --git a/resid_post_layer_7_checkpoints/trainer_1_step_29292/config.json b/resid_post_layer_7_checkpoints/trainer_1_step_29292/config.json deleted file mode 100644 index 4d6d3e71f718b028621bc364786e84e1f3f1644e..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_1_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_1_step_29292/eval_results.json b/resid_post_layer_7_checkpoints/trainer_1_step_29292/eval_results.json deleted file mode 100644 index b300a4f82101283d134a294d9fb84a60940f496a..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_1_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 35.72008857727051, "l1_loss": 292.7100036621094, "l0": 40.0, "frac_variance_explained": 0.8577439606189727, "cossim": 0.9344999670982361, "l2_ratio": 0.9352131724357605, "relative_reconstruction_bias": 1.0005686342716218, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.555193829536438, "loss_zero": 12.452933025360107, "frac_recovered": 0.9894222438335418, "frac_alive": 0.2881944477558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_1_step_4882/ae.pt b/resid_post_layer_7_checkpoints/trainer_1_step_4882/ae.pt deleted file mode 100644 index 94b81921a6f10899c7a59e01712446058a165f29..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_1_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9de66bd462f9b5a826d4e48f53b6e21a9f8c0badad1748c74cb44f281598eb20 -size 339823504 diff --git a/resid_post_layer_7_checkpoints/trainer_1_step_4882/config.json b/resid_post_layer_7_checkpoints/trainer_1_step_4882/config.json deleted file mode 100644 index 6b8252e906c73149d35de504f1dd0915b73f8e1a..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_1_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_1_step_4882/eval_results.json b/resid_post_layer_7_checkpoints/trainer_1_step_4882/eval_results.json deleted file mode 100644 index 79ffaf71187f61feb1df0bf74ae01e8c549b3da2..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_1_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 39.11642150878906, "l1_loss": 308.71179046630857, "l0": 40.0, "frac_variance_explained": 0.820146119594574, "cossim": 0.9230989933013916, "l2_ratio": 0.9211092412471771, "relative_reconstruction_bias": 0.9978638172149659, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.58606071472168, "loss_zero": 12.452933025360107, "frac_recovered": 0.9863392353057862, "frac_alive": 0.2887912392616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_1_step_9764/ae.pt b/resid_post_layer_7_checkpoints/trainer_1_step_9764/ae.pt deleted file mode 100644 index 356dcb2b21591d88fd101d458ffba7ad9bce1d71..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_1_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7e890993b3c29a1b4d0cbccf7c878e413c6adeb39dff2d1a27946949b9dfed9d -size 339823504 diff --git a/resid_post_layer_7_checkpoints/trainer_1_step_9764/config.json b/resid_post_layer_7_checkpoints/trainer_1_step_9764/config.json deleted file mode 100644 index 6167b8b1220acaedbabefc1167c3b2cbe871233c..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_1_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 40, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_1_step_9764/eval_results.json b/resid_post_layer_7_checkpoints/trainer_1_step_9764/eval_results.json deleted file mode 100644 index f4664a72c2740431f814020dc401a1db81797f63..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_1_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 37.83385543823242, "l1_loss": 287.71506958007814, "l0": 40.0, "frac_variance_explained": 0.810006731748581, "cossim": 0.9278073966503143, "l2_ratio": 0.9288983643054962, "relative_reconstruction_bias": 1.0017506897449493, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5620718479156492, "loss_zero": 12.452933025360107, "frac_recovered": 0.9887315511703492, "frac_alive": 0.285210520029068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_2_step_19528/ae.pt b/resid_post_layer_7_checkpoints/trainer_2_step_19528/ae.pt deleted file mode 100644 index 5fdd67c33659449fafe656785e2171eec02bb6ed..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_2_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f901321da52a015fa48c2ec5f200f541f396c53cb5b46edb521832aa3d989a5 -size 339823704 diff --git a/resid_post_layer_7_checkpoints/trainer_2_step_19528/config.json b/resid_post_layer_7_checkpoints/trainer_2_step_19528/config.json deleted file mode 100644 index 17dc4ac5cc6132f53ff3c7a3c6b81a8c8067b388..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_2_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_2_step_19528/eval_results.json b/resid_post_layer_7_checkpoints/trainer_2_step_19528/eval_results.json deleted file mode 100644 index 6b1d35b94db6ca07b965083653ab8b9fa92d16d1..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_2_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 34.54673595428467, "l1_loss": 380.01134033203124, "l0": 80.0, "frac_variance_explained": 0.8351876437664032, "cossim": 0.9389269590377808, "l2_ratio": 0.9404280722141266, "relative_reconstruction_bias": 1.0014681100845337, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.526151704788208, "loss_zero": 12.452933025360107, "frac_recovered": 0.9923255324363709, "frac_alive": 0.434136301279068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_2_step_29292/ae.pt b/resid_post_layer_7_checkpoints/trainer_2_step_29292/ae.pt deleted file mode 100644 index ab75e0188b2ca0aa50194b7a83195303fe0c83a6..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_2_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:93d4cd4cae2c54bbed4ba155c31660b499de9c2e0efc88ab0a3494a80c2c9ddf -size 339823704 diff --git a/resid_post_layer_7_checkpoints/trainer_2_step_29292/config.json b/resid_post_layer_7_checkpoints/trainer_2_step_29292/config.json deleted file mode 100644 index 99aa125ad9e207c99ebabb7970233a236b8d5eb3..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_2_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_2_step_29292/eval_results.json b/resid_post_layer_7_checkpoints/trainer_2_step_29292/eval_results.json deleted file mode 100644 index bdc2bc9956a80212f6521797ef60c4b6b267f3df..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_2_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 33.95244846343994, "l1_loss": 395.76544494628905, "l0": 80.0, "frac_variance_explained": 0.8481647431850433, "cossim": 0.9428203880786896, "l2_ratio": 0.9439214825630188, "relative_reconstruction_bias": 1.000917023420334, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.522923541069031, "loss_zero": 12.452933025360107, "frac_recovered": 0.9926421821117402, "frac_alive": 0.44482421875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_2_step_4882/ae.pt b/resid_post_layer_7_checkpoints/trainer_2_step_4882/ae.pt deleted file mode 100644 index 818ba7c1fa3701075b0ed808aff87430ca922673..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_2_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:99a1a1142ae3b94324310d21485ae4a519facaa75ab3d1195d48bcd54d8d23fe -size 339823504 diff --git a/resid_post_layer_7_checkpoints/trainer_2_step_4882/config.json b/resid_post_layer_7_checkpoints/trainer_2_step_4882/config.json deleted file mode 100644 index e5d16fcfa7132f9db658b9f3e858e19fadd8897d..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_2_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_2_step_4882/eval_results.json b/resid_post_layer_7_checkpoints/trainer_2_step_4882/eval_results.json deleted file mode 100644 index 6e26a88730b98c70e9e32163f0d7ef3f055b8985..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_2_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 36.37924575805664, "l1_loss": 385.1722045898438, "l0": 80.0, "frac_variance_explained": 0.8252128601074219, "cossim": 0.9325456142425537, "l2_ratio": 0.934538209438324, "relative_reconstruction_bias": 1.001760905981064, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5465892553329468, "loss_zero": 12.452933025360107, "frac_recovered": 0.9902855455875397, "frac_alive": 0.4774305522441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_2_step_9764/ae.pt b/resid_post_layer_7_checkpoints/trainer_2_step_9764/ae.pt deleted file mode 100644 index c53fee34947342cf0a36e77f0076e53695084ecc..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_2_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8e7793e4e61d242572fc41a027d27038773114bf38017bd1ef31c78042ff20e7 -size 339823504 diff --git a/resid_post_layer_7_checkpoints/trainer_2_step_9764/config.json b/resid_post_layer_7_checkpoints/trainer_2_step_9764/config.json deleted file mode 100644 index 9e9afcd584aa060126b44de0b39e9b5b1367e1e1..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_2_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 80, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_2_step_9764/eval_results.json b/resid_post_layer_7_checkpoints/trainer_2_step_9764/eval_results.json deleted file mode 100644 index f450c911cd08194fd817fbd7512d1d2d5bf31117..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_2_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 34.86564693450928, "l1_loss": 385.8026153564453, "l0": 80.0, "frac_variance_explained": 0.836561405658722, "cossim": 0.9394849181175232, "l2_ratio": 0.939175671339035, "relative_reconstruction_bias": 0.9997531950473786, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5293116569519043, "loss_zero": 12.452933025360107, "frac_recovered": 0.9920073211193084, "frac_alive": 0.4495442807674408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_3_step_19528/ae.pt b/resid_post_layer_7_checkpoints/trainer_3_step_19528/ae.pt deleted file mode 100644 index c5fbf2b11d214e4bbcd91f62b9755c85d2e4442d..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_3_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:68fbbb7ee8c90f8e14639a56b55c2adcbf6b0792936e10554dd267f17f2a2ac8 -size 339823704 diff --git a/resid_post_layer_7_checkpoints/trainer_3_step_19528/config.json b/resid_post_layer_7_checkpoints/trainer_3_step_19528/config.json deleted file mode 100644 index f6bb17e0e46e1d3d6d4d43e4d6adc628f5fc043c..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_3_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_3_step_19528/eval_results.json b/resid_post_layer_7_checkpoints/trainer_3_step_19528/eval_results.json deleted file mode 100644 index 33fe552ca93cae294474cfac9fdb36e6e1190cd4..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_3_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 31.254188537597656, "l1_loss": 522.552392578125, "l0": 160.0, "frac_variance_explained": 0.8500134646892548, "cossim": 0.9505284130573273, "l2_ratio": 0.9510213494300842, "relative_reconstruction_bias": 1.0003615200519562, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.498537039756775, "loss_zero": 12.452933025360107, "frac_recovered": 0.995071941614151, "frac_alive": 0.5815972089767456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_3_step_29292/ae.pt b/resid_post_layer_7_checkpoints/trainer_3_step_29292/ae.pt deleted file mode 100644 index 4a60d31cf8da2dbdf9cacdbd39f4f979977fbdac..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_3_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:38fa3fecf4e7755a2e389cc9ba90cb0a8cea6bd07b32f76e52fbde21606c9dd6 -size 339823704 diff --git a/resid_post_layer_7_checkpoints/trainer_3_step_29292/config.json b/resid_post_layer_7_checkpoints/trainer_3_step_29292/config.json deleted file mode 100644 index cbd2a0393321b7a4d930e8f65da54f8552873ee5..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_3_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_3_step_29292/eval_results.json b/resid_post_layer_7_checkpoints/trainer_3_step_29292/eval_results.json deleted file mode 100644 index d496252e61b15e8ef49b7def041767f706d8d85e..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_3_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 31.243211936950683, "l1_loss": 553.372265625, "l0": 160.0, "frac_variance_explained": 0.8649967312812805, "cossim": 0.950848001241684, "l2_ratio": 0.9528935134410859, "relative_reconstruction_bias": 1.0015978693962098, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.497640013694763, "loss_zero": 12.452933025360107, "frac_recovered": 0.9951586544513702, "frac_alive": 0.6100803017616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_3_step_4882/ae.pt b/resid_post_layer_7_checkpoints/trainer_3_step_4882/ae.pt deleted file mode 100644 index f2fc33cf9e198248c2039455feaabe687087018a..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_3_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9781a49f0d23a21bbbd874c1168920bd625f188a50a71c87cbbc25da8ee5409c -size 339823504 diff --git a/resid_post_layer_7_checkpoints/trainer_3_step_4882/config.json b/resid_post_layer_7_checkpoints/trainer_3_step_4882/config.json deleted file mode 100644 index 08c98a5f9541c975fb079ca53d07e5cd5f40b066..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_3_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_3_step_4882/eval_results.json b/resid_post_layer_7_checkpoints/trainer_3_step_4882/eval_results.json deleted file mode 100644 index 41634ff94dceba39a96a313e146f8bb8f6e5d13b..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_3_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 35.319954299926756, "l1_loss": 523.6402954101562, "l0": 160.0, "frac_variance_explained": 0.8372977256774903, "cossim": 0.9389589726924896, "l2_ratio": 0.939053213596344, "relative_reconstruction_bias": 0.9996998012065887, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.519640302658081, "loss_zero": 12.452933025360107, "frac_recovered": 0.9929706990718842, "frac_alive": 0.704698383808136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_3_step_9764/ae.pt b/resid_post_layer_7_checkpoints/trainer_3_step_9764/ae.pt deleted file mode 100644 index 0edd10f70b635e1a01c282e9471a27ea60577c75..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_3_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:24a98ade7695b316a939a242444f8b143183c23d9f25534b99ac564cedbb720b -size 339823504 diff --git a/resid_post_layer_7_checkpoints/trainer_3_step_9764/config.json b/resid_post_layer_7_checkpoints/trainer_3_step_9764/config.json deleted file mode 100644 index 89730e5ac10a496de4840c865f9bff0aa4767514..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_3_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 160, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_3_step_9764/eval_results.json b/resid_post_layer_7_checkpoints/trainer_3_step_9764/eval_results.json deleted file mode 100644 index 2d6b6128cc9cda0c96d054a71174a5d4de162454..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_3_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 34.36524696350098, "l1_loss": 555.1797607421875, "l0": 160.0, "frac_variance_explained": 0.8513323128223419, "cossim": 0.945607328414917, "l2_ratio": 0.9461180448532105, "relative_reconstruction_bias": 1.0003768801689148, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.5059152603149415, "loss_zero": 12.452933025360107, "frac_recovered": 0.9943358719348907, "frac_alive": 0.6633571982383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_4_step_19528/ae.pt b/resid_post_layer_7_checkpoints/trainer_4_step_19528/ae.pt deleted file mode 100644 index eb9dc23883309e6165f0e0b4a63f08e1691e1149..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_4_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a5551598b9ef49e75ed6297e30d02ce4e58b03b401c911b8fd7bcdd0678891f8 -size 339823704 diff --git a/resid_post_layer_7_checkpoints/trainer_4_step_19528/config.json b/resid_post_layer_7_checkpoints/trainer_4_step_19528/config.json deleted file mode 100644 index 49ee9bcd605ce777e6e423957d98f22b80200d39..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_4_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_4_step_19528/eval_results.json b/resid_post_layer_7_checkpoints/trainer_4_step_19528/eval_results.json deleted file mode 100644 index eb4ed9ff62cd2da0e66bb055e297bb4fe1375a09..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_4_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 27.15417594909668, "l1_loss": 891.0104736328125, "l0": 320.0, "frac_variance_explained": 0.9106251835823059, "cossim": 0.9636231184005737, "l2_ratio": 0.9649718701839447, "relative_reconstruction_bias": 1.0003974497318269, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4835067272186278, "loss_zero": 12.452933025360107, "frac_recovered": 0.9965704083442688, "frac_alive": 0.7428928017616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_4_step_29292/ae.pt b/resid_post_layer_7_checkpoints/trainer_4_step_29292/ae.pt deleted file mode 100644 index 77968b4ad473e49fbfc5155ed6f7f3cdd1c7f8e2..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_4_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6df77d32d13353f21e7346c1b6733b3b82437d345fb6772f1dcc22cfb9a41c08 -size 339823704 diff --git a/resid_post_layer_7_checkpoints/trainer_4_step_29292/config.json b/resid_post_layer_7_checkpoints/trainer_4_step_29292/config.json deleted file mode 100644 index aae2c845863467858e26f06ad16169c0fa33cbf3..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_4_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_4_step_29292/eval_results.json b/resid_post_layer_7_checkpoints/trainer_4_step_29292/eval_results.json deleted file mode 100644 index be174fc2f739cd7b2fa63b90d980ee21b90aea10..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_4_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 26.882869529724122, "l1_loss": 908.14228515625, "l0": 320.0, "frac_variance_explained": 0.9116247355937958, "cossim": 0.9638674855232239, "l2_ratio": 0.9651795566082001, "relative_reconstruction_bias": 1.0015326201915742, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4805299997329713, "loss_zero": 12.452933025360107, "frac_recovered": 0.9968637824058533, "frac_alive": 0.7108832597732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_4_step_4882/ae.pt b/resid_post_layer_7_checkpoints/trainer_4_step_4882/ae.pt deleted file mode 100644 index 103869fb96e3df1ff2cbcdebede450daa5d64816..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_4_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b8d3830d2b7cee7c478d31a2e3121cd418d06044dab23d688a935c8c415b608b -size 339823504 diff --git a/resid_post_layer_7_checkpoints/trainer_4_step_4882/config.json b/resid_post_layer_7_checkpoints/trainer_4_step_4882/config.json deleted file mode 100644 index 749867021ed493c8597171f69e3253b1930d7797..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_4_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_4_step_4882/eval_results.json b/resid_post_layer_7_checkpoints/trainer_4_step_4882/eval_results.json deleted file mode 100644 index dc5613685175946837d5b030796d3a1a3dad1165..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_4_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 31.69829444885254, "l1_loss": 762.1271850585938, "l0": 320.0, "frac_variance_explained": 0.8956417977809906, "cossim": 0.9532331645488739, "l2_ratio": 0.9554112255573273, "relative_reconstruction_bias": 1.003282082080841, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.494775891304016, "loss_zero": 12.452933025360107, "frac_recovered": 0.9954480111598969, "frac_alive": 0.9065212607383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_4_step_9764/ae.pt b/resid_post_layer_7_checkpoints/trainer_4_step_9764/ae.pt deleted file mode 100644 index bd88250ded3c97af5b8de6340d7ea10419e50b86..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_4_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2a5a45999524bc0678fe5fd7db4d92a3ec2de7aae5b53c7ee77123275066bfc3 -size 339823504 diff --git a/resid_post_layer_7_checkpoints/trainer_4_step_9764/config.json b/resid_post_layer_7_checkpoints/trainer_4_step_9764/config.json deleted file mode 100644 index 258dad0207b66848f6f441de9c92d715fd90d91d..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_4_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 320, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_4_step_9764/eval_results.json b/resid_post_layer_7_checkpoints/trainer_4_step_9764/eval_results.json deleted file mode 100644 index cf16565bc8defc41519091f119aa4b72ff602e5c..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_4_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 29.911967849731447, "l1_loss": 843.8339965820312, "l0": 320.0, "frac_variance_explained": 0.8969981431961059, "cossim": 0.9574037551879883, "l2_ratio": 0.9580581486225128, "relative_reconstruction_bias": 0.9999774336814881, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4840301752090452, "loss_zero": 12.452933025360107, "frac_recovered": 0.996515566110611, "frac_alive": 0.8080512285232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_5_step_19528/ae.pt b/resid_post_layer_7_checkpoints/trainer_5_step_19528/ae.pt deleted file mode 100644 index af7711e67b9ba2f596f6e8f3714b06abc056b31e..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_5_step_19528/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fbf5c85cf95af657bf925a5a2fda9b482629c36c8b52c93d1067a28f763926b2 -size 339823704 diff --git a/resid_post_layer_7_checkpoints/trainer_5_step_19528/config.json b/resid_post_layer_7_checkpoints/trainer_5_step_19528/config.json deleted file mode 100644 index 5b0c9addf97ca39ccb34ccfabd235a1c3a936e9b..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_5_step_19528/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "19528", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_5_step_19528/eval_results.json b/resid_post_layer_7_checkpoints/trainer_5_step_19528/eval_results.json deleted file mode 100644 index 5f9c786f3bc04e1ae125fdcd5a61e16c098e895a..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_5_step_19528/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 22.559234428405762, "l1_loss": 1923.7697143554688, "l0": 640.0, "frac_variance_explained": 0.9371443152427673, "cossim": 0.9742399871349334, "l2_ratio": 0.9747077763080597, "relative_reconstruction_bias": 0.9993009388446807, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.465738582611084, "loss_zero": 12.452933025360107, "frac_recovered": 0.9983337461948395, "frac_alive": 0.7103407382965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_5_step_29292/ae.pt b/resid_post_layer_7_checkpoints/trainer_5_step_29292/ae.pt deleted file mode 100644 index b1539f441e4bc4cb4350173526d2cc2fa2c57e54..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_5_step_29292/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:15ccee6ed117ae160a9222bde8466e5129ef209ff2178ce8d787f8966ea50300 -size 339823704 diff --git a/resid_post_layer_7_checkpoints/trainer_5_step_29292/config.json b/resid_post_layer_7_checkpoints/trainer_5_step_29292/config.json deleted file mode 100644 index ee0c8409f06918cc03bee77ce90e1494ac64a249..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_5_step_29292/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "29292", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_5_step_29292/eval_results.json b/resid_post_layer_7_checkpoints/trainer_5_step_29292/eval_results.json deleted file mode 100644 index 4bec5f2efff6e4ba84ca3c5b33d62c605c1bf012..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_5_step_29292/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 21.497556304931642, "l1_loss": 1915.4122436523437, "l0": 640.0, "frac_variance_explained": 0.9303101122379303, "cossim": 0.9766062378883362, "l2_ratio": 0.9777574419975281, "relative_reconstruction_bias": 1.001092267036438, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4637466192245485, "loss_zero": 12.452933025360107, "frac_recovered": 0.9985301256179809, "frac_alive": 0.6819118857383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_5_step_4882/ae.pt b/resid_post_layer_7_checkpoints/trainer_5_step_4882/ae.pt deleted file mode 100644 index a9d0e77f3156120ac1f86b52a2436940108460b3..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_5_step_4882/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:053ab640cd5e138d2b39389fc15815b882d391cc36ba79cb6f2c33411235e4d1 -size 339823504 diff --git a/resid_post_layer_7_checkpoints/trainer_5_step_4882/config.json b/resid_post_layer_7_checkpoints/trainer_5_step_4882/config.json deleted file mode 100644 index 7dbbc38438f17caf2b2b64a4ee181e159c9d35bd..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_5_step_4882/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "4882", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_5_step_4882/eval_results.json b/resid_post_layer_7_checkpoints/trainer_5_step_4882/eval_results.json deleted file mode 100644 index fcc250067cbeb27d34147076ad6eea8e81ee4f5f..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_5_step_4882/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 26.21021385192871, "l1_loss": 1639.9731567382812, "l0": 640.0, "frac_variance_explained": 0.9196356236934662, "cossim": 0.966629022359848, "l2_ratio": 0.9678321003913879, "relative_reconstruction_bias": 1.0004817128181458, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.4677053689956665, "loss_zero": 12.452933025360107, "frac_recovered": 0.9981360375881195, "frac_alive": 0.8402235507965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_5_step_9764/ae.pt b/resid_post_layer_7_checkpoints/trainer_5_step_9764/ae.pt deleted file mode 100644 index 0a56a20dc4f3dfac36a5e6520172b41f52fea95e..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_5_step_9764/ae.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4d1328d5e8e00ff98f42923fb9456fa6703f7e05af83710f6c689e04c3f0b2d7 -size 339823504 diff --git a/resid_post_layer_7_checkpoints/trainer_5_step_9764/config.json b/resid_post_layer_7_checkpoints/trainer_5_step_9764/config.json deleted file mode 100644 index 1b58af69c4861d2c86857bec9f9de7ee9cfaedb0..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_5_step_9764/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "trainer": { - "trainer_class": "TrainerTopK", - "dict_class": "AutoEncoderTopK", - "lr": 0.0001885618083164127, - "steps": "9764", - "seed": 0, - "activation_dim": 2304, - "dict_size": 18432, - "k": 640, - "device": "cuda:0", - "layer": 7, - "lm_name": "google/gemma-2-2b", - "wandb_name": "TopKTrainer-google/gemma-2-2b-resid_post_layer_7", - "submodule_name": "resid_post_layer_7" - }, - "buffer": { - "d_submodule": 2304, - "io": "out", - "n_ctxs": 2000, - "ctx_len": 128, - "refresh_batch_size": 32, - "out_batch_size": 4096, - "device": "cuda:0" - } -} \ No newline at end of file diff --git a/resid_post_layer_7_checkpoints/trainer_5_step_9764/eval_results.json b/resid_post_layer_7_checkpoints/trainer_5_step_9764/eval_results.json deleted file mode 100644 index b539be0b8f2c6545be4342c7e6d29fab75ec8934..0000000000000000000000000000000000000000 --- a/resid_post_layer_7_checkpoints/trainer_5_step_9764/eval_results.json +++ /dev/null @@ -1 +0,0 @@ -{"l2_loss": 23.994207000732423, "l1_loss": 1734.673193359375, "l0": 640.0, "frac_variance_explained": 0.9146796822547912, "cossim": 0.972093015909195, "l2_ratio": 0.973492443561554, "relative_reconstruction_bias": 1.0015430510044099, "loss_original": 2.4489264488220215, "loss_reconstructed": 2.466175389289856, "loss_zero": 12.452933025360107, "frac_recovered": 0.9982897996902466, "frac_alive": 0.7409396767616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file