diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_0/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..dff15b7dd860d2c1de9f4cf7766d170240e4bcb0 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dcfa434a82a9bbcc9863757599777159bb9db31af0e68383e9dba21883bbafb +size 16830614 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_0/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f45ab1e9904e6f12d2e0805d6c2839fc554eacd7 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_0/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "l1_penalty": 0.1, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_0/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5f68192364b7c5e67681ee29cb347a63ef8338b9 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.7389051914215088, "l1_loss": 131.95904541015625, "l0": 407.3849792480469, "frac_variance_explained": 0.9963776767253876, "cossim": 0.9922364354133606, "l2_ratio": 0.9922811985015869, "relative_reconstruction_bias": 1.0007711052894592, "loss_original": 5.109375, "loss_reconstructed": 5.203125, "loss_zero": 13.5625, "frac_recovered": 0.98828125, "frac_alive": 0.45361328125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_1/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e7459f05449716a1e2bb04d020ee68ea1d8bec98 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32354d48b22953b9d7e305ace8ccb667512ab1a385fdafe6cb7d1c62f0a232f6 +size 16830614 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_1/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0f9a2ed58bb7b737be6e4277b59929b395bdfd51 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_1/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "l1_penalty": 0.1, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_1/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..77bd56c02579a99f081ffed3e62c1cd598f9ec52 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.42098043859004974, "l1_loss": 221.11109161376953, "l0": 606.5899658203125, "frac_variance_explained": 0.9997320473194122, "cossim": 0.9995822012424469, "l2_ratio": 1.0001044273376465, "relative_reconstruction_bias": 1.0014447569847107, "loss_original": 5.109375, "loss_reconstructed": 5.125, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.48046875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_10/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_10/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a8582d98a54e7d6a339f2f96fb324856a734a6e --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_10/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69a08bdd1d11bca24e9dc22bbb206ed65d68bc33bb227bc94fea19a342440309 +size 67309718 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_10/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_10/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3497e7b1612df00ea087431ffa03f49078fbe4d0 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_10/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "l1_penalty": 0.5, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_10/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_10/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6b5d30aaf6a69383ce0f910574777fa70efc8645 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_10/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.303561568260193, "l1_loss": 71.1842155456543, "l0": 130.38500213623047, "frac_variance_explained": 0.953961968421936, "cossim": 0.9726590514183044, "l2_ratio": 0.9666237831115723, "relative_reconstruction_bias": 0.9969744980335236, "loss_original": 5.109375, "loss_reconstructed": 5.34375, "loss_zero": 13.5625, "frac_recovered": 0.96875, "frac_alive": 0.29925537109375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_11/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_11/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..eddba1ed95c543b0794fea9e47b36d32f4cbd2fc --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_11/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fea5a4cbcffc318ae74d8f96ee6a5e7cf9844a7f9ba8897d06ff2dcecf2504f +size 67309718 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_11/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_11/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d9c864a75265dda3b617ebdbdd1a36c60cf515ee --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_11/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "l1_penalty": 0.5, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_11/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_11/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..de9fc4438feffe20a430b5355084a279ded83400 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_11/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.5663819313049316, "l1_loss": 122.03194046020508, "l0": 241.2249984741211, "frac_variance_explained": 0.9915377795696259, "cossim": 0.9824479222297668, "l2_ratio": 0.9817214906215668, "relative_reconstruction_bias": 0.9983848631381989, "loss_original": 5.109375, "loss_reconstructed": 5.234375, "loss_zero": 13.5625, "frac_recovered": 0.984375, "frac_alive": 0.363037109375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_12/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_12/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0bc7f9f1117510e85a800a1c87adb7fa9252642 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_12/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3885681809f2fddbf063b10077898ed4f516532fd29577202f34a0222d339bef +size 16830614 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_12/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_12/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b8b5fccb66500ccbea6863b22d8798b08ca7c60b --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_12/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "l1_penalty": 0.7, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_12/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_12/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..af86c4f1782d5eaf41fe2b7f1ac03aad3a7694e9 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_12/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.748379111289978, "l1_loss": 60.0474910736084, "l0": 96.12499618530273, "frac_variance_explained": 0.9261168241500854, "cossim": 0.9605966508388519, "l2_ratio": 0.9601201415061951, "relative_reconstruction_bias": 1.0017150044441223, "loss_original": 5.109375, "loss_reconstructed": 5.453125, "loss_zero": 13.5625, "frac_recovered": 0.9609375, "frac_alive": 0.797119140625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_13/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_13/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c183dfa97de79bf07bd3200ce406d47db1a2534 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_13/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79ec504fa45bfb3d62d2d94b88fccad6d89ec472399fc161535aa113b1db683e +size 16830614 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_13/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_13/config.json new file mode 100644 index 0000000000000000000000000000000000000000..519f17f14ddd2dd6ab41391b25ee3d2522b58ab8 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_13/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "l1_penalty": 0.7, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_13/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_13/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b1424c9d877d293c33b56e3aa796fddaef29a0c8 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_13/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.400195837020874, "l1_loss": 103.83464813232422, "l0": 125.91999816894531, "frac_variance_explained": 0.9450815618038177, "cossim": 0.9684658944606781, "l2_ratio": 0.9677082598209381, "relative_reconstruction_bias": 1.0002705752849579, "loss_original": 5.109375, "loss_reconstructed": 5.375, "loss_zero": 13.5625, "frac_recovered": 0.96875, "frac_alive": 0.603515625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_14/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_14/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a49d3b834e8429e5d7a27efa1df1efcb74a8d1f --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_14/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:055b53bbbde076f75d8811d691b8dd5a31779e183f50b504cc7aafd44887cd2c +size 67309718 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_14/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_14/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7cee69ba50745eb06f02544340ad83b96f35c8e2 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_14/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "l1_penalty": 0.7, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_14/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_14/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0dda43792617ad833205d9466c76a5122fe40386 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_14/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.701678156852722, "l1_loss": 59.957834243774414, "l0": 87.77499771118164, "frac_variance_explained": 0.9801019430160522, "cossim": 0.964617520570755, "l2_ratio": 0.9765737950801849, "relative_reconstruction_bias": 1.0015305280685425, "loss_original": 5.109375, "loss_reconstructed": 5.40625, "loss_zero": 13.5625, "frac_recovered": 0.96875, "frac_alive": 0.25762939453125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_15/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_15/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7b21c1926ce46a393c901d2bf9bac81d87b6f19f --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_15/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9019bb938a02b4dadebcef1b98fd6800876b6630d62c7058d1ea506f9670ee00 +size 67309718 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_15/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_15/config.json new file mode 100644 index 0000000000000000000000000000000000000000..734143ca13da26d310ede37249f2d465377b01f2 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_15/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "l1_penalty": 0.7, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_15/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_15/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e0deccd4a6eda3a14c9102f9e45abd65898f2b27 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_15/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.244529366493225, "l1_loss": 120.72209167480469, "l0": 134.03500366210938, "frac_variance_explained": 0.9876659214496613, "cossim": 0.9710160791873932, "l2_ratio": 0.9653101563453674, "relative_reconstruction_bias": 0.9983204305171967, "loss_original": 5.109375, "loss_reconstructed": 5.328125, "loss_zero": 13.5625, "frac_recovered": 0.97265625, "frac_alive": 0.2999267578125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_16/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_16/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..846567e526e3ba101d0a84b8d065b1fdc9373c39 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_16/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a48ac1307ad50ac232ab00d4f4637d4d6bb41ae53baf63d9278f73e94cda1e8c +size 16830614 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_16/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_16/config.json new file mode 100644 index 0000000000000000000000000000000000000000..03962032c5e0e66fcd20bff401c3cd07f5d7442c --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_16/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "l1_penalty": 0.9, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_16/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_16/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..29c95fbe8eccc95f9f0038c380a1f097bff2e8ed --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_16/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.192893028259277, "l1_loss": 51.17982292175293, "l0": 68.81499862670898, "frac_variance_explained": 0.8996331989765167, "cossim": 0.9526893496513367, "l2_ratio": 0.957265168428421, "relative_reconstruction_bias": 1.0063791275024414, "loss_original": 5.109375, "loss_reconstructed": 5.5, "loss_zero": 13.5625, "frac_recovered": 0.95703125, "frac_alive": 0.694091796875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_17/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_17/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae6cba50747262504e0c808f84d0df8a60b6de78 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_17/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a4a5cb499690d90aa667bf599608380f4c2c09248a1adb47959266238071574 +size 16830614 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_17/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_17/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a07c500680f21e9bdc45424d7649f0216b66f803 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_17/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "l1_penalty": 0.9, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_17/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_17/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7e024266229cb061803a653b3ba699396673a45a --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_17/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.077711820602417, "l1_loss": 99.79027938842773, "l0": 79.9749984741211, "frac_variance_explained": 0.9596592485904694, "cossim": 0.9543067216873169, "l2_ratio": 0.9473299384117126, "relative_reconstruction_bias": 0.9983561635017395, "loss_original": 5.109375, "loss_reconstructed": 5.546875, "loss_zero": 13.5625, "frac_recovered": 0.951171875, "frac_alive": 0.476318359375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_18/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_18/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..63402e50e281c7443599899d75be724781ad540c --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_18/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e34d8d1ae12ae97e6bda7ff5c02fdf6e88ae88689fe1bddae57d001aec7fa6a +size 67309718 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_18/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_18/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b3a1d3006ed6f8ac8bf7d6fd374e8fc3b041c0e4 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_18/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "l1_penalty": 0.9, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_18/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_18/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5b02e56bb7d0dba4cdc052a6e7fb4ba39a9f78f8 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_18/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.091695547103882, "l1_loss": 48.56572914123535, "l0": 60.5049991607666, "frac_variance_explained": 0.956131637096405, "cossim": 0.9561952650547028, "l2_ratio": 0.9431371688842773, "relative_reconstruction_bias": 0.9953996241092682, "loss_original": 5.109375, "loss_reconstructed": 5.46875, "loss_zero": 13.5625, "frac_recovered": 0.95703125, "frac_alive": 0.2049560546875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_19/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_19/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..66800cb76e98fdd79894918195c421cbf055a90c --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_19/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d0c40f77fa82b3ee99367aa1c9c6fddfa4568a522b2950f72000e5e92086124 +size 67309718 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_19/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_19/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d2e2e97c0a113bdcf1109eac00d4248561d99e53 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_19/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "l1_penalty": 0.9, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_19/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_19/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2f22a877f1c98ef537f78c9594950501ce42cd22 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_19/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.802935004234314, "l1_loss": 91.14981079101562, "l0": 90.1349983215332, "frac_variance_explained": 0.9635432958602905, "cossim": 0.9614428579807281, "l2_ratio": 0.9609856605529785, "relative_reconstruction_bias": 0.9999611675739288, "loss_original": 5.109375, "loss_reconstructed": 5.390625, "loss_zero": 13.5625, "frac_recovered": 0.96875, "frac_alive": 0.2374267578125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_2/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d020615b837dca4d82647ba5514a96079d1f07ba --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbda330eb07bb354e45b709ed66b72e46e00ff36053039ab13a79c508a30aafc +size 67309718 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_2/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..898d0d606161366195bf7578169f68db46f6cfbd --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_2/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "l1_penalty": 0.1, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_2/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..59ceaa92278e1db22bddc073573fe81adbb53aa7 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.8509351015090942, "l1_loss": 131.13249969482422, "l0": 396.88499450683594, "frac_variance_explained": 0.9975889325141907, "cossim": 0.9916780889034271, "l2_ratio": 0.9986166059970856, "relative_reconstruction_bias": 1.0027380585670471, "loss_original": 5.109375, "loss_reconstructed": 5.234375, "loss_zero": 13.5625, "frac_recovered": 0.984375, "frac_alive": 0.1112060546875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_3/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f29a7e42869ae65fc0b346078694b7d163abf4c --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ee0bbb022a986edae00574b8fbbfa1fca21624fe23929d848901c0aad59c508 +size 67309718 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_3/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c95ae9e6339c3393f323df624b786f7c64c2d1ad --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_3/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "l1_penalty": 0.1, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_3/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..129abc7907eedcd3b69e3ddaf921424ce886222e --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.43181225657463074, "l1_loss": 216.08472442626953, "l0": 590.9849853515625, "frac_variance_explained": 0.9997637271881104, "cossim": 0.9995618760585785, "l2_ratio": 1.0001021027565002, "relative_reconstruction_bias": 1.0001211762428284, "loss_original": 5.109375, "loss_reconstructed": 5.125, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.15557861328125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_4/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..edf3615c27f45339cc5ba5d2a5fcd09b98bf7a24 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc4d313ec47bc3cd65ef3b01a0f2eeb3c11cfc32cb71d09162755f2e7283e7da +size 16830614 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_4/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3860470e34210d3a4f909c1d6b519c7eb058b29d --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_4/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "l1_penalty": 0.3, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_4/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6a087f7db1b661da2a05733fa112202fe709a87a --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.3242627382278442, "l1_loss": 103.32945251464844, "l0": 252.79999542236328, "frac_variance_explained": 0.9789170026779175, "cossim": 0.9860871434211731, "l2_ratio": 0.9804092943668365, "relative_reconstruction_bias": 0.9959313869476318, "loss_original": 5.109375, "loss_reconstructed": 5.234375, "loss_zero": 13.5625, "frac_recovered": 0.984375, "frac_alive": 0.826416015625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_5/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..60fa1e9d4098ebad1a471cbf21e5db58646db66b --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23f9e5f6e411e78c58f132b1da3a717b5ce8175edca4916da2a0c98fa18ddc07 +size 16830614 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_5/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a05e4bd39536f00f0759a0d60bf7ada8832b0418 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_5/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "l1_penalty": 0.3, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_5/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d30f85ca84bdc796400fd917888eb146e1c17e16 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.2129348516464233, "l1_loss": 197.0015640258789, "l0": 379.8599853515625, "frac_variance_explained": 0.9937676191329956, "cossim": 0.9962087273597717, "l2_ratio": 0.9963479340076447, "relative_reconstruction_bias": 1.0004808902740479, "loss_original": 5.109375, "loss_reconstructed": 5.140625, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.5693359375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_6/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_6/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..10bbc2597055a75ed930ed0b23328a586d5dbe4c --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_6/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08572b71d671661971159a29db5a44e18796435245ba85ea709d45e041a82edc +size 67309718 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_6/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_6/config.json new file mode 100644 index 0000000000000000000000000000000000000000..78d0d994c8b67c16a04e0a2ad5e8bfc20092bdf0 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_6/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "l1_penalty": 0.3, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_6/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_6/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..735e2b9385d1af3896c67c8f0b719b83856fa5e6 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_6/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.5198302268981934, "l1_loss": 99.1658821105957, "l0": 223.1649932861328, "frac_variance_explained": 0.9766861200332642, "cossim": 0.9830348789691925, "l2_ratio": 0.980072021484375, "relative_reconstruction_bias": 0.9981227815151215, "loss_original": 5.109375, "loss_reconstructed": 5.28125, "loss_zero": 13.5625, "frac_recovered": 0.984375, "frac_alive": 0.25921630859375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_7/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_7/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..3913569dc82fb74f6a1c9e4ce87fdca03ae64690 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_7/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7ef56a63576896e7c510405b9baf81b06a8f322ff3ee3e5f0c9a4ac3d73e0dc +size 67309718 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_7/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_7/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ee2f97835788bc00ef308183c67d104c45df8e47 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_7/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "l1_penalty": 0.3, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_7/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_7/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e8af9267f4c1e761e7bb34c57acfca09f3d0232f --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_7/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.375364363193512, "l1_loss": 178.87802124023438, "l0": 459.0299987792969, "frac_variance_explained": 0.9939996600151062, "cossim": 0.9952470064163208, "l2_ratio": 0.99808269739151, "relative_reconstruction_bias": 1.0008546710014343, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.35467529296875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_8/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_8/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..bacbb184c620abe9c945284fdd82da0306d398c1 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_8/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aacd6b0edfda1ffa462f9b9b933e936ecc5e14fe4a959eb0e60b62e1d1e1cdb +size 16830614 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_8/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_8/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3e27f5e68ef67873d5a1021ef00b2b0a3e276117 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_8/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "l1_penalty": 0.5, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_8/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_8/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c5d95f2639a78078186387209ad60cc19c0698a3 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_8/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.177458882331848, "l1_loss": 76.50387954711914, "l0": 147.06499481201172, "frac_variance_explained": 0.9791415333747864, "cossim": 0.9747806787490845, "l2_ratio": 0.9815819263458252, "relative_reconstruction_bias": 1.0009117126464844, "loss_original": 5.109375, "loss_reconstructed": 5.34375, "loss_zero": 13.5625, "frac_recovered": 0.97265625, "frac_alive": 0.866943359375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_9/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_9/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ca68d2a4dc51e72341487b521f29c6330e4b49b9 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_9/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bffdfd9da1c16f62a8abcb06d558e500d4d97034911155779b117cb4c75799d5 +size 16830614 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_9/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_9/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9d87a02c0f06cf5f12ecc198b98a033f5f1846d9 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_9/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "l1_penalty": 0.5, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_9/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_9/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9fbb16e75502b467c75cf76a08664f8280077847 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_3/trainer_9/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.5680607557296753, "l1_loss": 132.91940307617188, "l0": 224.09999084472656, "frac_variance_explained": 0.9906136989593506, "cossim": 0.9822552502155304, "l2_ratio": 0.9823748171329498, "relative_reconstruction_bias": 1.0007079243659973, "loss_original": 5.109375, "loss_reconstructed": 5.234375, "loss_zero": 13.5625, "frac_recovered": 0.984375, "frac_alive": 0.68505859375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_0/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..889bba3db488ac4b00448b1c599183b612cb24a1 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05a52adbd818cc717f76bc74cf4cebed71c18ae4bae1fb57c266fb9965ec5755 +size 16830614 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_0/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0f6547bbb9f29734136a561ee8fa16aa8360530c --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_0/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "l1_penalty": 0.1, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_0/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e05d5ce1a5e057c4940cbbbf48d864cab305492b --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.1015034914016724, "l1_loss": 157.79371643066406, "l0": 447.23500061035156, "frac_variance_explained": 0.9945869445800781, "cossim": 0.9909184873104095, "l2_ratio": 0.9937534928321838, "relative_reconstruction_bias": 1.0032199025154114, "loss_original": 5.109375, "loss_reconstructed": 5.265625, "loss_zero": 10.25, "frac_recovered": 0.970703125, "frac_alive": 0.57080078125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_1/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b36e8fc99d024d4c98348862b32a1a664c3a71e4 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fd4c3e097e92af09a68ea483d21120ab2c010f690f4d9ddc13f8f4478e52f04 +size 16830614 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_1/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..273440390f7e92bd8d7e976806c25d2c06e71d5c --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_1/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "l1_penalty": 0.1, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_1/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3951c757d7335ec6eecb999a33a86fa8bcbc2d8b --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.4438294917345047, "l1_loss": 243.34272003173828, "l0": 670.1249694824219, "frac_variance_explained": 0.9996143281459808, "cossim": 0.9995978474617004, "l2_ratio": 0.9998750388622284, "relative_reconstruction_bias": 1.000118374824524, "loss_original": 5.109375, "loss_reconstructed": 5.125, "loss_zero": 10.25, "frac_recovered": 0.99609375, "frac_alive": 0.51904296875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_10/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_10/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e20cb14beb3426cf3cb8f043c9264b60cd72fdd --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_10/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec371091a96c1a8c88d2a154b109c8c2e243813bf4ffd65878561fdc0cff96b4 +size 67309718 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_10/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_10/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ac272437302743c87af488f8eca4575f6e4127cb --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_10/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "l1_penalty": 0.5, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_10/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_10/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..08c3404960eaed6bf575ed73bef3b08930d0e31a --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_10/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.355391263961792, "l1_loss": 89.64961242675781, "l0": 185.51499938964844, "frac_variance_explained": 0.9566273391246796, "cossim": 0.976550430059433, "l2_ratio": 0.9768743813037872, "relative_reconstruction_bias": 1.0025873184204102, "loss_original": 5.109375, "loss_reconstructed": 5.296875, "loss_zero": 10.25, "frac_recovered": 0.962890625, "frac_alive": 0.39385986328125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_11/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_11/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..41dbb938a841ce7c165e726bffc161bae8741606 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_11/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3169fe90b3adae567c3911e2b5396c4cb16401cd881004cb7777b78c7edec1f4 +size 67309718 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_11/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_11/config.json new file mode 100644 index 0000000000000000000000000000000000000000..558e663f90494aa59dc9e8bc798ce0f0c6d4a03e --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_11/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "l1_penalty": 0.5, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_11/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_11/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9caa6463b62a5f75f1dbec1277f051c25ee99e54 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_11/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.5382686853408813, "l1_loss": 136.68033599853516, "l0": 349.23500061035156, "frac_variance_explained": 0.9688377976417542, "cossim": 0.9864452183246613, "l2_ratio": 0.9831381738185883, "relative_reconstruction_bias": 0.997850775718689, "loss_original": 5.109375, "loss_reconstructed": 5.1875, "loss_zero": 10.25, "frac_recovered": 0.984375, "frac_alive": 0.4822998046875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_12/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_12/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d2ddc65031ab88dfe0c487c8443ef309764e262e --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_12/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0473de572ce24c302f22983887678fec2e5fff50b7ab4198b908e4d931ce4ecc +size 16830614 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_12/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_12/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1fc96b3f411228a9336377bf1d437648d53e9977 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_12/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "l1_penalty": 0.7, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_12/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_12/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..563edb272c88466da017cdf6b7142167dae2732e --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_12/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.10287070274353, "l1_loss": 80.71371841430664, "l0": 140.92499542236328, "frac_variance_explained": 0.9564054310321808, "cossim": 0.96378493309021, "l2_ratio": 0.9669409096240997, "relative_reconstruction_bias": 1.001811444759369, "loss_original": 5.109375, "loss_reconstructed": 5.328125, "loss_zero": 10.25, "frac_recovered": 0.958984375, "frac_alive": 0.927978515625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_13/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_13/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1aeb82dbe114795952077f1b59af95626bb565b --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_13/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b94052c89e4e837cf57c415627377dea04f35acf917f12eeb52da824de5975f +size 16830614 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_13/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_13/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b00343a7360f21184b1b834c93c07b3e3178d372 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_13/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "l1_penalty": 0.7, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_13/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_13/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5d108642d3c41234006570784f0b3d3ca701b7f2 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_13/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.6238642930984497, "l1_loss": 111.26651000976562, "l0": 181.40999603271484, "frac_variance_explained": 0.9572663903236389, "cossim": 0.9720774590969086, "l2_ratio": 0.9714520275592804, "relative_reconstruction_bias": 1.0007032454013824, "loss_original": 5.109375, "loss_reconstructed": 5.25, "loss_zero": 10.25, "frac_recovered": 0.97265625, "frac_alive": 0.820556640625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_14/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_14/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a9d9ea71207c2beb6b8516b798b9d68e07f27397 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_14/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ad90fbcb69a6ae9bb5db3076ea5d6ccf37a5f02527f444325761112cf07ebd +size 67309718 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_14/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_14/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6cc082ae028ee9e8506dd4ace188d05ae9a65aa8 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_14/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "l1_penalty": 0.7, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_14/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_14/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c41bc689491a034547cffea4703b557eec7624ca --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_14/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.100494861602783, "l1_loss": 71.07941436767578, "l0": 126.03999710083008, "frac_variance_explained": 0.9571558833122253, "cossim": 0.9647511839866638, "l2_ratio": 0.955089807510376, "relative_reconstruction_bias": 0.9961054027080536, "loss_original": 5.109375, "loss_reconstructed": 5.34375, "loss_zero": 10.25, "frac_recovered": 0.955078125, "frac_alive": 0.35986328125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_15/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_15/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..11de66495f4e2019d15316a1d06272d9f1ec01ba --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_15/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f939535090aa7b33fe3e324269ca48a84753e68aeff85bf99d5f9193fd76b583 +size 67309718 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_15/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_15/config.json new file mode 100644 index 0000000000000000000000000000000000000000..be1396d56fcf37586d293572c8b695591387674f --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_15/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "l1_penalty": 0.7, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_15/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_15/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..637b4ce8b9701342ff9b77adeab546840f70a93a --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_15/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.4895445108413696, "l1_loss": 96.07304000854492, "l0": 210.3699951171875, "frac_variance_explained": 0.9376392662525177, "cossim": 0.9741920530796051, "l2_ratio": 0.9717768132686615, "relative_reconstruction_bias": 0.9996769726276398, "loss_original": 5.109375, "loss_reconstructed": 5.265625, "loss_zero": 10.25, "frac_recovered": 0.96875, "frac_alive": 0.43914794921875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_16/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_16/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d746075f50e4ad06bb51522ea83679ef9e9939c --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_16/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70fc7d47b2b1f356bad6dbe8e0b5df024d670f67737af21ab5601a986e3af309 +size 16830614 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_16/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_16/config.json new file mode 100644 index 0000000000000000000000000000000000000000..84ad329d0c970dd5aa8c290b1b7ee9c935fad1d3 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_16/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "l1_penalty": 0.9, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_16/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_16/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6293dbeed92add458651aae1d4199e1f12ff996a --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_16/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.652043342590332, "l1_loss": 65.01736259460449, "l0": 91.68999481201172, "frac_variance_explained": 0.9426365494728088, "cossim": 0.9524129331111908, "l2_ratio": 0.9474399089813232, "relative_reconstruction_bias": 0.9983129799365997, "loss_original": 5.109375, "loss_reconstructed": 5.4375, "loss_zero": 10.25, "frac_recovered": 0.935546875, "frac_alive": 0.84130859375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_17/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_17/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..56a120e3431dcd7af3f4da4cca276d2953a42266 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_17/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d72503cfac6080442f67f0e4d08cd799d9640be2313e5bce7460119a3fa1861 +size 16830614 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_17/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_17/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fdca6322c0d1cb8e0d5bbd385e31570ba6f6cf40 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_17/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "l1_penalty": 0.9, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_17/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_17/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..078c5820ffe30e6fd60073d8f39480e891d86f8b --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_17/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.453660726547241, "l1_loss": 90.71004486083984, "l0": 121.21999740600586, "frac_variance_explained": 0.9363422095775604, "cossim": 0.9571953117847443, "l2_ratio": 0.9606445729732513, "relative_reconstruction_bias": 1.0029930472373962, "loss_original": 5.109375, "loss_reconstructed": 5.34375, "loss_zero": 10.25, "frac_recovered": 0.955078125, "frac_alive": 0.770751953125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_18/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_18/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2af259d9ab0d6708abd02ccdd7a5e928ba7372b4 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_18/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51f86985140a396b7e00301dd2e5365aed0df58f461d32c3c5e0cf662b9abdb9 +size 67309718 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_18/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_18/config.json new file mode 100644 index 0000000000000000000000000000000000000000..beb499e0d667ae2a0094c173d892da51e90f0286 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_18/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "l1_penalty": 0.9, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_18/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_18/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..31c762fd7e752302e4b9cc764ad2bf8889ab6659 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_18/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.544029951095581, "l1_loss": 64.54390335083008, "l0": 93.29499816894531, "frac_variance_explained": 0.9530273377895355, "cossim": 0.9571136236190796, "l2_ratio": 0.9798197150230408, "relative_reconstruction_bias": 1.0121071338653564, "loss_original": 5.109375, "loss_reconstructed": 5.328125, "loss_zero": 10.25, "frac_recovered": 0.958984375, "frac_alive": 0.31329345703125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_19/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_19/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9136b143c653ceb6b3981088b587370823522d64 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_19/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0f3bf186541734348fa5fa7392396e9d10f576ab8a3485be289d3e2b75ea55f +size 67309718 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_19/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_19/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7d98a4c31317da80f8b07b6e662c5204bb777a3a --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_19/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "l1_penalty": 0.9, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_19/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_19/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7f800e7c0ad7c47e18270a98cc538b9a41253877 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_19/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.1556220054626465, "l1_loss": 85.76207733154297, "l0": 132.45499420166016, "frac_variance_explained": 0.9457806944847107, "cossim": 0.9629827737808228, "l2_ratio": 0.9646439850330353, "relative_reconstruction_bias": 1.002303421497345, "loss_original": 5.109375, "loss_reconstructed": 5.3125, "loss_zero": 10.25, "frac_recovered": 0.9609375, "frac_alive": 0.3802490234375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_2/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e95f658978f92180f62bfc0973507b37b9caac16 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fd83f7a405b932f0caddf73fa6fec769e8cbb6098cceaacc50438ac13f1a4e9 +size 67309718 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_2/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..25c3b1a68afa9c4b70c14c7345634ab9b6c08a65 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_2/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "l1_penalty": 0.1, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_2/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f575099dc342f83e7cecaeb0da7dc7f93c624d61 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.022075593471527, "l1_loss": 159.20934295654297, "l0": 437.47999572753906, "frac_variance_explained": 0.9937213957309723, "cossim": 0.991720050573349, "l2_ratio": 1.0012033581733704, "relative_reconstruction_bias": 1.010353922843933, "loss_original": 5.109375, "loss_reconstructed": 5.171875, "loss_zero": 10.25, "frac_recovered": 0.98828125, "frac_alive": 0.13287353515625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_3/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d38283c86e89cc9bfe8246c3aea2dfbb12fa083 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f64b507c50a1ed1c807dc7e065816fa3be9bb9fd538224c5a71d57d85debc31 +size 67309718 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_3/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cc6251bf863fdc7ac6c66632f7c559e2d50c7c74 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_3/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "l1_penalty": 0.1, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_3/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f57db145a6ee1658a09dc233dd24d79c5069e85a --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.4588934928178787, "l1_loss": 251.64488983154297, "l0": 626.2799682617188, "frac_variance_explained": 0.999695897102356, "cossim": 0.9996055960655212, "l2_ratio": 0.9995828866958618, "relative_reconstruction_bias": 1.0001797676086426, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 10.25, "frac_recovered": 1.0, "frac_alive": 0.16845703125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_4/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f747bfce426ac0e73eb786934232614a085a303 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3206745529020dcb6e89d5a423f5eb6379b18426b746ea2807715a79164f29eb +size 16830614 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_4/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1622145e79df80ae332ed4591f8328e36a8fac18 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_4/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "l1_penalty": 0.3, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_4/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2e5270dd06de695fd4c69c850306ca0bec0ebecb --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.5167651176452637, "l1_loss": 138.07901763916016, "l0": 320.7649841308594, "frac_variance_explained": 0.976967453956604, "cossim": 0.9864054322242737, "l2_ratio": 0.9795602262020111, "relative_reconstruction_bias": 0.9929051101207733, "loss_original": 5.109375, "loss_reconstructed": 5.15625, "loss_zero": 10.25, "frac_recovered": 0.990234375, "frac_alive": 0.845947265625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_5/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c5776666aa25cbd3cfae635ce2eec5f26dba4c53 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bb3496923bd86cb2a621baea0cd94aeace666b9f727dd9ed089b73763dcd875 +size 16830614 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_5/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d80e5783eb0c670d587ff285bd37b9de876d79b4 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_5/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "l1_penalty": 0.3, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_5/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ac4b8904e41244ab7b34beba6903bc74bae9e131 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.1042667031288147, "l1_loss": 231.8655776977539, "l0": 416.26499938964844, "frac_variance_explained": 0.9974260926246643, "cossim": 0.9976780116558075, "l2_ratio": 0.9985581636428833, "relative_reconstruction_bias": 1.0010928511619568, "loss_original": 5.109375, "loss_reconstructed": 5.140625, "loss_zero": 10.25, "frac_recovered": 0.9921875, "frac_alive": 0.610595703125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_6/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_6/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..5d7a089e7e577689bea22173b8844d345a7a94a3 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_6/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05d028bd8eee0b5597f02f4d928b9311ee65a5e6ec6f240d22e657c8174c2138 +size 67309718 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_6/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_6/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3b8be631ca044dabbc0a257507a46ec912c75dfa --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_6/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "l1_penalty": 0.3, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_6/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_6/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7e1ec1527e7e2795b3ad8a00734ddb3d6e890367 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_6/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.697478413581848, "l1_loss": 130.88229370117188, "l0": 293.4949951171875, "frac_variance_explained": 0.9774413704872131, "cossim": 0.9855411946773529, "l2_ratio": 0.9804334044456482, "relative_reconstruction_bias": 0.9974142014980316, "loss_original": 5.109375, "loss_reconstructed": 5.15625, "loss_zero": 10.25, "frac_recovered": 0.990234375, "frac_alive": 0.28546142578125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_7/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_7/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea63809f24e341cd73a8adee28a51030e510f302 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_7/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16ac68da5383e8fe7aa05c551ac91823f6fd9af48a88e926c1f09db74cef855f +size 67309718 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_7/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_7/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6e09ed8896743995469a8952a4ec83d589505a08 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_7/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "l1_penalty": 0.3, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_7/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_7/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..69d0366a334d284794d56ef49632406f1d14940b --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_7/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.298915684223175, "l1_loss": 212.2872543334961, "l0": 552.8450012207031, "frac_variance_explained": 0.9958517253398895, "cossim": 0.9966045916080475, "l2_ratio": 0.9983577728271484, "relative_reconstruction_bias": 1.0026415586471558, "loss_original": 5.109375, "loss_reconstructed": 5.140625, "loss_zero": 10.25, "frac_recovered": 0.9921875, "frac_alive": 0.431396484375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_8/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_8/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..fedcb9d9af3124c68fdce67408f14f2c9f608db5 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_8/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4995c37696daf7f88a767b84cd6eea4ba4f3e8769dccb1d04bce062fabcdf50c +size 16830614 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_8/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_8/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1f1c85baa77ebe8828a01de5d8a2308e12e26c1a --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_8/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "l1_penalty": 0.5, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_8/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_8/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..73c77140e8fcaa2493d471568e7930cb9320e3c2 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_8/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.303896188735962, "l1_loss": 101.02452087402344, "l0": 211.1449966430664, "frac_variance_explained": 0.9402211010456085, "cossim": 0.976602166891098, "l2_ratio": 0.9772749245166779, "relative_reconstruction_bias": 1.001644790172577, "loss_original": 5.109375, "loss_reconstructed": 5.234375, "loss_zero": 10.25, "frac_recovered": 0.9765625, "frac_alive": 0.94970703125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_9/ae.pt b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_9/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e775de0dd2b14ab05671509304e146f8dda856d --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_9/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:562112841b266b9a76000fa8b273b66df5711aecb103889759a895be1e5e1005 +size 16830614 diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_9/config.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_9/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3e36ed8294135bcd85cefe5c8ef841de4d89baa1 --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_9/config.json @@ -0,0 +1,25 @@ +{ + "trainer": { + "dict_class": "GatedAutoEncoder", + "trainer_class": "GatedSAETrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "l1_penalty": 0.5, + "warmup_steps": 1000, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "GatedSAETrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_9/eval_results.json b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_9/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4c37a80b3bf0bed67f90fd702aaf6e80ca5b4f8a --- /dev/null +++ b/pythia70m_sweep_gated_ctx128_0730/resid_post_layer_4/trainer_9/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.4558699131011963, "l1_loss": 160.01457977294922, "l0": 303.6599884033203, "frac_variance_explained": 0.9797276258468628, "cossim": 0.9876122772693634, "l2_ratio": 0.9878036081790924, "relative_reconstruction_bias": 1.0016292035579681, "loss_original": 5.109375, "loss_reconstructed": 5.203125, "loss_zero": 10.25, "frac_recovered": 0.98046875, "frac_alive": 0.809814453125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_0/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f25278fb2681b3d312d1f5f3bffca47d36f6334d --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c18b2eceb76fc9471a2281476127f41998c71bd2e798190293a215bfecaf826c +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_0/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..913f3820cb4cc3d36f5f7f2fffafb985f0e579a1 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_0/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.01, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_0/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..419fbe71f1d310a7b24a580513bff660e414c9cd --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.21609389036893845, "l1_loss": 150.67198944091797, "l0": 510.75498962402344, "frac_variance_explained": 0.99986132979393, "cossim": 0.9998809993267059, "l2_ratio": 0.9997950494289398, "relative_reconstruction_bias": 0.9997367262840271, "loss_original": 5.109375, "loss_reconstructed": 5.140625, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.253662109375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_1/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f51f894a5f087232c9a4171acef8337fbd9e57fe --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d4305ecf766abd68b196d02ebcdb81db5a20da1383c220e924ec75b21fe7e73 +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_1/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..72fd06a09f83b5427ba9ef82319ddd067cbcd7cb --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_1/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.01, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_1/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..32b3ff2d2efdf6e5bc5fe663fed76f0144ca9eda --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.02932343166321516, "l1_loss": 155.51602172851562, "l0": 587.2949829101562, "frac_variance_explained": 0.9999975860118866, "cossim": 0.9999980926513672, "l2_ratio": 1.0003092885017395, "relative_reconstruction_bias": 1.000268816947937, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.288330078125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_10/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_10/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d2df0f3a9a85532f1ec0a4ea05f268681d9310b --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_10/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:507f4eaa36c87b047771d0829fa6b6be7c15b2a00d6d55a4801a28411c629795 +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_10/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_10/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9e4e93b04058b6bdd657e2b78dd6808bd882a9f4 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_10/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.03, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_10/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_10/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4c656f4feed1ef0953cc50c3189fd14a1d8523a7 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_10/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.0480795204639435, "l1_loss": 131.7548370361328, "l0": 421.4849853515625, "frac_variance_explained": 0.9954095184803009, "cossim": 0.9975515305995941, "l2_ratio": 0.9946234822273254, "relative_reconstruction_bias": 0.9981786608695984, "loss_original": 5.109375, "loss_reconstructed": 5.125, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.0633544921875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_11/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_11/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..204f430f96a45cbf1b63da606150737e4e9eaf4a --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_11/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59b1c66afb50f3e6d3e55ae99983a21d39c3393b7e69985cc080cc806f51c649 +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_11/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_11/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1b4a07e7d9984197503084d2abf0ed1069770003 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_11/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.03, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_11/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_11/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5ca0861e1badaa5a19995eca2f3575d6fdba8d69 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_11/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.1948772370815277, "l1_loss": 139.05583953857422, "l0": 522.8099670410156, "frac_variance_explained": 0.9995833039283752, "cossim": 0.9999565184116364, "l2_ratio": 0.9990783631801605, "relative_reconstruction_bias": 0.999512255191803, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.07342529296875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_12/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_12/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2df11e362d6fc27c1a07b62007245464d5272044 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_12/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd2163d0bd2b7424c78349c84d9343998ba66f1d77988fe7c05c4795aa146042 +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_12/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_12/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b6a00d65116a6803e0859cf273b68bb28638f491 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_12/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.04, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_12/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_12/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d833b9de747b39d5be569f659b6d9459258a7245 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_12/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.416576623916626, "l1_loss": 43.03148651123047, "l0": 66.84499740600586, "frac_variance_explained": 0.891884982585907, "cossim": 0.9436231255531311, "l2_ratio": 0.9368505477905273, "relative_reconstruction_bias": 0.996953547000885, "loss_original": 5.109375, "loss_reconstructed": 5.671875, "loss_zero": 13.5625, "frac_recovered": 0.935546875, "frac_alive": 0.611572265625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_13/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_13/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..abbc2ac75c7f573e2cc29af8a34e83fc3ae1090a --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_13/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1c93fb604a85e8c1369c11be2883547814a6a1571ad3c575abb61fd64fd9274 +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_13/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_13/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c766b8c67e19bc5b2ef993e88f43d4a43f78967e --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_13/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.04, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_13/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_13/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e72dba630cb2ff0a6cbcde394d7912f809b966fb --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_13/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.8315067291259766, "l1_loss": 51.60461616516113, "l0": 104.375, "frac_variance_explained": 0.9285378754138947, "cossim": 0.959200382232666, "l2_ratio": 0.9464247524738312, "relative_reconstruction_bias": 0.9940671026706696, "loss_original": 5.109375, "loss_reconstructed": 5.515625, "loss_zero": 13.5625, "frac_recovered": 0.955078125, "frac_alive": 0.77294921875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_14/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_14/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..42c177713621951bd63acc55c106b734cb8ff95a --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_14/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df7fee42b0e4422166b5b7549c9a89be29f8149d706728c662548a53004b29eb +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_14/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_14/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bfb101725fd1c1da5f8cd8e929c664c935085d27 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_14/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.04, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_14/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_14/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..332d7c4d4eeeae88dce075a462646c372d2cac3c --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_14/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.26875376701355, "l1_loss": 41.876617431640625, "l0": 57.474998474121094, "frac_variance_explained": 0.9721391499042511, "cossim": 0.9500290751457214, "l2_ratio": 0.9454397559165955, "relative_reconstruction_bias": 0.9992501735687256, "loss_original": 5.109375, "loss_reconstructed": 5.5625, "loss_zero": 13.5625, "frac_recovered": 0.94921875, "frac_alive": 0.19366455078125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_15/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_15/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8854d0773cc3483c7b78d88b8cf36f4474d27668 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_15/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1db44d0781d46116f9337c3c3c57abe9a1769fa19b28663fb568445d62788b8e +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_15/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_15/config.json new file mode 100644 index 0000000000000000000000000000000000000000..90670bf7a2fd113f9c3347a3e4a555bef62f8211 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_15/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.04, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_15/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_15/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9a94f782fe506ef7c89ed8f33845d16706fab58b --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_15/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.572789192199707, "l1_loss": 51.8031005859375, "l0": 100.07999801635742, "frac_variance_explained": 0.9845757782459259, "cossim": 0.963876485824585, "l2_ratio": 0.9531786143779755, "relative_reconstruction_bias": 0.9983784556388855, "loss_original": 5.109375, "loss_reconstructed": 5.40625, "loss_zero": 13.5625, "frac_recovered": 0.966796875, "frac_alive": 0.308837890625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_16/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_16/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d522dd4bd29ea29769d4f94b158629d37bb2b63d --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_16/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f78950bee4fdea609e0e470f40e693ca6124f739f0b87b5f8a3cb0cdd4d354a8 +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_16/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_16/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0ca9ce346eced3b2d7cb237badc198d4b6d35d8b --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_16/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.05, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_16/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_16/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..91adba2bd42b8c8feed6bc3658523df56941fff0 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_16/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.12757682800293, "l1_loss": 29.82408618927002, "l0": 32.66999816894531, "frac_variance_explained": 0.8443773984909058, "cossim": 0.9261208176612854, "l2_ratio": 0.9158139526844025, "relative_reconstruction_bias": 0.9933127462863922, "loss_original": 5.109375, "loss_reconstructed": 5.9375, "loss_zero": 13.5625, "frac_recovered": 0.90234375, "frac_alive": 0.48193359375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_17/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_17/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a2930d5bc2ee850f3d8aa9bc13345be05ee4601a --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_17/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe31742b1510dc0c197d1b5a5609e2627e75055b9823ae5e653a5dc261fad646 +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_17/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_17/config.json new file mode 100644 index 0000000000000000000000000000000000000000..25442755707ea62dc857150907b98d7c2f007bd9 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_17/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.05, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_17/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_17/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a83fe880b6dad930d1a327a1715c30dc94770881 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_17/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.712599039077759, "l1_loss": 34.4686164855957, "l0": 43.97999954223633, "frac_variance_explained": 0.9446156024932861, "cossim": 0.9378908276557922, "l2_ratio": 0.9257955849170685, "relative_reconstruction_bias": 0.9962490499019623, "loss_original": 5.109375, "loss_reconstructed": 5.9375, "loss_zero": 13.5625, "frac_recovered": 0.90234375, "frac_alive": 0.551025390625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_18/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_18/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f7542dbc95942a3f1f07412c24be036099ddd20 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_18/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c85e5224a4d07a538b97db81a6f8319b836c50c9f07e3c4534d34cd636bb52a6 +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_18/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_18/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e97fe761a28bcfce8bed4d986d458c1ec9089e6e --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_18/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.05, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_18/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_18/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..02e30f1ccda6237846ca738f46620cea2a8e75d1 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_18/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.838374376296997, "l1_loss": 30.684948921203613, "l0": 31.459999084472656, "frac_variance_explained": 0.9372002482414246, "cossim": 0.936735600233078, "l2_ratio": 0.9288395345211029, "relative_reconstruction_bias": 0.9983713626861572, "loss_original": 5.109375, "loss_reconstructed": 5.765625, "loss_zero": 13.5625, "frac_recovered": 0.923828125, "frac_alive": 0.138916015625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_19/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_19/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..adcf4703f2f2899a74afe10f17e8b62007e93c74 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_19/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ce3e9b474d4eb2aa23f7ec9b0a673af1cdc053f161b3590a56dc31a0382ab3a +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_19/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_19/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b703b3381f4a49869ff2f0ebbec444cde1015979 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_19/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.05, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_19/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_19/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..83420bceb8335f613d313bd5242681998837045e --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_19/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.419987201690674, "l1_loss": 34.6947078704834, "l0": 48.209999084472656, "frac_variance_explained": 0.9495670795440674, "cossim": 0.9466882050037384, "l2_ratio": 0.929094672203064, "relative_reconstruction_bias": 0.994190514087677, "loss_original": 5.109375, "loss_reconstructed": 5.59375, "loss_zero": 13.5625, "frac_recovered": 0.943359375, "frac_alive": 0.20941162109375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_2/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad45cfa2421043603a89fe18778a31e5deac7552 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de926e37c393d19e56f2048a41d3d2f2ac1ce5054b1d04afdd0db2b103b34ea9 +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_2/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1a591689ba0471664fb9188c4433628eb30aa253 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_2/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.01, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_2/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8a5ffc8ea140a25b5e7b73fcf8df4e61859f68a6 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.22388017922639847, "l1_loss": 155.8218002319336, "l0": 508.22499084472656, "frac_variance_explained": 0.9999211132526398, "cossim": 0.999888002872467, "l2_ratio": 0.9996789395809174, "relative_reconstruction_bias": 1.000136375427246, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.0628662109375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_20/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_20/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..555eacc65b700a6d05ef446fa02adcb9c5a127d7 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_20/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e938228c4e6f9ae6c36e102752534b27fabc3d7a9a70a03288fb6c589ed1e6d4 +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_20/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_20/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d24987b63026d77f4485bc74ce3b67f348c280fa --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_20/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.075, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_20/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_20/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1f9657cd3c4886698477edd8ed522d9e235fb2ea --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_20/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.81648063659668, "l1_loss": 20.48844051361084, "l0": 12.8149995803833, "frac_variance_explained": 0.8898823261260986, "cossim": 0.9034929275512695, "l2_ratio": 0.8993695676326752, "relative_reconstruction_bias": 0.9981004297733307, "loss_original": 5.109375, "loss_reconstructed": 6.53125, "loss_zero": 13.5625, "frac_recovered": 0.83203125, "frac_alive": 0.249755859375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_21/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_21/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ccdcbb1f291e6104279b27b9bfe8f08f9bfeaa64 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_21/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbf277dd47726dede855febb5a81b3133f5ccaf4bc605f9f5b07ba5f851667db +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_21/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_21/config.json new file mode 100644 index 0000000000000000000000000000000000000000..77976d906c625bd2c65dd6b41e479f7d10c990e8 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_21/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.075, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_21/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_21/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7775050f9c83f1ced4a1672ab1d78d54d23ed5c7 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_21/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.78309178352356, "l1_loss": 21.81944751739502, "l0": 15.394999980926514, "frac_variance_explained": 0.8957757949829102, "cossim": 0.9020546674728394, "l2_ratio": 0.8816862106323242, "relative_reconstruction_bias": 0.9936496317386627, "loss_original": 5.109375, "loss_reconstructed": 6.484375, "loss_zero": 13.5625, "frac_recovered": 0.837890625, "frac_alive": 0.24560546875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_22/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_22/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a3ca3cb66c310f5eee1895a79839683717d3e18 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_22/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a95a1aad853c399d9fd63c4b6bea826533b0a5de3c9d65f0e453eb70c2b45eee +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_22/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_22/config.json new file mode 100644 index 0000000000000000000000000000000000000000..69ad92c93c71792e21b53d90ffd03d5293053258 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_22/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.075, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_22/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_22/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..48d4a2a63619df77eb5f8ac473b32c7d62d22d68 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_22/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.818094968795776, "l1_loss": 20.369800567626953, "l0": 13.639999866485596, "frac_variance_explained": 0.8261236250400543, "cossim": 0.9050480723381042, "l2_ratio": 0.8951642513275146, "relative_reconstruction_bias": 0.9956602454185486, "loss_original": 5.109375, "loss_reconstructed": 6.265625, "loss_zero": 13.5625, "frac_recovered": 0.865234375, "frac_alive": 0.0767822265625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_23/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_23/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..54ef8938325a3d921675f30306c87992413ad425 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_23/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9d162c25c8f1d4544fb0b75bd8496b7e5d2496f47216e6218bc8b71964106d9 +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_23/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_23/config.json new file mode 100644 index 0000000000000000000000000000000000000000..be8fc4ba3a339471085b8b379aa206f4321edb56 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_23/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.075, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_23/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_23/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..302712ea3d001a8b57024d26ecfc21142affcad9 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_23/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.472687244415283, "l1_loss": 25.47298240661621, "l0": 17.045000076293945, "frac_variance_explained": 0.9460815191268921, "cossim": 0.9174149036407471, "l2_ratio": 0.9021809995174408, "relative_reconstruction_bias": 0.9954875409603119, "loss_original": 5.109375, "loss_reconstructed": 6.1875, "loss_zero": 13.5625, "frac_recovered": 0.875, "frac_alive": 0.0833740234375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_24/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_24/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c2711998058ede59ba5b9a160586aed7895dedf --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_24/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f945e928e4ffba155ae213d5d72cafac089d0d1999e578053c53cf3051243e6 +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_24/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_24/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6f898d965f3952ab15fe9a30d1eb0a0667f725a2 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_24/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.1, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_24/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_24/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..217b01a124ee67445aa97471fbab7baf9754e0ae --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_24/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 6.858829021453857, "l1_loss": 13.847587585449219, "l0": 6.924999952316284, "frac_variance_explained": 0.6133486926555634, "cossim": 0.8654587268829346, "l2_ratio": 0.8523955047130585, "relative_reconstruction_bias": 0.9852319955825806, "loss_original": 5.109375, "loss_reconstructed": 6.9375, "loss_zero": 13.5625, "frac_recovered": 0.78515625, "frac_alive": 0.14599609375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_25/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_25/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..872c01b849661a167c280bdfa19ceb411495055b --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_25/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ff5621fae5e0f52ccb4a4ba4c2e9fec2c74f69d730dad0ba079e77f281517d8 +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_25/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_25/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3d8e38cb38535f6ea41f2ee184f29896c877aa1e --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_25/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.1, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_25/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_25/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f35b8d3db43dc7cbd675bc79109dff36b8303dde --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_25/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 6.82185959815979, "l1_loss": 19.77766704559326, "l0": 7.565000057220459, "frac_variance_explained": 0.9435884654521942, "cossim": 0.8683286011219025, "l2_ratio": 0.8457324802875519, "relative_reconstruction_bias": 0.9974140822887421, "loss_original": 5.109375, "loss_reconstructed": 7.03125, "loss_zero": 13.5625, "frac_recovered": 0.7734375, "frac_alive": 0.119873046875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_26/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_26/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b441d34ca9d7ede90dd758cd67ca5dc293fed9d5 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_26/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d18cd391458807c220787902ddb3f8350efc11d9f5cf9d873c25d36402d1f226 +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_26/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_26/config.json new file mode 100644 index 0000000000000000000000000000000000000000..62b33d9cdaea6cb40a10db3d049033da5bcf6278 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_26/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.1, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_26/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_26/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5f27e4967614ef306d0ecf6813ca6b315ca65e47 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_26/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 6.5307347774505615, "l1_loss": 16.44059658050537, "l0": 7.454999923706055, "frac_variance_explained": 0.7779999077320099, "cossim": 0.87916100025177, "l2_ratio": 0.8708495199680328, "relative_reconstruction_bias": 0.9962940216064453, "loss_original": 5.109375, "loss_reconstructed": 6.75, "loss_zero": 13.5625, "frac_recovered": 0.80859375, "frac_alive": 0.04559326171875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_27/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_27/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..89f41c105b28ee320719e338b9ab571e6a906f1d --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_27/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87a7785905890808818fb5f8e17e6bf98af8786b396681ed7f62dd1fd503b3af +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_27/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_27/config.json new file mode 100644 index 0000000000000000000000000000000000000000..444e801dd798993bda6c01fde0caf15d6273e12f --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_27/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.1, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_27/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_27/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..06af8a4ff82ce65c22f318abb413cbdc51eeeb13 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_27/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 6.400873184204102, "l1_loss": 19.371691703796387, "l0": 8.315000057220459, "frac_variance_explained": 0.9406805038452148, "cossim": 0.8801274299621582, "l2_ratio": 0.8533273041248322, "relative_reconstruction_bias": 0.998054027557373, "loss_original": 5.109375, "loss_reconstructed": 6.828125, "loss_zero": 13.5625, "frac_recovered": 0.798828125, "frac_alive": 0.03961181640625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_3/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1df68f9ea68259f96e9b4e1bd114ce808eef45d4 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0dec011d598b06cb583fbeaf81cddd9cfef85c317aab5a4e64484a0cf93383c +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_3/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..dd7f7a50ae90ca8e321089f5d08912a308077451 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_3/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.01, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_3/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ac264fcfbecff00d2bfd7ffc60fbdeb5f0ff7b35 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.03100821003317833, "l1_loss": 153.1534652709961, "l0": 602.719970703125, "frac_variance_explained": 0.9999979734420776, "cossim": 0.9999979734420776, "l2_ratio": 0.9995081424713135, "relative_reconstruction_bias": 0.9999050796031952, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.07452392578125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_4/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a6321f54e2e536e60b127a9a93a13c7d17c5eabb --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2c38f55da931a46f691f2f19617aec8b55350198bbf571b97d305a60c331a87 +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_4/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..60b22e1fecb3c87e52f85bd42fdd7eb9de8c2b71 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_4/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.02, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_4/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d379cbe514d96f4846e3a2e552f52290ca902f88 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.38318856060504913, "l1_loss": 144.01636505126953, "l0": 491.97499084472656, "frac_variance_explained": 0.9992590248584747, "cossim": 0.9996412992477417, "l2_ratio": 0.9988088011741638, "relative_reconstruction_bias": 0.9999025464057922, "loss_original": 5.109375, "loss_reconstructed": 5.125, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.252197265625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_5/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..9248d115a07cef9de32ed522b67c609dd8b1600d --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcc2dffd3fb64d64cbdf6d8a71e8f5cc1a3d69cf367c041c47d7efbfc25acddb +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_5/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4ac8f8978b1884aba791a1f02100ffc5cd470c2a --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_5/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.02, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_5/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..41ef1bf909799d8b227fffb9d72ef18145c877b5 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.06978094391524792, "l1_loss": 145.9539794921875, "l0": 597.3599853515625, "frac_variance_explained": 0.999978095293045, "cossim": 0.9999900460243225, "l2_ratio": 0.999765932559967, "relative_reconstruction_bias": 0.9999020397663116, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.298828125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_6/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_6/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8234607641182fb61d10ab452ca80d9eb934c2b6 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_6/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb66b059e3804be824739f4842748b7801ba3545f3029431fd287ef30bca6c70 +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_6/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_6/config.json new file mode 100644 index 0000000000000000000000000000000000000000..593dfae64967b9d0c3f89e3e3c5491e6df315e32 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_6/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.02, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_6/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_6/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..fc74951da615e4ae1f37c5ddd865d531a55b4841 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_6/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.42643897235393524, "l1_loss": 148.5589370727539, "l0": 490.9599914550781, "frac_variance_explained": 0.9992411434650421, "cossim": 0.9995614290237427, "l2_ratio": 0.9989446997642517, "relative_reconstruction_bias": 0.9999562203884125, "loss_original": 5.109375, "loss_reconstructed": 5.125, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.06304931640625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_7/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_7/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e85e826ba284dff3e459a4cb41628579fe650709 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_7/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31a8b0ab5cb46b243e73e63d32e6e3158e7d882062bbe51d0a26b6dc47c55d21 +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_7/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_7/config.json new file mode 100644 index 0000000000000000000000000000000000000000..941f2634bed6343194e290047b8170db032e1516 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_7/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.02, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_7/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_7/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..89e717716f805000fbf9608261ca1799a2bfb5dc --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_7/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.0824446864426136, "l1_loss": 146.33584594726562, "l0": 621.1149597167969, "frac_variance_explained": 0.999934583902359, "cossim": 0.9999889731407166, "l2_ratio": 0.9997517764568329, "relative_reconstruction_bias": 0.9995286762714386, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.0792236328125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_8/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_8/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6fe5d58ee07c22aa851af0e0c53ca72d5bcecd0f --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_8/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f349b3800d5f2ea0541665fd1db611545b1819b11cf0ee9b68277f14cbf36a23 +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_8/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_8/config.json new file mode 100644 index 0000000000000000000000000000000000000000..eb0d4b2931064852f18df9d914d4ec29d259107d --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_8/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.03, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_8/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_8/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..72fafe6d4946ad5f8fdccdcd1222e8507e45e651 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_8/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.1386768817901611, "l1_loss": 130.00470733642578, "l0": 418.114990234375, "frac_variance_explained": 0.9965454936027527, "cossim": 0.9970986843109131, "l2_ratio": 0.9937970340251923, "relative_reconstruction_bias": 0.9983652234077454, "loss_original": 5.109375, "loss_reconstructed": 5.15625, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.252685546875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_9/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_9/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae35f8b384d684cf8fa847b2f2fdaf30910e8754 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_9/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60685691e9030464973a695095824f9b3521d497786505316b4ef88787311eb4 +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_9/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_9/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2cde3e19c962a2b5b4bc0ed9c30b216400f0f109 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_9/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.03, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_9/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_9/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9e71b462f1dedb3606b87c0a5122924b5a99253f --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_3/trainer_9/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.1700434610247612, "l1_loss": 141.11553192138672, "l0": 525.3999938964844, "frac_variance_explained": 0.9998012483119965, "cossim": 0.9999567270278931, "l2_ratio": 0.9991698265075684, "relative_reconstruction_bias": 0.9995779097080231, "loss_original": 5.109375, "loss_reconstructed": 5.125, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.282958984375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_0/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4fbdd04d564a329243849c64f98b72d5e3bac82d --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3601d0063efbc096c8894ec3cbf8921e1b27a29f1a0a355e0375708aa558a6aa +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_0/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..23a20a503c358ac03d4fd58b6d42107f56f63d72 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_0/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.01, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_0/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..13339f247bdde37aa8c984f78fe24609eba1675a --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.23356567323207855, "l1_loss": 175.58765411376953, "l0": 510.95997619628906, "frac_variance_explained": 0.9997811317443848, "cossim": 0.9998801350593567, "l2_ratio": 0.9993935525417328, "relative_reconstruction_bias": 0.9995065033435822, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 10.25, "frac_recovered": 1.0, "frac_alive": 0.252685546875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_1/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..55c5b09ac06c243ff0b3164ee932f7c08e322d13 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06bbf7e63603ff6e8eaf80da24bf7d602811714f60111bc45e5d4d53cb2b1110 +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_1/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..77d6306d00887a871ba79abb66d15fb61420a3dc --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_1/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.01, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_1/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..052a89a8b67201a7f24f74e06cbcc9df7655c29b --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.032060159370303154, "l1_loss": 182.47359466552734, "l0": 572.1199951171875, "frac_variance_explained": 0.9999948740005493, "cossim": 0.9999981820583344, "l2_ratio": 1.000036358833313, "relative_reconstruction_bias": 0.9999651610851288, "loss_original": 5.109375, "loss_reconstructed": 5.09375, "loss_zero": 10.25, "frac_recovered": 1.00390625, "frac_alive": 0.28125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_10/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_10/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..74a7955715cde0b892aed46dc5c85fbadcbab58b --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_10/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4420172d3372301af83169cca7ccaebfdfbee264e509db2609cb96c35395964f +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_10/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_10/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8ba11ff976a57eaf836820fc8a887894dc10c0ae --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_10/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.03, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_10/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_10/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b6267dbc30727e24e9087c1dfb3d6689062bbdf1 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_10/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.9199702441692352, "l1_loss": 165.20439910888672, "l0": 451.4700012207031, "frac_variance_explained": 0.9977467358112335, "cossim": 0.9984953701496124, "l2_ratio": 0.9962930977344513, "relative_reconstruction_bias": 0.999160885810852, "loss_original": 5.109375, "loss_reconstructed": 5.140625, "loss_zero": 10.25, "frac_recovered": 0.9921875, "frac_alive": 0.06317138671875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_11/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_11/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c955957621e3937ae15c2afaaac02b46c68ad4d --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_11/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc28e084b4bd4fab0f01f778f0ed96444fb3ae88c0b1b71548c0d79bbc0c3bb1 +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_11/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_11/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5e08d1788003108200f3aac9b789ccfc9aa83678 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_11/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.03, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_11/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_11/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e17e4eb517ab7d2601dfe21e191e0c0a2e4eb357 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_11/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.15277034044265747, "l1_loss": 171.46498107910156, "l0": 542.3349914550781, "frac_variance_explained": 0.9996510744094849, "cossim": 0.9999645054340363, "l2_ratio": 0.9995242953300476, "relative_reconstruction_bias": 1.0008074045181274, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 10.25, "frac_recovered": 1.0, "frac_alive": 0.07183837890625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_12/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_12/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..57e9489010e9561a4b976e2a714872e3310addab --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_12/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fa9639ed5b3ca9ca16b2f4d9d0ad55a1fa722013eb3e38ce3886e00575f5e65 +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_12/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_12/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e4171cbe95d8bf168684138c53448fc0bdb13183 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_12/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.04, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_12/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_12/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..fa67540dab11e2fa0c9d663af81972352d8bfd2f --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_12/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.951264142990112, "l1_loss": 55.17291450500488, "l0": 87.83499908447266, "frac_variance_explained": 0.9058527648448944, "cossim": 0.9457570016384125, "l2_ratio": 0.9423305094242096, "relative_reconstruction_bias": 0.9989176988601685, "loss_original": 5.109375, "loss_reconstructed": 5.578125, "loss_zero": 10.25, "frac_recovered": 0.91015625, "frac_alive": 0.647705078125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_13/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_13/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..175188c405a7405c2a355bd5b8913456614d31da --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_13/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cee41a1d29f809a9d6e9cf015312c1ed246d559bd0e7368359c034dd37cb26a4 +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_13/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_13/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0fa8d16596a20e255e80408aa410f32c078603d7 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_13/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.04, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_13/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_13/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..acd8d79f6ee4455daa6b3acfd6b64452c8b3e5e1 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_13/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.268487215042114, "l1_loss": 70.76881408691406, "l0": 150.58499908447266, "frac_variance_explained": 0.9484507739543915, "cossim": 0.9612212479114532, "l2_ratio": 0.9500514268875122, "relative_reconstruction_bias": 0.994495689868927, "loss_original": 5.109375, "loss_reconstructed": 5.421875, "loss_zero": 10.25, "frac_recovered": 0.939453125, "frac_alive": 0.7890625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_14/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_14/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..01c7af948f6f803d89825698662f2af78c55ad2f --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_14/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86f541a8f7352c2e2e089c00788f7f08a580d4ae6484796f73f138cce3b7ff8a +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_14/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_14/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c319794181c8491232237e0789ed4096748f0ba3 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_14/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.04, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_14/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_14/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..858637f4617737c744b3649c274825c6e2a5d4e8 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_14/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.925657272338867, "l1_loss": 50.43879508972168, "l0": 81.7699966430664, "frac_variance_explained": 0.8834878504276276, "cossim": 0.9458162486553192, "l2_ratio": 0.9414721131324768, "relative_reconstruction_bias": 0.9992666244506836, "loss_original": 5.109375, "loss_reconstructed": 5.46875, "loss_zero": 10.25, "frac_recovered": 0.9296875, "frac_alive": 0.22235107421875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_15/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_15/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..818638e5e15c72ac39144e7924956327164e7510 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_15/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adf3af52cc34774129cae3c031ea2c1d07ee69d657c0cbdcc640b4da45e42dcf +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_15/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_15/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bf860fb412445278a6319839f924c998dcf70b06 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_15/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.04, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_15/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_15/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9fa5933f3711675494f3499dc872742f30ea7789 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_15/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.141657829284668, "l1_loss": 65.56314468383789, "l0": 151.83999633789062, "frac_variance_explained": 0.9252040386199951, "cossim": 0.9616039991378784, "l2_ratio": 0.9477808773517609, "relative_reconstruction_bias": 0.9942740797996521, "loss_original": 5.109375, "loss_reconstructed": 5.34375, "loss_zero": 10.25, "frac_recovered": 0.955078125, "frac_alive": 0.39404296875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_16/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_16/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d02929dc6fdf8681a47e6462e29875424f1dba7e --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_16/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7de4991aa7eb9ca6deac90e7ef960b9d3117d79b5341e166198ee4d1f2de6511 +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_16/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_16/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4cc689359fbb03bd7f218f2cffa5114b58c26a8c --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_16/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.05, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_16/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_16/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d5c09cc7d8765c92a76eeb9c3fefac4c24a3e346 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_16/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.974883556365967, "l1_loss": 34.8562068939209, "l0": 37.68499946594238, "frac_variance_explained": 0.8844049274921417, "cossim": 0.9215801656246185, "l2_ratio": 0.9130880832672119, "relative_reconstruction_bias": 0.9982720017433167, "loss_original": 5.109375, "loss_reconstructed": 5.84375, "loss_zero": 10.25, "frac_recovered": 0.857421875, "frac_alive": 0.5322265625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_17/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_17/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..11e22ddb9e62a7543b2325b1490b7f9a7fab611d --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_17/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b06e2dc7a245ce90480997b7d9702dc7f6e743c6653caa397d8e1152f3752969 +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_17/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_17/config.json new file mode 100644 index 0000000000000000000000000000000000000000..41869cd196e98d4f34ad761c298a83c7ba2e9380 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_17/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.05, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_17/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_17/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..14410878bedcd05a81229f7d73bab611829b1bce --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_17/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.503576755523682, "l1_loss": 39.25530242919922, "l0": 55.60999870300293, "frac_variance_explained": 0.8706925809383392, "cossim": 0.9321731925010681, "l2_ratio": 0.9155045449733734, "relative_reconstruction_bias": 0.991199404001236, "loss_original": 5.109375, "loss_reconstructed": 5.8125, "loss_zero": 10.25, "frac_recovered": 0.86328125, "frac_alive": 0.615966796875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_18/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_18/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba0818a0e2bc9fc370ce8a9f2d488b0b4e279061 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_18/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d93cf0754123901d54ccc0e06076f1a64cee2a98ddb37843bb218ffd96fa4fa6 +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_18/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_18/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e70273ed49ecf3c0b18e4302abb65ec96b886c5c --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_18/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.05, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_18/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_18/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..fc45d01d8d1975ce78215dc7771c91f5eb12c5ee --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_18/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.816593885421753, "l1_loss": 34.53453254699707, "l0": 37.15999984741211, "frac_variance_explained": 0.8668506741523743, "cossim": 0.9258524179458618, "l2_ratio": 0.9174408316612244, "relative_reconstruction_bias": 0.9962100386619568, "loss_original": 5.109375, "loss_reconstructed": 5.671875, "loss_zero": 10.25, "frac_recovered": 0.890625, "frac_alive": 0.17987060546875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_19/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_19/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8786595f12ef8a0e798ee44b5d58fd6a197c22f7 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_19/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e287f27143c7f08b0982bda076406aa9df0e7f1c3c944a395025dc39bbdae2f +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_19/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_19/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7214604d7032fbf6ce89d07b6bead2c039ba4516 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_19/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.05, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_19/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_19/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..02646e1d6ee4379a622349da24da410d4fa4f196 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_19/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.219098091125488, "l1_loss": 40.202070236206055, "l0": 63.424997329711914, "frac_variance_explained": 0.9080729484558105, "cossim": 0.9381819069385529, "l2_ratio": 0.9213614463806152, "relative_reconstruction_bias": 0.9927374124526978, "loss_original": 5.109375, "loss_reconstructed": 5.5625, "loss_zero": 10.25, "frac_recovered": 0.912109375, "frac_alive": 0.2655029296875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_2/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d4b6005cd7783eae6b49ddc80e07e86010cbad6 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b345b26e59936490743bc156fbd3caa62a90c1632adaf634c9538efd58829e44 +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_2/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f0b233272e81cd062568d7aafb656f97077b4cd1 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_2/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.01, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_2/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d7d4802154090346037036ee31c7a5b4d50fc6e4 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.23447784781455994, "l1_loss": 177.55458068847656, "l0": 507.4149932861328, "frac_variance_explained": 0.999827116727829, "cossim": 0.9998936355113983, "l2_ratio": 0.9987513720989227, "relative_reconstruction_bias": 0.9991304874420166, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 10.25, "frac_recovered": 1.0, "frac_alive": 0.06280517578125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_20/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_20/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b3ddd7dd918ba8162645fd8d5e64231a568d2b5d --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_20/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24b7e2f0f4e3c56df7934e501db4980b105dbf79ba3d413152c2dbe99a8a3639 +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_20/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_20/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ea21b5be734446236601a2488794c90b520abace --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_20/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.075, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_20/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_20/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bb017801cf91e862c3ff82079564026bc7213b3a --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_20/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 7.155976057052612, "l1_loss": 20.506074905395508, "l0": 12.434999465942383, "frac_variance_explained": 0.8012629151344299, "cossim": 0.8825390338897705, "l2_ratio": 0.8612985014915466, "relative_reconstruction_bias": 0.9897655546665192, "loss_original": 5.109375, "loss_reconstructed": 6.265625, "loss_zero": 10.25, "frac_recovered": 0.775390625, "frac_alive": 0.27685546875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_21/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_21/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..574372ccc49bddeb272825471b980f64b32c0de8 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_21/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:779d86d5cfff2230f4352212722e0c14c7451dbede4f555ec0eab6e27253d29c +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_21/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_21/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a8d477d01af107b758f6624ccbbe4f327f39fa59 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_21/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.075, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_21/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_21/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b8d1d9c937621379b765a279969d6a17163bbff1 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_21/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 7.070794343948364, "l1_loss": 21.63303852081299, "l0": 16.039999961853027, "frac_variance_explained": 0.703086793422699, "cossim": 0.888356477022171, "l2_ratio": 0.8531085550785065, "relative_reconstruction_bias": 0.9709645509719849, "loss_original": 5.109375, "loss_reconstructed": 6.46875, "loss_zero": 10.25, "frac_recovered": 0.734375, "frac_alive": 0.283935546875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_22/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_22/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e1040390dedc244c4291c49168e6dac7117fbe46 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_22/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3befa1f910e526d38dd4be124cd22ad54b254cb2b14b55a66d4254cbb99431e +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_22/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_22/config.json new file mode 100644 index 0000000000000000000000000000000000000000..55b2d4f955c434287886a2d95ea9a32db869f58e --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_22/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.075, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_22/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_22/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3767b56915baddb12607ea43d032fef8c060c486 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_22/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 6.888845920562744, "l1_loss": 21.864633560180664, "l0": 13.72499942779541, "frac_variance_explained": 0.8301891088485718, "cossim": 0.8908891975879669, "l2_ratio": 0.8768014907836914, "relative_reconstruction_bias": 0.994087278842926, "loss_original": 5.109375, "loss_reconstructed": 6.09375, "loss_zero": 10.25, "frac_recovered": 0.80859375, "frac_alive": 0.09033203125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_23/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_23/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b0f1cdf66e774b6b07e2807c3a954f11253e73b4 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_23/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a82ad08bd0efd3fd7efa3b896ec5cd73c3cd822e3b864eb13e127cdb6b564fb1 +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_23/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_23/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9032fcccdbb29b4336b3ed0ebef135bf7e501a0b --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_23/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.075, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_23/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_23/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1af6f8b3d9d6f125e62fbc81ddc580dea4056826 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_23/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 6.76258659362793, "l1_loss": 23.331228256225586, "l0": 18.010000228881836, "frac_variance_explained": 0.783660888671875, "cossim": 0.8971452713012695, "l2_ratio": 0.8641332983970642, "relative_reconstruction_bias": 0.9775561690330505, "loss_original": 5.109375, "loss_reconstructed": 6.046875, "loss_zero": 10.25, "frac_recovered": 0.818359375, "frac_alive": 0.10064697265625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_24/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_24/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..76cd27fb3cffb5cdc6a41e73123b396f65132200 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_24/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:247b38a5027c55b44e3428db2f482526be862808643f8385ad9218e3dd8caa4b +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_24/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_24/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7319a8b86c6c98ff46daa43e98ac32a899734e9c --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_24/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.1, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_24/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_24/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..96c066516ccd459649e100fc1e82954b5db02606 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_24/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 7.836331367492676, "l1_loss": 14.558531761169434, "l0": 6.504999876022339, "frac_variance_explained": 0.595892608165741, "cossim": 0.8519119620323181, "l2_ratio": 0.8318629562854767, "relative_reconstruction_bias": 0.9796263873577118, "loss_original": 5.109375, "loss_reconstructed": 6.890625, "loss_zero": 10.25, "frac_recovered": 0.65234375, "frac_alive": 0.16943359375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_25/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_25/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..701f25e22737edb6dcdcae3203369e4d0b4996e7 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_25/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48c8d47ce5e2e5d21c9484efc9fd2ffe928121a320f4086336f65f3b0cb61c3e +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_25/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_25/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d98ec6f0402f8079ceb1932ca7581612179a959b --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_25/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.1, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_25/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_25/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4b4c6ae584dbcb15d761a2ccf9e714aaca609a2a --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_25/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 8.214652299880981, "l1_loss": 16.31633949279785, "l0": 7.799999952316284, "frac_variance_explained": 0.6720309853553772, "cossim": 0.8515195250511169, "l2_ratio": 0.8166981935501099, "relative_reconstruction_bias": 0.9791772365570068, "loss_original": 5.109375, "loss_reconstructed": 6.984375, "loss_zero": 10.25, "frac_recovered": 0.634765625, "frac_alive": 0.139404296875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_26/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_26/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a3f20cf398e1fe3249a1a296b1cef5eaa7f1a8c --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_26/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd0d0f981df3308e7d4c6e8e63fd838c6c800aa3b8dc34f551b62b03cdbf5717 +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_26/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_26/config.json new file mode 100644 index 0000000000000000000000000000000000000000..508318de6ea89372507985f39d2bd91a8ff5c87a --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_26/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.1, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_26/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_26/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f1e084d28a48b3b5a7fe750302a2e75ff3997e26 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_26/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 7.769039154052734, "l1_loss": 16.687355995178223, "l0": 6.924999952316284, "frac_variance_explained": 0.773478239774704, "cossim": 0.860668420791626, "l2_ratio": 0.8444707095623016, "relative_reconstruction_bias": 0.9938578009605408, "loss_original": 5.109375, "loss_reconstructed": 6.671875, "loss_zero": 10.25, "frac_recovered": 0.6953125, "frac_alive": 0.05218505859375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_27/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_27/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c08afe15f0e74f9bfbe02c64273cb127fd257ae --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_27/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d4d9f96ad7b5c0f23eba993a9f00613ecd46653dc349f03b6ca81d7e75fc716 +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_27/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_27/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1fd09df3eebd97806742a5009e64ff8f1e8a14b7 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_27/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.1, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_27/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_27/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..04cc9033b3b5e64f93fc58465b133e13cf4ded4c --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_27/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 7.4865899085998535, "l1_loss": 18.57829761505127, "l0": 8.824999809265137, "frac_variance_explained": 0.8460719287395477, "cossim": 0.8731376826763153, "l2_ratio": 0.8349001705646515, "relative_reconstruction_bias": 0.9871676564216614, "loss_original": 5.109375, "loss_reconstructed": 6.625, "loss_zero": 10.25, "frac_recovered": 0.705078125, "frac_alive": 0.0455322265625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_3/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7aee5647c53858a02b24d2b27e5513c839c455c7 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f5e4f50f7c9f6291fc0cb128969819b9aafc6b2b56b063cb99d6aee6878ac0d +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_3/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3bc18d8a510c2c7dded0230a206d397d0b4ee99a --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_3/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.01, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_3/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..fc636c4de0099cc5e922eb456339341513d3b0bf --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.03312186896800995, "l1_loss": 178.4405746459961, "l0": 624.5649719238281, "frac_variance_explained": 0.9999954700469971, "cossim": 0.9999978244304657, "l2_ratio": 0.9997702836990356, "relative_reconstruction_bias": 0.9998632967472076, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 10.25, "frac_recovered": 1.0, "frac_alive": 0.07763671875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_4/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e0bd6c2e5458daebebc63b6de270c1f533ffd843 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73e048798dba039221a0d1527b1bb9c509caefbc727954795c7b3f5e3d409fcf +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_4/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c296503981ef7a90249f61ebe53dca1b7b9c0886 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_4/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.02, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_4/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b21a15572e30f7a490ae69e76cf23e4cad344e7b --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.44455401599407196, "l1_loss": 178.58230590820312, "l0": 497.79998779296875, "frac_variance_explained": 0.9994261860847473, "cossim": 0.9996283054351807, "l2_ratio": 0.9989120960235596, "relative_reconstruction_bias": 0.9998871088027954, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 10.25, "frac_recovered": 1.0, "frac_alive": 0.2529296875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_5/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a2eb4cfcd2101665ea8b93a3440c456d282214a --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c39790341de710d11168c9b13bdf7c197551938a5374753c9f0c3d98e7c5c78 +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_5/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d68fed7477ec18fabb9f6351b7a714f36ca87dac --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_5/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.02, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_5/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f7ff428a8c2d4e832a9c12e48280e4b9b2880df9 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.055815575644373894, "l1_loss": 174.553466796875, "l0": 589.5, "frac_variance_explained": 0.9999871253967285, "cossim": 0.9999943673610687, "l2_ratio": 0.9999815225601196, "relative_reconstruction_bias": 1.000096082687378, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 10.25, "frac_recovered": 1.0, "frac_alive": 0.2919921875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_6/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_6/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b948c22e3ab9092e8887c1fee310803154f590a8 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_6/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d252b8a8eed9c0aca90ac24daea3c5fce01cb60bc027261ce2736b882602de0d +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_6/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_6/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e7980d39f713d063c32b0148083d97a24be1e8b5 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_6/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.02, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_6/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_6/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5cfff120af0eb886eaa6c6d60590255c72fa0bbf --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_6/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.42415545880794525, "l1_loss": 176.83885192871094, "l0": 497.0299835205078, "frac_variance_explained": 0.9995298981666565, "cossim": 0.9996658861637115, "l2_ratio": 0.999405026435852, "relative_reconstruction_bias": 1.0001167058944702, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 10.25, "frac_recovered": 1.0, "frac_alive": 0.06304931640625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_7/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_7/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..09f4b9d77d02e47b9d6aeb7b9199b7615229c86d --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_7/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3521ef649bdcf0b7c9f3768cc50567691a9f6d50828ae00e8ecf2fd1ef38c5ae +size 67178280 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_7/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_7/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ad7f264d1215976a8f7322c961b41dbb97e85578 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_7/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.02, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_7/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_7/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..16adfbc6d55c3a393e14dc88915c59e9b53af377 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_7/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.0657585896551609, "l1_loss": 172.8652801513672, "l0": 621.3749694824219, "frac_variance_explained": 0.9999754428863525, "cossim": 0.9999916553497314, "l2_ratio": 0.9998246431350708, "relative_reconstruction_bias": 0.9998592734336853, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 10.25, "frac_recovered": 1.0, "frac_alive": 0.07720947265625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_8/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_8/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f06ae1eeb6cf4b60560dae29e3985651beb0cdb --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_8/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:586bfea161c7725aea036ea5ce885098feb42f9e0795f764a6418af93dc3ac9b +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_8/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_8/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3d0abfaca4577efe64417efa15ff6884341e2208 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_8/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.03, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_8/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_8/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bb5f56655d46edac7489e2eb432bf4f1a0c7372e --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_8/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.0281026363372803, "l1_loss": 164.44959259033203, "l0": 442.04998779296875, "frac_variance_explained": 0.9970531165599823, "cossim": 0.9979654848575592, "l2_ratio": 0.9944948852062225, "relative_reconstruction_bias": 0.9985988438129425, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 10.25, "frac_recovered": 1.0, "frac_alive": 0.252197265625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_9/ae.pt b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_9/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ef0bfb9be52d489cee65915ef2848f2cff795480 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_9/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fa5b8d5af89a3fbe607806d803931b5b666aba58620ff40c69f116e07f5f10f +size 16797480 diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_9/config.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_9/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5b2cd8cf5be414b6ce674d92c03a9b0bc83e15d7 --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_9/config.json @@ -0,0 +1,33 @@ +{ + "trainer": { + "trainer_class": "PAnnealTrainer", + "dict_class": "AutoEncoder", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "sparsity_function": "Lp^p", + "sparsity_penalty": 0.03, + "p_start": 1, + "p_end": 0.2, + "anneal_start": 10000, + "sparsity_queue_length": 10, + "n_sparsity_updates": 10, + "warmup_steps": 1000, + "resample_steps": null, + "steps": 48828, + "seed": 0, + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "PAnnealTrainer-pythia70m-4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_9/eval_results.json b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_9/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6276ba26b3ec82a2bf9cc300ec179642c403d0ea --- /dev/null +++ b/pythia70m_sweep_panneal_ctx128_0730/resid_post_layer_4/trainer_9/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.192142091691494, "l1_loss": 168.17107391357422, "l0": 537.6549987792969, "frac_variance_explained": 0.9993937015533447, "cossim": 0.9999546110630035, "l2_ratio": 0.9990874826908112, "relative_reconstruction_bias": 0.9989540278911591, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 10.25, "frac_recovered": 1.0, "frac_alive": 0.2822265625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_0/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a4d974878771098d65560d0c017c48eea57a3155 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:635418656393b4725cd1bce35ecae76847885b4ace12c435276a06788ca3a09b +size 16797480 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_0/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..11b0a441cc8048772edde91b32f74b48b8bf0b4b --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "l1_penalty": 0.01, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_0/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a6773de3d197f261a009dab07e395dd51a8f9bc2 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.12426043301820755, "l1_loss": 143.2674789428711, "l0": 595.7799987792969, "frac_variance_explained": 0.999964565038681, "cossim": 0.9999651312828064, "l2_ratio": 0.998753696680069, "relative_reconstruction_bias": 1.0003299117088318, "loss_original": 5.109375, "loss_reconstructed": 5.09375, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.294677734375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_1/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ee2cc2a6937c74b82388c18870ee923c661682ed --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e6cfb8ba8658e2c75a0f18d50609f15473a65d987140b2f43ae49f9bb627d53 +size 16797480 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_1/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..428239e5e8f158e9a11f6953865191511ad85fdd --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_1/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "l1_penalty": 0.01, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_1/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..66677e0352523da453752909027ea3c4466afe18 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.0810961164534092, "l1_loss": 156.89439392089844, "l0": 988.3049621582031, "frac_variance_explained": 0.9999762773513794, "cossim": 0.9999830424785614, "l2_ratio": 0.9994895756244659, "relative_reconstruction_bias": 0.9992925226688385, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.432861328125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_10/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_10/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc53fe1d227b1bec6612a7d3ab84772f4cfed1a0 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_10/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aba2c971fb5a1232d015bd5672bcdf8515028d6c50c8f21580c9dbfb01d0fb10 +size 67178280 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_10/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_10/config.json new file mode 100644 index 0000000000000000000000000000000000000000..887ba1a28a5bda37b8a34354ed77414114e229c2 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_10/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "l1_penalty": 0.075, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_10/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_10/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0e09c221424afb90f29d0b01cda9b7ee5a0cc7c4 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_10/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.514751672744751, "l1_loss": 28.3001127243042, "l0": 101.54999542236328, "frac_variance_explained": 0.901954710483551, "cossim": 0.9454782009124756, "l2_ratio": 0.8981001973152161, "relative_reconstruction_bias": 0.9714211821556091, "loss_original": 5.109375, "loss_reconstructed": 5.6875, "loss_zero": 13.5625, "frac_recovered": 0.93359375, "frac_alive": 0.26434326171875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_11/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_11/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..5d43fc59108568b417412ca18efafd3208f905f8 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_11/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:207b6b9c21a0ce64b141b186d0ad341110d15ef1fd689f538585dd0ae437ef77 +size 67178280 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_11/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_11/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9ed9b2a1351342465b6de965c42ee87155a40dc2 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_11/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "l1_penalty": 0.075, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_11/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_11/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ee9489ebdea6a6feb0f9385a733a7cd4c1240fd0 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_11/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.36195182800293, "l1_loss": 33.35663986206055, "l0": 138.25999450683594, "frac_variance_explained": 0.9749880731105804, "cossim": 0.9497334361076355, "l2_ratio": 0.8918062150478363, "relative_reconstruction_bias": 0.9890599548816681, "loss_original": 5.109375, "loss_reconstructed": 5.625, "loss_zero": 13.5625, "frac_recovered": 0.94140625, "frac_alive": 0.3258056640625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_12/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_12/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b90f1f4cfba8a607f64b5a1b9a91dac12613081 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_12/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:818cb1a0df1893d7137f0a6d19a7fad72fba7f01487a1d5c03c744ff9767b123 +size 16797480 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_12/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_12/config.json new file mode 100644 index 0000000000000000000000000000000000000000..90061d1bc6f5c0564b535e661237a4fdfb6a6e34 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_12/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "l1_penalty": 0.1, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_12/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_12/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..97b8fe8d55520039bffdba21927adfb12089be2a --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_12/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.731834888458252, "l1_loss": 19.52291965484619, "l0": 40.58999824523926, "frac_variance_explained": 0.8190242052078247, "cossim": 0.9044541120529175, "l2_ratio": 0.8395806849002838, "relative_reconstruction_bias": 0.9527353644371033, "loss_original": 5.109375, "loss_reconstructed": 6.234375, "loss_zero": 13.5625, "frac_recovered": 0.869140625, "frac_alive": 0.4091796875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_13/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_13/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f807670e071563bf8959fdc872a13eff147139e3 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_13/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05a3156874722946890e033fe21e9f44234181047e10ae8f62fedab2062e7bd7 +size 16797480 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_13/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_13/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d2bd59fb9edf0eefcff40feb19592bdb1ab1764f --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_13/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "l1_penalty": 0.1, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_13/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_13/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..37885fc80da7f6b9e85c0b78ea51b5c28521c2d4 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_13/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.731866121292114, "l1_loss": 21.847941398620605, "l0": 43.53999900817871, "frac_variance_explained": 0.8392283916473389, "cossim": 0.9095476567745209, "l2_ratio": 0.8281406164169312, "relative_reconstruction_bias": 0.9537761211395264, "loss_original": 5.109375, "loss_reconstructed": 6.34375, "loss_zero": 13.5625, "frac_recovered": 0.85546875, "frac_alive": 0.32177734375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_14/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_14/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..31afdcf2d76a46ddbd3340c42d33c000cd5b77a9 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_14/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:781ca2b282bc7b29c8b85a8f835b095335f4b8f5dc02a4f6dad8784885f6754a +size 67178280 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_14/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_14/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d7cb1c565ed92ea9343f77179fdef24ba0f01069 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_14/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "l1_penalty": 0.1, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_14/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_14/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..69fcb2d18987c9216524cf88a2f57819699e7790 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_14/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.388267993927002, "l1_loss": 22.517229080200195, "l0": 46.34499931335449, "frac_variance_explained": 0.9555586874485016, "cossim": 0.9193859100341797, "l2_ratio": 0.8641889691352844, "relative_reconstruction_bias": 0.9934327602386475, "loss_original": 5.109375, "loss_reconstructed": 6.0625, "loss_zero": 13.5625, "frac_recovered": 0.888671875, "frac_alive": 0.14202880859375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_15/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_15/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..aacc4d1453b57a37b39744912fa46c5c38322ac8 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_15/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f8f9ffecc9f645195a4976bcc6890671b91a0929c66c99043470bcc2e096399 +size 67178280 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_15/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_15/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1bd90c2e3b2d42fac879430462c5443a26681ee1 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_15/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "l1_penalty": 0.1, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_15/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_15/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..53f619cc55ebeb4088ce022bd2a4339aaed6a769 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_15/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.390922784805298, "l1_loss": 25.603646278381348, "l0": 55.71999931335449, "frac_variance_explained": 0.9650035798549652, "cossim": 0.9216497540473938, "l2_ratio": 0.8385327160358429, "relative_reconstruction_bias": 0.9863651692867279, "loss_original": 5.109375, "loss_reconstructed": 6.09375, "loss_zero": 13.5625, "frac_recovered": 0.88671875, "frac_alive": 0.14739990234375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_16/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_16/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b0dda1e7812539af5238ce0a80c3ca2f6c539b70 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_16/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daf35d79ffa2f856f8f1ac44c7023f434186f64b37df92a8c6ca89b4948739c1 +size 16797480 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_16/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_16/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e4d7aaff705857dc69710a59a9331e49494610bd --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_16/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "l1_penalty": 0.15, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_16/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_16/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6f84e1443ea3b96435927298cbd482c45bd32840 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_16/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 7.162523031234741, "l1_loss": 12.265330791473389, "l0": 14.419999599456787, "frac_variance_explained": 0.6976601779460907, "cossim": 0.8514250814914703, "l2_ratio": 0.7801115810871124, "relative_reconstruction_bias": 0.9325586557388306, "loss_original": 5.109375, "loss_reconstructed": 7.078125, "loss_zero": 13.5625, "frac_recovered": 0.76953125, "frac_alive": 0.150390625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_17/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_17/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..0dfc2f3027a64d2d65b58a434bc4f909db633e17 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_17/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8efcae05583abad827d6ad5146c79f468fb574a10c43861efab0165efdb16952 +size 16797480 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_17/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_17/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ce33ba3cbcc0d9887f5e20d9d225dbfdd4ff8871 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_17/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "l1_penalty": 0.15, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_17/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_17/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ca2c5f35b1bf25df9b075cbd4cb4d1ee8328249b --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_17/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 7.557527542114258, "l1_loss": 13.80838394165039, "l0": 11.309999465942383, "frac_variance_explained": 0.8601208031177521, "cossim": 0.8353725969791412, "l2_ratio": 0.7360717356204987, "relative_reconstruction_bias": 0.961407333612442, "loss_original": 5.109375, "loss_reconstructed": 7.640625, "loss_zero": 13.5625, "frac_recovered": 0.703125, "frac_alive": 0.052490234375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_18/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_18/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..1fe7fa4fd5c21f0f9ce90709b3c087f4440b7404 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_18/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:186374e706ddc376ba985583ae275f3e46ff8a6e24af816554d6e0555c929e67 +size 67178280 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_18/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_18/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d51a1d9f816360b1a0c3ba982b704ba9c32b8b81 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_18/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "l1_penalty": 0.15, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_18/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_18/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f6c74e522c5944bd4f5094b8629ef1e8e2ab48e3 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_18/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 6.985109329223633, "l1_loss": 13.327394962310791, "l0": 14.53999948501587, "frac_variance_explained": 0.8693634271621704, "cossim": 0.8634053766727448, "l2_ratio": 0.7890957593917847, "relative_reconstruction_bias": 0.9743449985980988, "loss_original": 5.109375, "loss_reconstructed": 6.90625, "loss_zero": 13.5625, "frac_recovered": 0.7890625, "frac_alive": 0.03997802734375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_19/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_19/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b741567a765983382638ca475b19835876f594e7 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_19/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:124f94308773456a8369b4ff472a3d291e61e5a66622597b79f0b69fe21def20 +size 67178280 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_19/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_19/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d4e2ae14dfa6621a2a4ae60da562e171aa2b301e --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_19/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "l1_penalty": 0.15, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_19/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_19/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c8049d3add5b10ce3ab2a9660db966b57a18ffd8 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_19/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 7.272101163864136, "l1_loss": 14.894614219665527, "l0": 15.03999948501587, "frac_variance_explained": 0.865559846162796, "cossim": 0.851164847612381, "l2_ratio": 0.7471699714660645, "relative_reconstruction_bias": 0.9622777104377747, "loss_original": 5.109375, "loss_reconstructed": 7.265625, "loss_zero": 13.5625, "frac_recovered": 0.74609375, "frac_alive": 0.02484130859375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_2/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..41673e4929aa1c86667717d46a15f92b8c4c4bb6 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90510fec3c42271bfd97e8e20ba503a99bff0e9b7d2d7a75c894add2c2e9d634 +size 67178280 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_2/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6c44e6e32fe8cb5be77a44c8bc1a98f38489108a --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_2/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "l1_penalty": 0.01, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_2/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..556ff06b668fdbe0daa4b11ee6fc3c9dc02fb5ca --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.13340092450380325, "l1_loss": 144.80532836914062, "l0": 601.8799743652344, "frac_variance_explained": 0.9999664127826691, "cossim": 0.9999653697013855, "l2_ratio": 0.9988075792789459, "relative_reconstruction_bias": 1.0020886659622192, "loss_original": 5.109375, "loss_reconstructed": 5.078125, "loss_zero": 13.5625, "frac_recovered": 1.00390625, "frac_alive": 0.074951171875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_3/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..531a8d8a3ec374091b0f6ec581a9119829850f52 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f827bf7b92436d17c53b59a22e8045eb09c431f86b99a1dc9c0cd8c8cf9b6641 +size 67178280 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_3/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f95dd2275022ed6d80c050eb85936a6c71dbd52c --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_3/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "l1_penalty": 0.01, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_3/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ce804975d33901c2ecf5fac5a67dee413246f3ed --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.05331199988722801, "l1_loss": 152.03803253173828, "l0": 914.4749755859375, "frac_variance_explained": 0.9999842345714569, "cossim": 0.9999947845935822, "l2_ratio": 1.0002580881118774, "relative_reconstruction_bias": 1.0011677742004395, "loss_original": 5.109375, "loss_reconstructed": 5.125, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.1141357421875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_4/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7da9f0fc26c989dea70dad1111c10f7d7d65fa52 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:461d8f84e72a2afe77c859f43a68663b2aaf5be65dc23579dd34b8828e490c32 +size 16797480 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_4/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2e1ea297cf69fd9d7ec20b9575dcb98cae936e7d --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_4/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_4/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f97d0681c4a19850514ca75585e4a6e671bf3284 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.517223596572876, "l1_loss": 43.52250862121582, "l0": 258.5399856567383, "frac_variance_explained": 0.9346843361854553, "cossim": 0.9675390422344208, "l2_ratio": 0.9293063580989838, "relative_reconstruction_bias": 0.9774938821792603, "loss_original": 5.109375, "loss_reconstructed": 5.484375, "loss_zero": 13.5625, "frac_recovered": 0.958984375, "frac_alive": 0.959716796875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_5/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..13ab57d0d2f867eea38c070da5a92286d3ec5a55 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28fa21ef9daacb9238c53cbc4e9802a44fa1ff7d2b6e3246c99abe917fcf0ada +size 16797480 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_5/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4e9772dbddba0ab27e9b48ef8efc1692d1c3fe8d --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_5/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_5/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7d400fce876079210a6fc3d603c1685a83df75b4 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.424095630645752, "l1_loss": 50.18914985656738, "l0": 317.34498596191406, "frac_variance_explained": 0.9404914379119873, "cossim": 0.9695490896701813, "l2_ratio": 0.9233435392379761, "relative_reconstruction_bias": 0.9756979644298553, "loss_original": 5.109375, "loss_reconstructed": 5.453125, "loss_zero": 13.5625, "frac_recovered": 0.962890625, "frac_alive": 0.93408203125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_6/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_6/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f2cae6b8614d44249a7dc899b13bd6456a3942e7 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_6/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33c3c7c829dd09aa0c18e53a3c6814c00491728049d38480d3a91989fce7a8d1 +size 67178280 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_6/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_6/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4ecef3f5878f854a8d9a7c7b379d09a8a6b9522c --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_6/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_6/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_6/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d37ebeb504cd0c9a722ebf0c8724fdf2687e78c0 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_6/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.2998898029327393, "l1_loss": 43.81868553161621, "l0": 315.54498291015625, "frac_variance_explained": 0.9484912157058716, "cossim": 0.9702073633670807, "l2_ratio": 0.9354815483093262, "relative_reconstruction_bias": 0.9803897142410278, "loss_original": 5.109375, "loss_reconstructed": 5.375, "loss_zero": 13.5625, "frac_recovered": 0.96875, "frac_alive": 0.47442626953125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_7/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_7/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..eb757fa5fcedafe18d72ea5a1fe9618fa0f5dd48 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_7/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a160eb9b7da6f635f64d199adf23daa84c0d0eb92e7a6887fb630bbe22d20f7f +size 67178280 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_7/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_7/config.json new file mode 100644 index 0000000000000000000000000000000000000000..90ac7989509f3cd100665999891493d756fe1186 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_7/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_7/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_7/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..acf0979806f6a924a6e2d6bf62be4c721d098f05 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_7/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.087216377258301, "l1_loss": 51.33630180358887, "l0": 468.9049835205078, "frac_variance_explained": 0.968185156583786, "cossim": 0.9766208529472351, "l2_ratio": 0.9397295117378235, "relative_reconstruction_bias": 0.9837659895420074, "loss_original": 5.109375, "loss_reconstructed": 5.328125, "loss_zero": 13.5625, "frac_recovered": 0.97265625, "frac_alive": 0.58251953125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_8/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_8/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a9ff2570f76cb6d5d5ac4ab4d15e04fa8cfa067c --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_8/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f599015ac81cd763977d0c99c7f1a37f484518cbe0489c1e5d22b7f49624cce9 +size 16797480 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_8/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_8/config.json new file mode 100644 index 0000000000000000000000000000000000000000..54a29d953727089f3eb55b65de93f33c09d6db78 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_8/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "l1_penalty": 0.075, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_8/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_8/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d705207cf0065e57483ae65c319a9458a6dea8a8 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_8/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.79274320602417, "l1_loss": 28.002964973449707, "l0": 85.70499801635742, "frac_variance_explained": 0.9493667185306549, "cossim": 0.9392455518245697, "l2_ratio": 0.8900715112686157, "relative_reconstruction_bias": 0.9794892072677612, "loss_original": 5.109375, "loss_reconstructed": 5.90625, "loss_zero": 13.5625, "frac_recovered": 0.90625, "frac_alive": 0.66162109375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_9/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_9/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..29b70677d18c3cb3ad88a1db661f96827aca8bc5 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_9/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1661b61185869cbfad92064ed414e0293f081c8b903cfff6e075846e84e2419 +size 16797480 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_9/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_9/config.json new file mode 100644 index 0000000000000000000000000000000000000000..30e46e9e61ab9e9c5076dc053a1d610075dcc8b3 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_9/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "l1_penalty": 0.075, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_9/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_9/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..cd74612eae77c1d15fc0bb42634973d58657ef55 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_3/trainer_9/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.824173450469971, "l1_loss": 32.33378028869629, "l0": 102.61999893188477, "frac_variance_explained": 0.9658667743206024, "cossim": 0.9369117915630341, "l2_ratio": 0.8783682584762573, "relative_reconstruction_bias": 0.9889915585517883, "loss_original": 5.109375, "loss_reconstructed": 5.875, "loss_zero": 13.5625, "frac_recovered": 0.91015625, "frac_alive": 0.6357421875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_0/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b0367d73126a4846555b9db09f0e77ab54ccf0bb --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eec9bd4935adac2da527ab73bb742a9219448c3c21b0a61c9abe006f10afc17 +size 16797480 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_0/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f4e0ebaff6b3b09067fee066589001c9ea24f60a --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "l1_penalty": 0.01, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_0/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bafb63184e12b8ae62f7d57df83058c0dfe52d6e --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.13906432688236237, "l1_loss": 172.4042739868164, "l0": 580.7900085449219, "frac_variance_explained": 0.9999641478061676, "cossim": 0.9999611377716064, "l2_ratio": 0.9991430044174194, "relative_reconstruction_bias": 0.9997045695781708, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 10.25, "frac_recovered": 1.0, "frac_alive": 0.286865234375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_1/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..dbbfd637ce86198307e25006b447994c6ea5834c --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12913c233e04488902de9fcb0d1ee7a7896f830234d6a2bbcc89eafc8a715d02 +size 16797480 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_1/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1f2d99fb097db9922b603b1f4474b9caf42de5a0 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_1/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "l1_penalty": 0.01, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_1/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9388ff57ba92e7ff991f1882e41f160a83146f64 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.039943572133779526, "l1_loss": 179.8002700805664, "l0": 840.594970703125, "frac_variance_explained": 0.9999936819076538, "cossim": 0.9999970495700836, "l2_ratio": 0.9992614984512329, "relative_reconstruction_bias": 0.9992481470108032, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 10.25, "frac_recovered": 1.0, "frac_alive": 0.43212890625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_10/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_10/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a46789fc62b309449b56d53af5d56593afd3b996 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_10/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f49054f75ea47a0a0f4d2294a8037a6de2f3f447109148159698c79943d44740 +size 67178280 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_10/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_10/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3757d345c54f9521656fe5af44082ff951525f56 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_10/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "l1_penalty": 0.075, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_10/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_10/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..21e374d95398044de56a91475a4d6053c1ab8a4c --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_10/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.496506452560425, "l1_loss": 30.859187126159668, "l0": 112.7249984741211, "frac_variance_explained": 0.8526565134525299, "cossim": 0.9341229796409607, "l2_ratio": 0.8806459307670593, "relative_reconstruction_bias": 0.956470251083374, "loss_original": 5.109375, "loss_reconstructed": 5.609375, "loss_zero": 10.25, "frac_recovered": 0.904296875, "frac_alive": 0.3214111328125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_11/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_11/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a1493855171a593c9f905dc0ff018c2c2c7cb6e --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_11/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d69a7cfc7206d4c5d53b909ba70369b8e935e807391dfad3a3b754f3dcabf55d +size 67178280 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_11/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_11/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3f8822e23c59606bf2fbac8b33a663b914a51e29 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_11/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "l1_penalty": 0.075, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_11/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_11/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6ab9af6abde80a2a004b965173c7bbc3720727bb --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_11/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.258655309677124, "l1_loss": 36.54450988769531, "l0": 153.41999053955078, "frac_variance_explained": 0.9124889373779297, "cossim": 0.9437460899353027, "l2_ratio": 0.8795965313911438, "relative_reconstruction_bias": 0.9664331078529358, "loss_original": 5.109375, "loss_reconstructed": 5.59375, "loss_zero": 10.25, "frac_recovered": 0.90625, "frac_alive": 0.39825439453125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_12/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_12/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..46a70b186dd34097bb01de3b823203b9f3044bb1 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_12/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0920922a1a54f58a73d735e195dffa759c8dabb68b58831c810063153c6fc4c0 +size 16797480 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_12/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_12/config.json new file mode 100644 index 0000000000000000000000000000000000000000..801cf363d97db52248f2e0b4a3a153827628e477 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_12/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "l1_penalty": 0.1, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_12/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_12/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..db44a42969b198ed7ef7ae3ac9ecc5c2c294cfca --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_12/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 6.905298948287964, "l1_loss": 20.22040843963623, "l0": 37.83999824523926, "frac_variance_explained": 0.6908568441867828, "cossim": 0.892336905002594, "l2_ratio": 0.8203575015068054, "relative_reconstruction_bias": 0.9213624596595764, "loss_original": 5.109375, "loss_reconstructed": 6.109375, "loss_zero": 10.25, "frac_recovered": 0.806640625, "frac_alive": 0.408203125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_13/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_13/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..71e3681a2eacbcbf7caa97cef22a67feb15fbb3b --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_13/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:883a7df3d21b2a2c56d529148dea4843b7df5f14cda4c01008eda8f0d3e3e464 +size 16797480 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_13/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_13/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b29e369af3cf87be2e3943f6c957ffe10b0bab81 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_13/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "l1_penalty": 0.1, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_13/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_13/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4ce15c47e72ca22666a792583846582deb0d9d9a --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_13/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 7.0613343715667725, "l1_loss": 24.225407600402832, "l0": 39.564998626708984, "frac_variance_explained": 0.8815032541751862, "cossim": 0.8960998952388763, "l2_ratio": 0.8110790848731995, "relative_reconstruction_bias": 0.9674525856971741, "loss_original": 5.109375, "loss_reconstructed": 6.125, "loss_zero": 10.25, "frac_recovered": 0.802734375, "frac_alive": 0.3701171875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_14/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_14/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f8d99ca1c51da5e1dafb4fd27c60da05f818d7ee --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_14/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e68c4e21dde1c056e6370cb8674544fab47e557914cde616e75b35648771b6a1 +size 67178280 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_14/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_14/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5071c670cf4795ecdcceeb81f4455ce48ec48e66 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_14/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "l1_penalty": 0.1, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_14/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_14/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..657f8beb195da81be8233e015a3511de234fb870 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_14/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 6.610292911529541, "l1_loss": 22.026453971862793, "l0": 43.994998931884766, "frac_variance_explained": 0.797339677810669, "cossim": 0.906348466873169, "l2_ratio": 0.8377086222171783, "relative_reconstruction_bias": 0.9489409625530243, "loss_original": 5.109375, "loss_reconstructed": 5.921875, "loss_zero": 10.25, "frac_recovered": 0.841796875, "frac_alive": 0.16229248046875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_15/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_15/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..eaa2e8af9b1eb441ef6447b955a5b0cb3c01a8e5 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_15/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fc2a61967362998bcbacccac659a9168d8475442e8fbff3f51138916949a959 +size 67178280 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_15/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_15/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ecc1cc19328bbcb30960842f1f0e30c6ba53cebe --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_15/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "l1_penalty": 0.1, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_15/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_15/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d1e829a750e764970f3d93f81677a08c029e0bc4 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_15/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 6.463175058364868, "l1_loss": 25.23475456237793, "l0": 55.10000038146973, "frac_variance_explained": 0.8899842202663422, "cossim": 0.9089497029781342, "l2_ratio": 0.824937641620636, "relative_reconstruction_bias": 0.9679083824157715, "loss_original": 5.109375, "loss_reconstructed": 5.90625, "loss_zero": 10.25, "frac_recovered": 0.845703125, "frac_alive": 0.17999267578125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_16/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_16/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..73d018ca58699fb9328295c87107dc537d4a6e57 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_16/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68742c8de93bda159183e9981726a1825992a9aa0ba974172f28792d849a5f63 +size 16797480 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_16/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_16/config.json new file mode 100644 index 0000000000000000000000000000000000000000..57e5e3af70bb8e6bc71e6ebadbed6128e5aeaa80 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_16/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "l1_penalty": 0.125, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_16/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_16/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4aa4f95db76e44e6d545fe66597d9f653597f39f --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_16/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 7.750272035598755, "l1_loss": 14.960684776306152, "l0": 17.869999885559082, "frac_variance_explained": 0.6092759668827057, "cossim": 0.8620257377624512, "l2_ratio": 0.7801276445388794, "relative_reconstruction_bias": 0.9062569439411163, "loss_original": 5.109375, "loss_reconstructed": 6.515625, "loss_zero": 10.25, "frac_recovered": 0.7265625, "frac_alive": 0.201904296875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_17/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_17/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..5cb0a0811750ee3c2a44288762e632c385572343 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_17/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ce6559858b4395c6be40f0556cc0f504b0e24ce77af05510114a41d08882bad +size 16797480 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_17/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_17/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a11f050b907238e3b04aac113a52807e9d45a4d1 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_17/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "l1_penalty": 0.125, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_17/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_17/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..68d1ee5836c048522198d6e187b20b210fe8a150 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_17/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 8.043148756027222, "l1_loss": 17.678317070007324, "l0": 19.835000038146973, "frac_variance_explained": 0.7752771377563477, "cossim": 0.8577037453651428, "l2_ratio": 0.7598432898521423, "relative_reconstruction_bias": 0.9503984153270721, "loss_original": 5.109375, "loss_reconstructed": 6.59375, "loss_zero": 10.25, "frac_recovered": 0.7109375, "frac_alive": 0.180419921875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_18/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_18/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a1a783e7159dc285d35e5eb7cd44859d03f7252 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_18/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab403d37ffca9cad54dfe9da64e08c2d03cadf4ab8eb830cad6da11ac916f767 +size 67178280 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_18/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_18/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5faf9f35ed90448792f83091045c4b7b4ff50e0c --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_18/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "l1_penalty": 0.125, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_18/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_18/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f3398e8bb8fab89e46154cd203f9ec0c7498ce1b --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_18/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 7.187922716140747, "l1_loss": 17.748798370361328, "l0": 21.960000038146973, "frac_variance_explained": 0.7765224277973175, "cossim": 0.8868830800056458, "l2_ratio": 0.8089266419410706, "relative_reconstruction_bias": 0.9446336328983307, "loss_original": 5.109375, "loss_reconstructed": 6.328125, "loss_zero": 10.25, "frac_recovered": 0.76171875, "frac_alive": 0.07574462890625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_19/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_19/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f0e699c8a09cc8113ffd1af12eb250227954280 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_19/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bf729bba5215d58240a7f6179a629cd614eabc31e6acf6cd7c9df35c7d170b3 +size 67178280 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_19/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_19/config.json new file mode 100644 index 0000000000000000000000000000000000000000..20d300f9c6d8884ec7ef3bbf524120ecbf692119 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_19/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "l1_penalty": 0.125, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_19/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_19/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..96eb67ea022f3d2258fe77b30950e1bca58261b2 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_19/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 7.419585704803467, "l1_loss": 17.61837673187256, "l0": 25.229999542236328, "frac_variance_explained": 0.7256596982479095, "cossim": 0.8796624541282654, "l2_ratio": 0.7722421586513519, "relative_reconstruction_bias": 0.9177526831626892, "loss_original": 5.109375, "loss_reconstructed": 6.328125, "loss_zero": 10.25, "frac_recovered": 0.76171875, "frac_alive": 0.07366943359375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_2/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..98078de74ca6dbf635620dda77280745997a26ca --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8262903741bfbcf4b5af80a575a5e56880936513ec4841e37ee0b87dcfff6a75 +size 67178280 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_2/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..df4c5f46e399c93bec82712a37d025e3d6bf5d17 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_2/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "l1_penalty": 0.01, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_2/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bfb61ddc170ff2beb84a2a94ddd6a6b46a1e8e05 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.14557188749313354, "l1_loss": 176.1813507080078, "l0": 571.2599792480469, "frac_variance_explained": 0.9999658167362213, "cossim": 0.9999614059925079, "l2_ratio": 0.9985294938087463, "relative_reconstruction_bias": 0.9981709420681, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 10.25, "frac_recovered": 1.0, "frac_alive": 0.07061767578125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_20/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_20/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..63cb1fe6f5d03768f7fcafcd6652ec91042d17a4 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_20/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0d64f8ca51072c7c2b257b985e33982c2f43c56740d34a44d288528f3bd15c4 +size 16797480 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_20/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_20/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f64c3d27fb10f9c1850126ad4614c5bc87c4e0f6 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_20/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "l1_penalty": 0.15, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_20/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_20/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b08b3da936f1deb7444ca90ff5c4993c8b7c987f --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_20/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 8.817397117614746, "l1_loss": 12.390697956085205, "l0": 9.375, "frac_variance_explained": 0.7390300035476685, "cossim": 0.8167252242565155, "l2_ratio": 0.7350420951843262, "relative_reconstruction_bias": 0.9481939077377319, "loss_original": 5.109375, "loss_reconstructed": 7.03125, "loss_zero": 10.25, "frac_recovered": 0.625, "frac_alive": 0.093994140625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_21/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_21/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..699ba7c0dabdd2ec663746d6e2f04efa32770045 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_21/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2202c9b054fd941a2871d2892c57892ac18a161c6a674f0adb82c1ebc6a19beb +size 16797480 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_21/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_21/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9b547f88795c6516e6e6c9d37430d40e4351c647 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_21/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "l1_penalty": 0.15, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_21/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_21/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..48c24901cefcaa3c501edb2dd56b3202b2a94a06 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_21/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 8.986730575561523, "l1_loss": 14.376469612121582, "l0": 10.614999771118164, "frac_variance_explained": 0.7838070094585419, "cossim": 0.8144988417625427, "l2_ratio": 0.7060303688049316, "relative_reconstruction_bias": 0.9518384337425232, "loss_original": 5.109375, "loss_reconstructed": 7.21875, "loss_zero": 10.25, "frac_recovered": 0.58984375, "frac_alive": 0.077392578125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_22/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_22/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..4bb5e40649cf1db284cee9231ef2fa96f4e58de8 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_22/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c564cf47b8669ae8e6f43f947e70cc8b734206f9c9e52a470cab1385c4011d79 +size 67178280 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_22/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_22/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0f00ca24c483c7cc4e6911baed6be29b9b8023a1 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_22/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "l1_penalty": 0.15, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_22/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_22/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0d51eccc6579059a1d450d3b6ee19550c45b8990 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_22/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 8.493481159210205, "l1_loss": 12.704645156860352, "l0": 11.5649995803833, "frac_variance_explained": 0.6243764162063599, "cossim": 0.8343020379543304, "l2_ratio": 0.7512751519680023, "relative_reconstruction_bias": 0.9171733856201172, "loss_original": 5.109375, "loss_reconstructed": 6.859375, "loss_zero": 10.25, "frac_recovered": 0.658203125, "frac_alive": 0.03338623046875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_23/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_23/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..544f3ee3f5d01abc20112b08b0d6fe601a576f90 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_23/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23d0a627e0fbdb67f9d8e1dfb9fe396e5427073951e42a38c43accda2c748087 +size 67178280 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_23/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_23/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9002c8b438f4ac2adc41f0a70ea79fbf8bbb2078 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_23/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "l1_penalty": 0.15, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_23/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_23/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2cff00cfbc2aa24ad7db23cd422e829d0244c153 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_23/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 8.535282611846924, "l1_loss": 15.774386882781982, "l0": 13.78499984741211, "frac_variance_explained": 0.7991003394126892, "cossim": 0.8328536152839661, "l2_ratio": 0.7212831676006317, "relative_reconstruction_bias": 0.9515023529529572, "loss_original": 5.109375, "loss_reconstructed": 6.828125, "loss_zero": 10.25, "frac_recovered": 0.6640625, "frac_alive": 0.033935546875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_3/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe88e416042ff24f4c8d3d193c71858075eac4bd --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:215fa86d0e565255011631254185d42c3bbb29ebde91114eb5d4af13aacfdded +size 67178280 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_3/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1f599d4571ca607760af8881052312a8475ee959 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_3/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "l1_penalty": 0.01, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_3/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..de044df632526036b3c8af486b6e010c5ac7e844 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.04370100051164627, "l1_loss": 182.10286712646484, "l0": 884.7949829101562, "frac_variance_explained": 0.9999894201755524, "cossim": 0.9999958574771881, "l2_ratio": 0.9992266893386841, "relative_reconstruction_bias": 0.999617725610733, "loss_original": 5.109375, "loss_reconstructed": 5.09375, "loss_zero": 10.25, "frac_recovered": 1.00390625, "frac_alive": 0.11712646484375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_4/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..5f88c493a0a9c5f0d9bf7fcf6f3f96f374b69711 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26c3ec8b6aa38c04b1ff045cc1457ac48799e0889f5836d4e6816d495adb611f +size 16797480 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_4/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bee8335f3bb42e922b4fac1ba729369053cd8c20 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_4/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_4/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..933f8f4a459d30cf654f9bd4103e9ff1f449aef9 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.28046441078186, "l1_loss": 55.58331298828125, "l0": 289.0800018310547, "frac_variance_explained": 0.9569923579692841, "cossim": 0.9617600440979004, "l2_ratio": 0.9129365384578705, "relative_reconstruction_bias": 0.9799267053604126, "loss_original": 5.109375, "loss_reconstructed": 5.359375, "loss_zero": 10.25, "frac_recovered": 0.951171875, "frac_alive": 0.977294921875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_5/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2cf5c2f5d4cd3318b0f996486072ed15f7dfcea7 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81f75ae2baac5a4e69b8882e941a8a77c1bea9146783c81a3d25a9557b1540ba +size 16797480 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_5/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a2ac3f69a0dfe1b75b9e142d6dd75e8c5614a7fb --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_5/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_5/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c682b2954e64de339bd5d86aa0cb827a976eba15 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.175949335098267, "l1_loss": 60.347869873046875, "l0": 348.7799835205078, "frac_variance_explained": 0.9440552592277527, "cossim": 0.9639659225940704, "l2_ratio": 0.9107547402381897, "relative_reconstruction_bias": 0.9754826128482819, "loss_original": 5.109375, "loss_reconstructed": 5.359375, "loss_zero": 10.25, "frac_recovered": 0.951171875, "frac_alive": 0.951416015625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_6/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_6/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..17ec9ab7582279fb9f07be6ec68d902bd54e10aa --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_6/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ff362c077d9c7578916f589b6b753ef3d9debf28253d443622c095f0a3ad8a5 +size 67178280 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_6/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_6/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8698d4f5c5abb976299cd47dedcfda32a8d82400 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_6/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.001, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_6/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_6/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1c515c9ca6aad25f8847f920d8455bf41f1b5206 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_6/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.8073216676712036, "l1_loss": 51.118635177612305, "l0": 369.1499938964844, "frac_variance_explained": 0.9440069198608398, "cossim": 0.9695656299591064, "l2_ratio": 0.934249073266983, "relative_reconstruction_bias": 0.9799982309341431, "loss_original": 5.109375, "loss_reconstructed": 5.296875, "loss_zero": 10.25, "frac_recovered": 0.96484375, "frac_alive": 0.56005859375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_7/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_7/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..df9ce317ece2781c8b9a3990fa460efa2d9d7b06 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_7/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26a704baedec376ac97b74aa8d1ab78150961a833efceaed01a01e2752a6a309 +size 67178280 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_7/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_7/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ffbb25b79c8fa4d6f05c2f0d7ef3c940a4694761 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_7/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 16384, + "lr": 0.0001, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_7/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_7/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..410e1b70c3aafb34291b05361c3e08c7677ae6a3 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_7/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.5381969213485718, "l1_loss": 61.29299354553223, "l0": 549.5999755859375, "frac_variance_explained": 0.9644224941730499, "cossim": 0.9755493700504303, "l2_ratio": 0.936077207326889, "relative_reconstruction_bias": 0.9815801382064819, "loss_original": 5.109375, "loss_reconstructed": 5.28125, "loss_zero": 10.25, "frac_recovered": 0.966796875, "frac_alive": 0.65277099609375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_8/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_8/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e304fbfdf6466faa67b7ec369d6534d20df4571 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_8/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b903668fa0aee2c3447df75dbc6588afb664252ff55d9374e3c35e4d317bd6f +size 16797480 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_8/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_8/config.json new file mode 100644 index 0000000000000000000000000000000000000000..736f7a957054e90de05a784cd57040b51e34c044 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_8/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.001, + "l1_penalty": 0.075, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_8/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_8/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..01277187d554cc5a78090ec42cdefb29ef8a6473 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_8/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.777284383773804, "l1_loss": 29.042738914489746, "l0": 86.56999969482422, "frac_variance_explained": 0.8400347530841827, "cossim": 0.9266457855701447, "l2_ratio": 0.8629283905029297, "relative_reconstruction_bias": 0.952755868434906, "loss_original": 5.109375, "loss_reconstructed": 5.75, "loss_zero": 10.25, "frac_recovered": 0.875, "frac_alive": 0.727783203125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_9/ae.pt b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_9/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a651dadebc94f0b8bd3aa44e154206267c66d629 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_9/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5935a8559c2715113a2f57b0d873965c73fc56c5d952b4e9848f374cb8e2a5b +size 16797480 diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_9/config.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_9/config.json new file mode 100644 index 0000000000000000000000000000000000000000..dc37a6cbcaf62e83001af48ffc470bcc741b61e3 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_9/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 512, + "dict_size": 4096, + "lr": 0.0001, + "l1_penalty": 0.075, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "StandardTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 64, + "out_batch_size": 8192, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_9/eval_results.json b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_9/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3b0e6282316a320ea5488e0709996d80dce077d4 --- /dev/null +++ b/pythia70m_sweep_standard_ctx128_0712/resid_post_layer_4/trainer_9/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.928151369094849, "l1_loss": 33.09730529785156, "l0": 108.88999557495117, "frac_variance_explained": 0.8473385572433472, "cossim": 0.9234676957130432, "l2_ratio": 0.8503770530223846, "relative_reconstruction_bias": 0.9529539942741394, "loss_original": 5.109375, "loss_reconstructed": 5.765625, "loss_zero": 10.25, "frac_recovered": 0.873046875, "frac_alive": 0.700439453125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_0/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..57c52009d15b1acefbbc21fe15bb44bd8cfdb356 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3964c09d2553ef149f30e48d6232fb290622adfe9be7b3e20decb8decb5c4c4 +size 16797480 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_0/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3deb398cd9595f59d236b5abfded0575580bb975 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0004, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 4096, + "k": 20, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_0/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6124a052d6db0bdd8c0034240b92a132fd7956b9 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.935908079147339, "l1_loss": 31.69143581390381, "l0": 20.0, "frac_variance_explained": 0.9232955873012543, "cossim": 0.9306000769138336, "l2_ratio": 0.9304746389389038, "relative_reconstruction_bias": 1.000677913427353, "loss_original": 5.109375, "loss_reconstructed": 5.921875, "loss_zero": 13.5625, "frac_recovered": 0.904296875, "frac_alive": 0.391357421875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_1/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..830a7f3735734cd897fd646db71b4fa8ded12a3f --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32f9b62b9e203521b9ce7d182aa698b73d3dc36b79d42f0b9691fe31d891b435 +size 16797480 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_1/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3deb398cd9595f59d236b5abfded0575580bb975 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_1/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0004, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 4096, + "k": 20, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_1/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..059b69aa7b7a8166539e92ac7a22a7cfb2231e58 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.0450356006622314, "l1_loss": 32.46671199798584, "l0": 19.994999885559082, "frac_variance_explained": 0.9223879873752594, "cossim": 0.9300017058849335, "l2_ratio": 0.9265586733818054, "relative_reconstruction_bias": 0.9985445141792297, "loss_original": 5.109375, "loss_reconstructed": 5.859375, "loss_zero": 13.5625, "frac_recovered": 0.912109375, "frac_alive": 0.388427734375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_10/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_10/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..75ba47541ecc3da47449ad1406a0b7ba8b0dfc29 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_10/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da675a39fe35b4d4f4bebb6365603788d48bed98bd9030746427e14847094749 +size 67178280 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_10/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_10/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0c0b607b856ca9c6de33cae8e2a4ed8cee67a418 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_10/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0002, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 16384, + "k": 80, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_10/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_10/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..42bc268085d985e4e851acf4c154faf9a1795487 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_10/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.234249711036682, "l1_loss": 66.75620079040527, "l0": 80.0, "frac_variance_explained": 0.9826801419258118, "cossim": 0.9711524546146393, "l2_ratio": 0.971171498298645, "relative_reconstruction_bias": 0.9993952810764313, "loss_original": 5.109375, "loss_reconstructed": 5.328125, "loss_zero": 13.5625, "frac_recovered": 0.97265625, "frac_alive": 0.30572509765625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_11/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_11/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c599897766054ad415f0d63a7600d9572cf4f50a --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_11/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f67d7ea901d3d8ce74e49862897b929637f3137edd35526c6d41caf7ca54e8e +size 67178280 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_11/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_11/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0c0b607b856ca9c6de33cae8e2a4ed8cee67a418 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_11/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0002, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 16384, + "k": 80, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_11/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_11/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..abb50ea17c34314a8ef9e6be2093840cca041a4f --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_11/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.266580820083618, "l1_loss": 63.347068786621094, "l0": 80.0, "frac_variance_explained": 0.9793018102645874, "cossim": 0.970993161201477, "l2_ratio": 0.9706854522228241, "relative_reconstruction_bias": 1.0004341006278992, "loss_original": 5.109375, "loss_reconstructed": 5.328125, "loss_zero": 13.5625, "frac_recovered": 0.97265625, "frac_alive": 0.320068359375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_12/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_12/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..223f989f29998b3496a403f1ad3930f82f11b6ed --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_12/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56a26531e65bffa669f18c8801048a5d1c3ed7ab08b8bc4ea3336c67484665b6 +size 16797480 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_12/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_12/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bfb97bf3532bf94692c6f79b014ab9e571780a93 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_12/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0004, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 4096, + "k": 160, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_12/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_12/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1398530762d18063318f6f1c170cd245ce42c7ec --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_12/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.582371473312378, "l1_loss": 95.0156364440918, "l0": 160.0, "frac_variance_explained": 0.9637480080127716, "cossim": 0.982122153043747, "l2_ratio": 0.9821611046791077, "relative_reconstruction_bias": 1.0001484155654907, "loss_original": 5.109375, "loss_reconstructed": 5.296875, "loss_zero": 13.5625, "frac_recovered": 0.98046875, "frac_alive": 0.612060546875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_13/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_13/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f806d6bbb460bceed3e9f480067250467b3e1deb --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_13/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:651d5423e67f4e7ea7a404b50a0ad8ad00e0106055afc2855e86fb7416203062 +size 16797480 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_13/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_13/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bfb97bf3532bf94692c6f79b014ab9e571780a93 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_13/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0004, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 4096, + "k": 160, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_13/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_13/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..aad50dc696cbc3934d38bdc31ba1ef95d5b850aa --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_13/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.5901246070861816, "l1_loss": 102.0064468383789, "l0": 159.97999572753906, "frac_variance_explained": 0.9645738303661346, "cossim": 0.9815526604652405, "l2_ratio": 0.9812406599521637, "relative_reconstruction_bias": 1.0001290440559387, "loss_original": 5.109375, "loss_reconstructed": 5.28125, "loss_zero": 13.5625, "frac_recovered": 0.984375, "frac_alive": 0.6064453125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_14/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_14/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..867c1a339c778c09c029e86cb8d7e2eb00352402 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_14/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a20aa109486e5d49b4e5ee99a3053e284a5aa7f70daa3dba68215c7db60fffbc +size 67178280 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_14/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_14/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1afc287cd048f788e1eae6e4e3295e5a6825d4ad --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_14/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0002, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 16384, + "k": 160, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_14/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_14/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f4d1a672375f6a5627dcdfc5aa5a9e0b80a144f7 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_14/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.4579464197158813, "l1_loss": 93.92619323730469, "l0": 160.0, "frac_variance_explained": 0.970717579126358, "cossim": 0.9828965067863464, "l2_ratio": 0.9827223420143127, "relative_reconstruction_bias": 0.9993032217025757, "loss_original": 5.109375, "loss_reconstructed": 5.234375, "loss_zero": 13.5625, "frac_recovered": 0.984375, "frac_alive": 0.369140625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_15/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_15/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..37d9fc3707cc31a1344df2bf5ab2bd41c236a220 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_15/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d4258535404b94abf0c30208ff977a9289c23c7fa03693428852c0334fed961 +size 67178280 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_15/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_15/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1afc287cd048f788e1eae6e4e3295e5a6825d4ad --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_15/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0002, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 16384, + "k": 160, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_15/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_15/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..04c732e9e3c4e327426bb52c0481b6bba5cbf81d --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_15/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 2.490305781364441, "l1_loss": 91.74856948852539, "l0": 160.0, "frac_variance_explained": 0.9788775444030762, "cossim": 0.9834240674972534, "l2_ratio": 0.9832234382629395, "relative_reconstruction_bias": 1.0000578165054321, "loss_original": 5.109375, "loss_reconstructed": 5.25, "loss_zero": 13.5625, "frac_recovered": 0.984375, "frac_alive": 0.381103515625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_16/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_16/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..981278aeed60b1e7ec5b82b05b2f6fe9c3f41363 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_16/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62349dd70ff125fae7a77704e231b584c4753345b47081b03246e23cf2e5786f +size 16797480 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_16/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_16/config.json new file mode 100644 index 0000000000000000000000000000000000000000..98fc5a724de34931b0c5f00b3ff8a2a41700b514 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_16/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0004, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 4096, + "k": 320, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_16/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_16/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1a9a4a6ec19bfd50d92e7046e032cff1dc27550b --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_16/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.112624168395996, "l1_loss": 220.210205078125, "l0": 320.0, "frac_variance_explained": 0.9972675442695618, "cossim": 0.996800422668457, "l2_ratio": 0.9970879554748535, "relative_reconstruction_bias": 1.0000087916851044, "loss_original": 5.109375, "loss_reconstructed": 5.125, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.35302734375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_17/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_17/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..beb33167c125ec7d3a2a0fce29bd8783f61f3823 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_17/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f509a8b544816a5735d7b4ae47f97d6c4ee49f585d74f614ac85712a4cdcad05 +size 16797480 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_17/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_17/config.json new file mode 100644 index 0000000000000000000000000000000000000000..98fc5a724de34931b0c5f00b3ff8a2a41700b514 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_17/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0004, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 4096, + "k": 320, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_17/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_17/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e8825af816f6e7683c7f83434f610cde796c778e --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_17/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.1107040047645569, "l1_loss": 227.40424346923828, "l0": 319.84999084472656, "frac_variance_explained": 0.9981722235679626, "cossim": 0.9966503977775574, "l2_ratio": 0.9965516626834869, "relative_reconstruction_bias": 0.999874621629715, "loss_original": 5.109375, "loss_reconstructed": 5.140625, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.354248046875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_18/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_18/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2c912ccbaf17471583718cc9f732c49846f8b0d4 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_18/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:545ca0460eb3cc381466a6bcfb015d9d49308f8377b16bbeb8307971509dbb27 +size 67178280 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_18/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_18/config.json new file mode 100644 index 0000000000000000000000000000000000000000..acc2446d8a8a1de1303100fa4abfdec42bfde57a --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_18/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0002, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 16384, + "k": 320, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_18/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_18/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..fe6df5f5f25f5707e4631a461e2aeb587bf1bca3 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_18/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.1304024457931519, "l1_loss": 162.3750457763672, "l0": 320.0, "frac_variance_explained": 0.9938696026802063, "cossim": 0.9966120719909668, "l2_ratio": 0.9968003034591675, "relative_reconstruction_bias": 0.9999623000621796, "loss_original": 5.109375, "loss_reconstructed": 5.125, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.16937255859375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_19/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_19/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..48ec715f0d6b495620b9195a48e61416abd40cc3 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_19/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e19e73d879beafd14f3cb800a4d49621d69d6acab02290e3b3d60673f973dd50 +size 67178280 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_19/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_19/config.json new file mode 100644 index 0000000000000000000000000000000000000000..acc2446d8a8a1de1303100fa4abfdec42bfde57a --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_19/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0002, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 16384, + "k": 320, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_19/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_19/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..883aa7b2eb23bbeb0e7b56d6c07a68cfbb871846 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_19/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.1097350120544434, "l1_loss": 165.12006378173828, "l0": 319.8450012207031, "frac_variance_explained": 0.9983470141887665, "cossim": 0.9967158436775208, "l2_ratio": 0.9965410232543945, "relative_reconstruction_bias": 0.9998109042644501, "loss_original": 5.109375, "loss_reconstructed": 5.125, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.169677734375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_2/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c926d445ee92465b14b06b2d5ad2f1ae9ad1dba6 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23e8dbf96809490b9e62a3e0b3ac641635b070eee880f8a2792e34f8c3dcb988 +size 67178280 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_2/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ebb6823ffbdd88fa7e28f1390fd3ece10dfa875 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_2/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0002, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 16384, + "k": 20, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_2/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e73d622770b837235dcfd11134a611dfd52c5427 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.565485000610352, "l1_loss": 30.03243637084961, "l0": 20.0, "frac_variance_explained": 0.8826577067375183, "cossim": 0.9396016597747803, "l2_ratio": 0.939398854970932, "relative_reconstruction_bias": 0.9996489584445953, "loss_original": 5.109375, "loss_reconstructed": 5.578125, "loss_zero": 13.5625, "frac_recovered": 0.947265625, "frac_alive": 0.13482666015625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_20/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_20/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b32d1073d82c288834b5ecfafaf9fb7f04fd3f1d --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_20/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4ea088a0539635023e511ce09683682c58634fef7d25646741569cfe4cfd481 +size 16797480 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_20/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_20/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3cac54d06967a0e4fde0f8c1292007c016a26f6f --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_20/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0004, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 4096, + "k": 640, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_20/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_20/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9bef3bf17db85a0fed7def0b87c7e6f036e73975 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_20/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.012151475064456463, "l1_loss": 767.0169067382812, "l0": 639.7599792480469, "frac_variance_explained": 0.9999955594539642, "cossim": 0.9999993741512299, "l2_ratio": 0.9999833405017853, "relative_reconstruction_bias": 0.9998700618743896, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.329345703125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_21/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_21/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..db60ef5c8709885447a9251b70b2a5ddb68de7c4 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_21/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57a4eaa08f6c139aa162c6307f6a9b589e789c322f4178c72ab05e60efe6b0c4 +size 16797480 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_21/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_21/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3cac54d06967a0e4fde0f8c1292007c016a26f6f --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_21/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0004, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 4096, + "k": 640, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_21/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_21/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b08cbb88b407036bc9229b6d3b6a097b7a4e4e3b --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_21/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.012649939395487309, "l1_loss": 775.1196594238281, "l0": 639.6399841308594, "frac_variance_explained": 0.9999949336051941, "cossim": 0.9999997913837433, "l2_ratio": 0.9999857842922211, "relative_reconstruction_bias": 1.0000550150871277, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.3349609375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_22/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_22/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..490a5fbf1f1b6a66265c31e3257c0344c342e332 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_22/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d1e3378fb92aa7ecb9112c1981f2dce9cc1cca8c2c6bec7a470b5e6e61ab46f +size 67178280 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_22/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_22/config.json new file mode 100644 index 0000000000000000000000000000000000000000..500371b6011588cf95c06ac7780cc58949240599 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_22/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0002, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 16384, + "k": 640, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_22/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_22/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..81ba7e2620cd3a3699b1f97c25bc5bbdefe77c49 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_22/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.015976474154740572, "l1_loss": 858.8795166015625, "l0": 639.4449768066406, "frac_variance_explained": 0.9999938309192657, "cossim": 0.9999998211860657, "l2_ratio": 0.9999706149101257, "relative_reconstruction_bias": 0.999804675579071, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.08978271484375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_23/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_23/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0e6761c6600eb709d68c85518215c0271cc9a5e --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_23/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d9b8a339a5843d9b5e9a11ed08b5a9794ba116cff528e29595a42e46074fb48 +size 67178280 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_23/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_23/config.json new file mode 100644 index 0000000000000000000000000000000000000000..500371b6011588cf95c06ac7780cc58949240599 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_23/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0002, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 16384, + "k": 640, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_23/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_23/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a2ac0a53a7db4d2ff7a77ab8637dc52720fee99e --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_23/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.007348929066210985, "l1_loss": 862.3438110351562, "l0": 639.0050048828125, "frac_variance_explained": 0.999992161989212, "cossim": 0.9999998509883881, "l2_ratio": 0.999979704618454, "relative_reconstruction_bias": 0.9999172389507294, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 13.5625, "frac_recovered": 1.0, "frac_alive": 0.07568359375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_3/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f6c45e46d066cd56779bf1c6bacea87685722ac1 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bf557f7b14e2f563e03e03da2b7897243dcd311a14cf3ffdaa125d5b633d03e +size 67178280 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_3/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5ebb6823ffbdd88fa7e28f1390fd3ece10dfa875 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_3/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0002, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 16384, + "k": 20, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_3/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e5c31423eed2ea6cf9b0ee9b68bffd6b76a8abb2 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.442843437194824, "l1_loss": 31.618624687194824, "l0": 20.0, "frac_variance_explained": 0.9502413868904114, "cossim": 0.9445270895957947, "l2_ratio": 0.9433278739452362, "relative_reconstruction_bias": 0.9998310506343842, "loss_original": 5.109375, "loss_reconstructed": 5.546875, "loss_zero": 13.5625, "frac_recovered": 0.953125, "frac_alive": 0.13262939453125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_4/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..66a065c39ad06f796b07a1b92c7959c43ed88e24 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a407064ddbdb88d2c90316bcc6273af867bff94358345627d40b4fb2ca920633 +size 16797480 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_4/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3a60ce51369aeb3c67826da3662a9336918193bf --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_4/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0004, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 4096, + "k": 40, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_4/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..edb4a469fd33c369945c33f4c1d961ebaaf95ecf --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.421348333358765, "l1_loss": 45.20492172241211, "l0": 40.0, "frac_variance_explained": 0.947259396314621, "cossim": 0.9467827081680298, "l2_ratio": 0.9451018273830414, "relative_reconstruction_bias": 0.9997144043445587, "loss_original": 5.109375, "loss_reconstructed": 5.625, "loss_zero": 13.5625, "frac_recovered": 0.939453125, "frac_alive": 0.533447265625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_5/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6759216fa3b4a5732d8b6126340c429abcd2af33 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb22d3f02e732472483ec87111dbd1d3d65d130a588eefa56385b183f271f2fe +size 16797480 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_5/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3a60ce51369aeb3c67826da3662a9336918193bf --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_5/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0004, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 4096, + "k": 40, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_5/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c1ba739c4e4fb9a5a51a9ce44d9390fa914bf7d4 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.395580768585205, "l1_loss": 44.94974327087402, "l0": 40.0, "frac_variance_explained": 0.9498070478439331, "cossim": 0.9466257393360138, "l2_ratio": 0.944282740354538, "relative_reconstruction_bias": 0.9993623495101929, "loss_original": 5.109375, "loss_reconstructed": 5.625, "loss_zero": 13.5625, "frac_recovered": 0.94140625, "frac_alive": 0.565185546875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_6/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_6/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..dac0dc084642008584c18bff92cbd07fd23f75f9 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_6/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0ca181bc2abbcbae6cc604da21b0df2493d5c1ea840ecfcbc8ad243c0d3bf49 +size 67178280 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_6/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_6/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8899eeb502ae66cad3090f6bcdc0ab731f6effb0 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_6/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0002, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 16384, + "k": 40, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_6/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_6/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..341490c13cd6776e7ff8614decf53bb0e735acf9 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_6/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.8854072093963623, "l1_loss": 42.4716854095459, "l0": 40.0, "frac_variance_explained": 0.9503816962242126, "cossim": 0.9572432935237885, "l2_ratio": 0.9553997218608856, "relative_reconstruction_bias": 0.9991843700408936, "loss_original": 5.109375, "loss_reconstructed": 5.421875, "loss_zero": 13.5625, "frac_recovered": 0.966796875, "frac_alive": 0.22265625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_7/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_7/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..01b3ef851c9f4cb2c3b6377242dc5712aabf4393 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_7/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:704e746ccaedbc60809c347fd6176872eb72565bbdbfb01c54b9cbd34fcee5f9 +size 67178280 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_7/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_7/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8899eeb502ae66cad3090f6bcdc0ab731f6effb0 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_7/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0002, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 16384, + "k": 40, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_7/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_7/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4bf23b6254bec5b83c61bfd9fd4e1410e5fbab45 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_7/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.8537884950637817, "l1_loss": 43.19114112854004, "l0": 39.96999931335449, "frac_variance_explained": 0.9531150460243225, "cossim": 0.9572946727275848, "l2_ratio": 0.9553911685943604, "relative_reconstruction_bias": 1.0004658699035645, "loss_original": 5.109375, "loss_reconstructed": 5.40625, "loss_zero": 13.5625, "frac_recovered": 0.96875, "frac_alive": 0.22137451171875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_8/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_8/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ebe77d00ab10fbee5896fd05c548c551a66fa540 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_8/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b22441f82f54b5155086fbf58f14240fbb0fcd9f1d09ae7eadfa680e8a42ec5 +size 16797480 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_8/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_8/config.json new file mode 100644 index 0000000000000000000000000000000000000000..17dfa1765374eb7309d94f84561d6d7a8e2e69a7 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_8/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0004, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 4096, + "k": 80, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_8/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_8/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a9e2ef46c7051358c6069aaf1d5253f50a09a9fb --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_8/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.7417449951171875, "l1_loss": 65.06080627441406, "l0": 80.0, "frac_variance_explained": 0.9273052513599396, "cossim": 0.9613871872425079, "l2_ratio": 0.9623427391052246, "relative_reconstruction_bias": 1.0015930533409119, "loss_original": 5.109375, "loss_reconstructed": 5.453125, "loss_zero": 13.5625, "frac_recovered": 0.9609375, "frac_alive": 0.650634765625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_9/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_9/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb1150e806b152a0fc73c58a1f1c31e2a08597aa --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_9/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eb28544955aede7164a3a0630456c2f84cab59999b6c8c4370aa20418c7ede7 +size 16797480 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_9/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_9/config.json new file mode 100644 index 0000000000000000000000000000000000000000..17dfa1765374eb7309d94f84561d6d7a8e2e69a7 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_9/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0004, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 4096, + "k": 80, + "device": "cuda:0", + "layer": 3, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_3", + "submodule_name": "resid_post_layer_3" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_9/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_9/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4eca5d188a421ef787af01a42077187079b967bf --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_3/trainer_9/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.6133843660354614, "l1_loss": 70.73324203491211, "l0": 80.0, "frac_variance_explained": 0.9763646125793457, "cossim": 0.9642331302165985, "l2_ratio": 0.9634521901607513, "relative_reconstruction_bias": 0.9992213249206543, "loss_original": 5.109375, "loss_reconstructed": 5.453125, "loss_zero": 13.5625, "frac_recovered": 0.9609375, "frac_alive": 0.642822265625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_0/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_0/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..f7bdd2afa55501f63685946cd0ec03810b1bafd1 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_0/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3e6d9510529d47e287a0436f0af6d02c428834bbfd06cbb2439caf06a51936a +size 16797480 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_0/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..af0c92413b21a218b057cc2a14f72d5d18729197 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_0/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0004, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 4096, + "k": 20, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_0/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d0f6324b40fc7712eedbcd60d6cea9bb067d0df2 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 6.187693119049072, "l1_loss": 32.042784690856934, "l0": 20.0, "frac_variance_explained": 0.7525249719619751, "cossim": 0.9131239652633667, "l2_ratio": 0.9080785512924194, "relative_reconstruction_bias": 0.9949668347835541, "loss_original": 5.109375, "loss_reconstructed": 5.765625, "loss_zero": 10.25, "frac_recovered": 0.87109375, "frac_alive": 0.41064453125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_1/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_1/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..42b4b27b027693c2c12da31cce0da855c0f7ef4b --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_1/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3839536147722a6409f94e7e187f77277d1db9f4ad38fe61c1d33db4c252b1e +size 16797480 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_1/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_1/config.json new file mode 100644 index 0000000000000000000000000000000000000000..af0c92413b21a218b057cc2a14f72d5d18729197 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_1/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0004, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 4096, + "k": 20, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_1/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_1/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b4dc8714764d86f6024ebb23d7d71ce59ae3a3c8 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_1/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 6.185708522796631, "l1_loss": 36.339500427246094, "l0": 20.0, "frac_variance_explained": 0.9093905687332153, "cossim": 0.9169774353504181, "l2_ratio": 0.9097681939601898, "relative_reconstruction_bias": 0.9981831312179565, "loss_original": 5.109375, "loss_reconstructed": 5.796875, "loss_zero": 10.25, "frac_recovered": 0.8671875, "frac_alive": 0.41943359375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_10/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_10/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..98c63abb57462050791374e66541be598d6d29f3 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_10/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e50e33ab3a589b58b308ed1e12e55a0da6c7852de869bc347dfc0859345f565 +size 67178280 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_10/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_10/config.json new file mode 100644 index 0000000000000000000000000000000000000000..341394c151e8004999f2a354fb3b986cb9226947 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_10/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0002, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 16384, + "k": 80, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_10/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_10/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8b44e65244667284ade780fdfd5e1dcca99d3ca7 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_10/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.255720615386963, "l1_loss": 63.45562744140625, "l0": 80.0, "frac_variance_explained": 0.9149579405784607, "cossim": 0.9612174034118652, "l2_ratio": 0.9577771127223969, "relative_reconstruction_bias": 0.998077780008316, "loss_original": 5.109375, "loss_reconstructed": 5.265625, "loss_zero": 10.25, "frac_recovered": 0.970703125, "frac_alive": 0.3394775390625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_11/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_11/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..2dceda65cbf2b8c7159793f7cf15aec28f3124a4 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_11/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b381b52ec687bee486c3b55b82bae200787fd9833fec17c3668d231c246aab4 +size 67178280 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_11/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_11/config.json new file mode 100644 index 0000000000000000000000000000000000000000..341394c151e8004999f2a354fb3b986cb9226947 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_11/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0002, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 16384, + "k": 80, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_11/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_11/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c197c361b3b8269ae00ddefb987588f7654d8060 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_11/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.201929807662964, "l1_loss": 67.61545944213867, "l0": 80.0, "frac_variance_explained": 0.9530799686908722, "cossim": 0.9605956077575684, "l2_ratio": 0.9588273167610168, "relative_reconstruction_bias": 1.000040203332901, "loss_original": 5.109375, "loss_reconstructed": 5.28125, "loss_zero": 10.25, "frac_recovered": 0.966796875, "frac_alive": 0.33233642578125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_12/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_12/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..60af59e557976bda33eb748baf9e62c3b868aa6e --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_12/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fb8b879f137ec19b650dc9cc5474af9024f646a5fd5a57750d61940ed7e5ba9 +size 16797480 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_12/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_12/config.json new file mode 100644 index 0000000000000000000000000000000000000000..99fa00faa343611f435ba681471722e50ec235e0 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_12/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0004, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 4096, + "k": 160, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_12/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_12/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8dfead89241dc8910384e9931870181dce4b1fbb --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_12/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.2080224752426147, "l1_loss": 104.71086883544922, "l0": 160.0, "frac_variance_explained": 0.9324043989181519, "cossim": 0.9771845042705536, "l2_ratio": 0.9765647053718567, "relative_reconstruction_bias": 0.9996898770332336, "loss_original": 5.109375, "loss_reconstructed": 5.25, "loss_zero": 10.25, "frac_recovered": 0.97265625, "frac_alive": 0.606201171875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_13/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_13/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..485bdb3f790d0e3738987858078df8d29f4ba2b5 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_13/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bc1abfa77939a4c4d8e81cccaad0895564ca1a54b77ef8718501d406ba883d6 +size 16797480 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_13/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_13/config.json new file mode 100644 index 0000000000000000000000000000000000000000..99fa00faa343611f435ba681471722e50ec235e0 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_13/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0004, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 4096, + "k": 160, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_13/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_13/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..faba91d28a384af45e528900574f0eae7940b827 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_13/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.2890782356262207, "l1_loss": 110.39154052734375, "l0": 160.0, "frac_variance_explained": 0.9618218541145325, "cossim": 0.9764308929443359, "l2_ratio": 0.9765906631946564, "relative_reconstruction_bias": 1.0001537203788757, "loss_original": 5.109375, "loss_reconstructed": 5.25, "loss_zero": 10.25, "frac_recovered": 0.97265625, "frac_alive": 0.646728515625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_14/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_14/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..10928cdbe800e10297abe585bdd62b4f77ef4224 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_14/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6bbd9d99c3a43de64c0b7cffad1aaf552f818f0c0b54d9c52e4c3f13113d488 +size 67178280 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_14/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_14/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9400ac2b76c0484cc7d1902877252e55e3cceaf0 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_14/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0002, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 16384, + "k": 160, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_14/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_14/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..011cb2530e76865bef4f87701e4931dccc6896ae --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_14/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.0063244104385376, "l1_loss": 99.73238754272461, "l0": 160.0, "frac_variance_explained": 0.9606643915176392, "cossim": 0.9806200265884399, "l2_ratio": 0.9806884825229645, "relative_reconstruction_bias": 1.0003360211849213, "loss_original": 5.109375, "loss_reconstructed": 5.234375, "loss_zero": 10.25, "frac_recovered": 0.974609375, "frac_alive": 0.38726806640625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_15/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_15/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3384734b8f5ec39d23275bc9d1c970c6cb5317f --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_15/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82d7b40e3d5baae7db00fa986f5ab402a2332146817b54bd113493c3b655bb9a +size 67178280 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_15/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_15/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9400ac2b76c0484cc7d1902877252e55e3cceaf0 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_15/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0002, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 16384, + "k": 160, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_15/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_15/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b0ef37e9e770dad9974b665ea780ae666b28ad82 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_15/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 3.0843429565429688, "l1_loss": 97.2034797668457, "l0": 160.0, "frac_variance_explained": 0.9517990350723267, "cossim": 0.9791075587272644, "l2_ratio": 0.976712167263031, "relative_reconstruction_bias": 0.9977854788303375, "loss_original": 5.109375, "loss_reconstructed": 5.21875, "loss_zero": 10.25, "frac_recovered": 0.978515625, "frac_alive": 0.389892578125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_16/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_16/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e6bd03ac4788d84069e4fe74467877a84bad16d --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_16/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9e486b0716b33617709ac33a0ac850d95281a9756a0e3c8460f12ac202ca5ae +size 16797480 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_16/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_16/config.json new file mode 100644 index 0000000000000000000000000000000000000000..86967ae564409642b02037f6beec64a980cc28d9 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_16/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0004, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 4096, + "k": 320, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_16/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_16/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..246298261aa9eab1ab77e9a28d0cba14ca400db8 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_16/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.3341951966285706, "l1_loss": 185.94232177734375, "l0": 319.3899841308594, "frac_variance_explained": 0.9949994087219238, "cossim": 0.9960632026195526, "l2_ratio": 0.9965726435184479, "relative_reconstruction_bias": 1.0002158880233765, "loss_original": 5.109375, "loss_reconstructed": 5.15625, "loss_zero": 10.25, "frac_recovered": 0.990234375, "frac_alive": 0.350830078125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_17/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_17/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb0e0bc80fb9f183d42fd082ab7ebe5aef6744aa --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_17/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f72f6425dee09d4823cea0e185a15e8051149bbd8b97b06ff422db5a6d292e2e +size 16797480 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_17/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_17/config.json new file mode 100644 index 0000000000000000000000000000000000000000..86967ae564409642b02037f6beec64a980cc28d9 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_17/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0004, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 4096, + "k": 320, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_17/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_17/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a4311e3bb1647789b6e7b4b7a0bed595cdac760b --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_17/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.329186737537384, "l1_loss": 183.53929901123047, "l0": 319.7949981689453, "frac_variance_explained": 0.9940095543861389, "cossim": 0.9961691498756409, "l2_ratio": 0.9961867332458496, "relative_reconstruction_bias": 1.0001101791858673, "loss_original": 5.109375, "loss_reconstructed": 5.140625, "loss_zero": 10.25, "frac_recovered": 0.9921875, "frac_alive": 0.347412109375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_18/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_18/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a90896088177285f42356fb784a9d6e910a04de --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_18/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90eb29c0997275f9ee396786b5ce24fd73121b6f84336666a188bb8800229b8c +size 67178280 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_18/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_18/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3ec1e8c84c14055a29e4f42b29d86d0330c2348c --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_18/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0002, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 16384, + "k": 320, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_18/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_18/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a3542c214ba101c0b62498434b318c455f820e54 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_18/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.3193784952163696, "l1_loss": 172.92806243896484, "l0": 319.97999572753906, "frac_variance_explained": 0.9953396022319794, "cossim": 0.9963517785072327, "l2_ratio": 0.9963072240352631, "relative_reconstruction_bias": 1.0003407001495361, "loss_original": 5.109375, "loss_reconstructed": 5.140625, "loss_zero": 10.25, "frac_recovered": 0.9921875, "frac_alive": 0.09063720703125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_19/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_19/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0f41f2819ac75ab7bba524dd32112fdcf062d10 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_19/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab30dc74e7a47f8dfd9c967c3682aceeab9ee2c69bd244121d0626679d132247 +size 67178280 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_19/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_19/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3ec1e8c84c14055a29e4f42b29d86d0330c2348c --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_19/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0002, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 16384, + "k": 320, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_19/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_19/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..47b0cbeacf9a67c2f0ed5b4f320147634fd2a9fd --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_19/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 1.295108675956726, "l1_loss": 166.72576904296875, "l0": 320.0, "frac_variance_explained": 0.9911924004554749, "cossim": 0.9963652789592743, "l2_ratio": 0.996476411819458, "relative_reconstruction_bias": 1.0001288652420044, "loss_original": 5.109375, "loss_reconstructed": 5.140625, "loss_zero": 10.25, "frac_recovered": 0.9921875, "frac_alive": 0.086669921875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_2/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_2/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..825b05d6cec983e74a3103cd5cdc3ae062d1529d --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_2/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70c68098942c5f02e0d663fe44f783059f0ec826928b92eaaa5457c7edd7eef3 +size 67178280 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_2/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_2/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b4939cf9c056681bd3312edb4e8fa5ce355b6821 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_2/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0002, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 16384, + "k": 20, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_2/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_2/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8e94d13e9fcbe0b21ddf44427283e39b3fa26310 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_2/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.553231716156006, "l1_loss": 34.858821868896484, "l0": 20.0, "frac_variance_explained": 0.9131167232990265, "cossim": 0.9295020699501038, "l2_ratio": 0.924098938703537, "relative_reconstruction_bias": 0.9973934292793274, "loss_original": 5.109375, "loss_reconstructed": 5.546875, "loss_zero": 10.25, "frac_recovered": 0.916015625, "frac_alive": 0.1470947265625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_20/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_20/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..c885acd887e04202c4caa6f76bb7d6555616eeba --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_20/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed49d34c807212459acf0c03ae7a6c99962bf0eec54416c918887458396d765f +size 16797480 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_20/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_20/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ec9e27298865f16f82b69e69dbc9866f0e78d6b0 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_20/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0004, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 4096, + "k": 640, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_20/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_20/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4fe3ade06505e7734da3331101717edb2feb3909 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_20/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.0297269057482481, "l1_loss": 904.6213684082031, "l0": 639.8399963378906, "frac_variance_explained": 0.9999261796474457, "cossim": 0.9999942779541016, "l2_ratio": 0.9999663531780243, "relative_reconstruction_bias": 1.000014454126358, "loss_original": 5.109375, "loss_reconstructed": 5.109375, "loss_zero": 10.25, "frac_recovered": 1.0, "frac_alive": 0.349609375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_21/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_21/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..503eda6ea259319449fc6884610b8ce388917969 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_21/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d21b6ad584018d72ab49228ff7230979623187df37b93d0057464cea6bd1a655 +size 16797480 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_21/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_21/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ec9e27298865f16f82b69e69dbc9866f0e78d6b0 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_21/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0004, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 4096, + "k": 640, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_21/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_21/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6dd18b7bd07b39944234ee90025b1be99c28bd5e --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_21/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.01876946073025465, "l1_loss": 915.5217590332031, "l0": 639.6299743652344, "frac_variance_explained": 0.9999874234199524, "cossim": 0.999999463558197, "l2_ratio": 0.9999753534793854, "relative_reconstruction_bias": 0.999941498041153, "loss_original": 5.109375, "loss_reconstructed": 5.09375, "loss_zero": 10.25, "frac_recovered": 1.00390625, "frac_alive": 0.338623046875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_22/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_22/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..72ec01591572df6815f5e67369a389a7aa3cca71 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_22/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30b18a26d0eff1d7206b2e09ca7c8cc3b1b4e151789c44c9afe2bed8cc683208 +size 67178280 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_22/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_22/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4013ef55d23fba3b013da886755456138765e3db --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_22/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0002, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 16384, + "k": 640, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_22/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_22/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..fd62bdc6a6840f5400dd85b3452bb2ca98682407 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_22/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.015161680523306131, "l1_loss": 1012.1881713867188, "l0": 639.9599914550781, "frac_variance_explained": 0.9999798238277435, "cossim": 0.9999978542327881, "l2_ratio": 0.9999668598175049, "relative_reconstruction_bias": 1.0002116560935974, "loss_original": 5.109375, "loss_reconstructed": 5.09375, "loss_zero": 10.25, "frac_recovered": 1.00390625, "frac_alive": 0.0855712890625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_23/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_23/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ce4bcada2098c1e33c137b4801873d9236eb1d0e --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_23/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aba299d0da4ade335db1d0b17b6b6943da640dfd41b0e24318523182484bb3d +size 67178280 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_23/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_23/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4013ef55d23fba3b013da886755456138765e3db --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_23/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0002, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 16384, + "k": 640, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_23/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_23/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8cdd562dbe000ee1c137540f5eaff9a83bd2abb3 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_23/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 0.016230236273258924, "l1_loss": 990.3951416015625, "l0": 639.5950012207031, "frac_variance_explained": 0.9999604225158691, "cossim": 0.9999988377094269, "l2_ratio": 1.0000020563602448, "relative_reconstruction_bias": 1.0001449584960938, "loss_original": 5.109375, "loss_reconstructed": 5.09375, "loss_zero": 10.25, "frac_recovered": 1.00390625, "frac_alive": 0.0904541015625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_3/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_3/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d7e0cd4e9df3d35f8ad472a8eef0a9eddac948c5 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_3/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ab86e0111f0d6d4c293619e135a977c02803fda88469543571c6c2e503fe3c9 +size 67178280 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_3/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_3/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b4939cf9c056681bd3312edb4e8fa5ce355b6821 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_3/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0002, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 16384, + "k": 20, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_3/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_3/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ac3b91d20fad480668101dfba3e72e3f8431bb12 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_3/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.691006422042847, "l1_loss": 33.56903648376465, "l0": 20.0, "frac_variance_explained": 0.9082096815109253, "cossim": 0.9299785196781158, "l2_ratio": 0.9247334897518158, "relative_reconstruction_bias": 0.9966910183429718, "loss_original": 5.109375, "loss_reconstructed": 5.53125, "loss_zero": 10.25, "frac_recovered": 0.91796875, "frac_alive": 0.14593505859375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_4/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_4/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..e56b90321bbe9e2051a0b689e1300aea99560b8f --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_4/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:547518859ef60f525e68c9c963b5fa6d43b13e0d2a320b014279d328a60ea1d4 +size 16797480 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_4/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_4/config.json new file mode 100644 index 0000000000000000000000000000000000000000..08dd2b0463f196c23e5d9469a8bbca1783a4a9a8 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_4/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0004, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 4096, + "k": 40, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_4/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_4/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c7ef11d5d3bfcac8b09311c1a2e25557b179d3fe --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_4/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.363648414611816, "l1_loss": 46.02840995788574, "l0": 40.0, "frac_variance_explained": 0.8620550632476807, "cossim": 0.9350945055484772, "l2_ratio": 0.9320630729198456, "relative_reconstruction_bias": 0.9985549449920654, "loss_original": 5.109375, "loss_reconstructed": 5.609375, "loss_zero": 10.25, "frac_recovered": 0.90234375, "frac_alive": 0.56103515625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_5/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_5/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2384e82118cbd2cd37bcd03ff3ac69ff0fc2879 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_5/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:501d5c7bcda2e6689a72b72bd27cf62b014a3e03cfa4fa7168ed9fadc256699a +size 16797480 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_5/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_5/config.json new file mode 100644 index 0000000000000000000000000000000000000000..08dd2b0463f196c23e5d9469a8bbca1783a4a9a8 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_5/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0004, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 4096, + "k": 40, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_5/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_5/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1a0c0693700bdfa0f59b9f0cd7013234022c9b71 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_5/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 5.565380573272705, "l1_loss": 46.831459045410156, "l0": 40.0, "frac_variance_explained": 0.8650524914264679, "cossim": 0.9291727244853973, "l2_ratio": 0.926683783531189, "relative_reconstruction_bias": 0.9992250800132751, "loss_original": 5.109375, "loss_reconstructed": 5.59375, "loss_zero": 10.25, "frac_recovered": 0.90625, "frac_alive": 0.58642578125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_6/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_6/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..d1c82dc48130394130ade96a1b3a37a0d816b414 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_6/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6618158b8fb41ade6baf1a7271849bfc82d02048425b2beb5e3dfe2b18fa9fc +size 67178280 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_6/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_6/config.json new file mode 100644 index 0000000000000000000000000000000000000000..058c2b387ee7176272855ea951f86d1a82af762f --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_6/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0002, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 16384, + "k": 40, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_6/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_6/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..379f6d03771f2730388d95d991dfbefc8c215493 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_6/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.915081262588501, "l1_loss": 45.74349021911621, "l0": 40.0, "frac_variance_explained": 0.9220407605171204, "cossim": 0.9473784863948822, "l2_ratio": 0.9431718289852142, "relative_reconstruction_bias": 0.9988125562667847, "loss_original": 5.109375, "loss_reconstructed": 5.390625, "loss_zero": 10.25, "frac_recovered": 0.9453125, "frac_alive": 0.24053955078125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_7/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_7/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..21c884cf7f5776db6bc47bf4f8dfdd3d295b60a8 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_7/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd0a6a0f37cdbfcb81162dd14c70bcf0ce4667a233a952b1344580ece133e223 +size 67178280 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_7/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_7/config.json new file mode 100644 index 0000000000000000000000000000000000000000..058c2b387ee7176272855ea951f86d1a82af762f --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_7/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0002, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 16384, + "k": 40, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_7/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_7/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..935df8d056af1336648b2c1531985db4004492f3 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_7/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.883408784866333, "l1_loss": 44.199357986450195, "l0": 40.0, "frac_variance_explained": 0.8970994651317596, "cossim": 0.9464913010597229, "l2_ratio": 0.939737856388092, "relative_reconstruction_bias": 0.9962046444416046, "loss_original": 5.109375, "loss_reconstructed": 5.375, "loss_zero": 10.25, "frac_recovered": 0.947265625, "frac_alive": 0.2408447265625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_8/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_8/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..017f3647f69a3509c5249230f07b37dfd4599a49 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_8/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7711b85db2fd9f4e64228e11e24f3a1e26609b4dc62022eac4b02848bf18496 +size 16797480 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_8/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_8/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8c307037979229d6f5e6440a26633c76862a839c --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_8/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0004, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 4096, + "k": 80, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_8/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_8/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bbf5228b5138229f2110b0943107098d1e36a280 --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_8/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.521111965179443, "l1_loss": 71.71196365356445, "l0": 80.0, "frac_variance_explained": 0.9191311895847321, "cossim": 0.9547393918037415, "l2_ratio": 0.9538576602935791, "relative_reconstruction_bias": 0.9993356466293335, "loss_original": 5.109375, "loss_reconstructed": 5.375, "loss_zero": 10.25, "frac_recovered": 0.947265625, "frac_alive": 0.65234375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_9/ae.pt b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_9/ae.pt new file mode 100644 index 0000000000000000000000000000000000000000..ef4ce53f7d609fd49ed1a998d4c407164ee7899f --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_9/ae.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:507b0b954df772f3ea28ad9a67064f0fa2a00487de5b9a97ce1c545d457dd63c +size 16797480 diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_9/config.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_9/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8c307037979229d6f5e6440a26633c76862a839c --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_9/config.json @@ -0,0 +1,26 @@ +{ + "trainer": { + "trainer_class": "TrainerTopK", + "dict_class": "AutoEncoderTopK", + "lr": 0.0004, + "steps": 48828, + "seed": 0, + "activation_dim": 512, + "dict_size": 4096, + "k": 80, + "device": "cuda:0", + "layer": 4, + "lm_name": "EleutherAI/pythia-70m-deduped", + "wandb_name": "TopKTrainer-EleutherAI/pythia-70m-deduped-resid_post_layer_4", + "submodule_name": "resid_post_layer_4" + }, + "buffer": { + "d_submodule": 512, + "io": "out", + "n_ctxs": 10000, + "ctx_len": 128, + "refresh_batch_size": 32, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_9/eval_results.json b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_9/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8b66261059c8f83a2786bd5339fd3b87f79a29be --- /dev/null +++ b/pythia70m_sweep_topk_ctx128_0730/resid_post_layer_4/trainer_9/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 4.46405553817749, "l1_loss": 71.03140640258789, "l0": 80.0, "frac_variance_explained": 0.932121604681015, "cossim": 0.9542989134788513, "l2_ratio": 0.9516503214836121, "relative_reconstruction_bias": 0.9991129636764526, "loss_original": 5.109375, "loss_reconstructed": 5.390625, "loss_zero": 10.25, "frac_recovered": 0.9453125, "frac_alive": 0.630126953125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file