diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f92795b1db26c0c61cbc35ada8dac9382ab53c4d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b4132b575d0812eb2c60ac71266df8b2875c0874 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 217.6, "l1_loss": 12697.6, "l0": 9219.10849609375, "frac_variance_explained": -1.03125, "cossim": 0.0041290283203125, "l2_ratio": 1.1546875, "relative_reconstruction_bias": 242.8, "loss_original": 2.440642213821411, "loss_reconstructed": 19.563122940063476, "loss_zero": 12.452932643890382, "frac_recovered": -0.710543018579483, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e882a58c24b859f20aa112b1e9a1c9ea169c9f48 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2624b81cdaab4a67ff7ce326bc00590d6b05ba6f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 128.35, "l1_loss": 3496.0, "l0": 3833.566796875, "frac_variance_explained": -0.017578125, "cossim": 0.4515625, "l2_ratio": 0.462890625, "relative_reconstruction_bias": 1.023046875, "loss_original": 2.440642213821411, "loss_reconstructed": 8.181695604324341, "loss_zero": 12.452932643890382, "frac_recovered": 0.4267542868852615, "frac_alive": 0.9962565302848816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8e46f16a6a67cdb0d69dab4e4e0ed6bf094bdfc --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8d39a9fc21987b3c812f6168d959ea18530f9d80 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 69.55, "l1_loss": 493.4, "l0": 255.4000045776367, "frac_variance_explained": 0.64609375, "cossim": 0.8765625, "l2_ratio": 0.824609375, "relative_reconstruction_bias": 0.947265625, "loss_original": 2.440642213821411, "loss_reconstructed": 3.072607707977295, "loss_zero": 12.452932643890382, "frac_recovered": 0.9368688404560089, "frac_alive": 0.2028537392616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5d7db5860f8ae737eb796531c1e35c06beca2b13 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..375e27d5cfca7c58f0fcb1673f634351d19524e9 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 45.65, "l1_loss": 719.6, "l0": 673.3541748046875, "frac_variance_explained": 0.8828125, "cossim": 0.948046875, "l2_ratio": 0.91171875, "relative_reconstruction_bias": 0.974609375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5144663572311403, "loss_zero": 12.452932643890382, "frac_recovered": 0.992665809392929, "frac_alive": 0.6722548007965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..434e08e2653a2b4fd3ce6e7590383422a82ac176 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bfe65a4158b6a161902b8e596548f031e102b3a7 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 174.1, "l1_loss": 11296.0, "l0": 8454.97939453125, "frac_variance_explained": -0.75546875, "cossim": 0.10068359375, "l2_ratio": 0.75859375, "relative_reconstruction_bias": 7.478125, "loss_original": 2.440642213821411, "loss_reconstructed": 10.471371078491211, "loss_zero": 12.452932643890382, "frac_recovered": 0.1980000004172325, "frac_alive": 0.9999457597732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b31e8d4129af31667d4ce9557422a8824364b5fa --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8c0c0a91977e6c30c041aa8832e52d3d384fa374 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 89.5, "l1_loss": 508.4, "l0": 306.70000915527345, "frac_variance_explained": 0.43046875, "cossim": 0.784375, "l2_ratio": 0.713671875, "relative_reconstruction_bias": 0.91953125, "loss_original": 2.440642213821411, "loss_reconstructed": 5.207838249206543, "loss_zero": 12.452932643890382, "frac_recovered": 0.7236662685871125, "frac_alive": 0.7305229902267456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b98f9f2b0ec259b242b4c4e18c7bbae5b89b5a8c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0185ba3ef7acd2ed7d2c72a8393361bd4028e301 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_0_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 54.025, "l1_loss": 653.2, "l0": 478.7708465576172, "frac_variance_explained": 0.78828125, "cossim": 0.926953125, "l2_ratio": 0.87734375, "relative_reconstruction_bias": 0.951953125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5821482658386232, "loss_zero": 12.452932643890382, "frac_recovered": 0.98592569231987, "frac_alive": 0.2722981870174408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3017b0e1ec2fa0a7935ea5740383ce3280346a02 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a9c75d6da6657f7e48ed261ad80edf69123fb2f8 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 230.8, "l1_loss": 13510.4, "l0": 9219.1044921875, "frac_variance_explained": -1.053125, "cossim": 0.0073699951171875, "l2_ratio": 1.15546875, "relative_reconstruction_bias": 181.7, "loss_original": 2.440642213821411, "loss_reconstructed": 19.563122940063476, "loss_zero": 12.452932643890382, "frac_recovered": -0.710543018579483, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f3fd836c6cb0ff30625995002e044357bd011189 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..77b9fbd606dec78c2667088a17109c17d121c7ba --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 138.4, "l1_loss": 3404.8, "l0": 3756.1584228515626, "frac_variance_explained": -0.034765625, "cossim": 0.35234375, "l2_ratio": 0.4197265625, "relative_reconstruction_bias": 1.165625, "loss_original": 2.440642213821411, "loss_reconstructed": 9.118078804016113, "loss_zero": 12.452932643890382, "frac_recovered": 0.33320634365081786, "frac_alive": 0.997178852558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a2d4b44bc56c232297250ed8a1d98b7e130590f7 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4fb68d3205373da4f0bf0713a6784e85a3bc95d9 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 80.25, "l1_loss": 283.6, "l0": 86.57500228881835, "frac_variance_explained": 0.50859375, "cossim": 0.832421875, "l2_ratio": 0.780859375, "relative_reconstruction_bias": 0.944140625, "loss_original": 2.440642213821411, "loss_reconstructed": 4.01392765045166, "loss_zero": 12.452932643890382, "frac_recovered": 0.8428499519824981, "frac_alive": 0.138671875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5496a2391ba8e96690dd6c6265bf1acd94746e5a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1fb19768e4449633122d66737b04b95c1bdb4cd9 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 58.025, "l1_loss": 424.0, "l0": 212.88750610351562, "frac_variance_explained": 0.785546875, "cossim": 0.91328125, "l2_ratio": 0.867578125, "relative_reconstruction_bias": 0.959765625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.6162596464157106, "loss_zero": 12.452932643890382, "frac_recovered": 0.9825248777866363, "frac_alive": 0.3571506142616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e97de7ed4ca04f9136899677449986d4a3bfcc2b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..eb7ceaf67566b28b9c1432a0a00cd52b436da463 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 188.9, "l1_loss": 12384.0, "l0": 8454.1712890625, "frac_variance_explained": -0.6765625, "cossim": 0.08720703125, "l2_ratio": 0.75625, "relative_reconstruction_bias": 8.128125, "loss_original": 2.440642213821411, "loss_reconstructed": 10.604339790344238, "loss_zero": 12.452932643890382, "frac_recovered": 0.18471183478832245, "frac_alive": 0.9999457597732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c7b04e8f1ead86e7a39a59f9e1efb307b8d4de98 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a84ca58d5611e4ecdf70aed55747376587103bba --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 98.15, "l1_loss": 381.2, "l0": 213.7291748046875, "frac_variance_explained": 0.3546875, "cossim": 0.727734375, "l2_ratio": 0.65, "relative_reconstruction_bias": 0.910546875, "loss_original": 2.440642213821411, "loss_reconstructed": 6.74614634513855, "loss_zero": 12.452932643890382, "frac_recovered": 0.5700634002685547, "frac_alive": 0.7088758945465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2a2997fb4faa5fa3ff468c24b769376cfde01036 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3998fd0d7db0c69a74296ee0ebca75a65ca02faa --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_1_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 68.95, "l1_loss": 354.8, "l0": 141.5416732788086, "frac_variance_explained": 0.626953125, "cossim": 0.87421875, "l2_ratio": 0.82109375, "relative_reconstruction_bias": 0.94296875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.8861867427825927, "loss_zero": 12.452932643890382, "frac_recovered": 0.9555670261383057, "frac_alive": 0.1347113698720932, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fbf44b89ede4e821fcae7bfd45764c473cec8848 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..af4e76f8480535b1d177b2e3f18f9721cf6526ef --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 227.9, "l1_loss": 13337.6, "l0": 9220.1462890625, "frac_variance_explained": -1.034375, "cossim": 0.00837249755859375, "l2_ratio": 1.15546875, "relative_reconstruction_bias": 130.1, "loss_original": 2.440642213821411, "loss_reconstructed": 19.563122940063476, "loss_zero": 12.452932643890382, "frac_recovered": -0.710543018579483, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5114cbb0c481ba7ab458236e5a567c305fdf4dbb --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3ecb82768c42f4ef131372883facc266f8462e6a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 141.2, "l1_loss": 3417.6, "l0": 3691.47509765625, "frac_variance_explained": -0.040234375, "cossim": 0.3244140625, "l2_ratio": 0.4064453125, "relative_reconstruction_bias": 1.22109375, "loss_original": 2.440642213821411, "loss_reconstructed": 9.383785438537597, "loss_zero": 12.452932643890382, "frac_recovered": 0.3066366076469421, "frac_alive": 0.99658203125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6702903517e174da0e4a8104f97c4c352bd48d05 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..19465e10458507a355e071428f479973d8d01cbf --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 82.7, "l1_loss": 241.6, "l0": 60.62500152587891, "frac_variance_explained": 0.5109375, "cossim": 0.8140625, "l2_ratio": 0.765625, "relative_reconstruction_bias": 0.950390625, "loss_original": 2.440642213821411, "loss_reconstructed": 4.482995939254761, "loss_zero": 12.452932643890382, "frac_recovered": 0.7959964573383331, "frac_alive": 0.1371527761220932, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..545a9efb45ae8d1e67fc408c7daf3ea6507f8f3d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1d371d63822c3e87438f0e1fdd9d00f3df5ab820 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 63.65, "l1_loss": 358.4, "l0": 140.45833892822264, "frac_variance_explained": 0.72890625, "cossim": 0.897265625, "l2_ratio": 0.844921875, "relative_reconstruction_bias": 0.951953125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.716301202774048, "loss_zero": 12.452932643890382, "frac_recovered": 0.9725344896316528, "frac_alive": 0.241970494389534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4775b10ddd82bf9237592a6d2888e47e16b215b0 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5e5b30f0335db7270415c3408a7407260011effc --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 176.7, "l1_loss": 11347.2, "l0": 8398.85029296875, "frac_variance_explained": -0.7203125, "cossim": 0.086279296875, "l2_ratio": 0.7546875, "relative_reconstruction_bias": 8.26875, "loss_original": 2.440642213821411, "loss_reconstructed": 10.636037349700928, "loss_zero": 12.452932643890382, "frac_recovered": 0.18154401183128357, "frac_alive": 0.9999457597732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..976dbf8f20541309e35992a9fa411cf417cd3879 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..38e685e45970865a91825d4912b6b4537d6241ba --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 101.1, "l1_loss": 326.8, "l0": 210.6291702270508, "frac_variance_explained": 0.2640625, "cossim": 0.70234375, "l2_ratio": 0.614453125, "relative_reconstruction_bias": 0.890625, "loss_original": 2.440642213821411, "loss_reconstructed": 7.406615495681763, "loss_zero": 12.452932643890382, "frac_recovered": 0.5041129201650619, "frac_alive": 0.7194553017616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e78a1b618be9800ef1fe15b6b67f2de26b2e900e --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..be473c9dd17bcf8887b95ea2ef658f1a3ed1a4c7 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_2_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 72.2, "l1_loss": 301.6, "l0": 86.50000228881837, "frac_variance_explained": 0.73359375, "cossim": 0.8578125, "l2_ratio": 0.798828125, "relative_reconstruction_bias": 0.959375, "loss_original": 2.440642213821411, "loss_reconstructed": 3.290687155723572, "loss_zero": 12.452932643890382, "frac_recovered": 0.9150846123695373, "frac_alive": 0.0876193568110466, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..de2d18f22bbbb2807cc95abe8ee0af4e247e4a9f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..eea407e7837b551037aa9dabd80e573e432282fa --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 224.6, "l1_loss": 13132.8, "l0": 9213.05458984375, "frac_variance_explained": -1.03046875, "cossim": 0.007816314697265625, "l2_ratio": 1.15546875, "relative_reconstruction_bias": 195.5, "loss_original": 2.440642213821411, "loss_reconstructed": 19.563122940063476, "loss_zero": 12.452932643890382, "frac_recovered": -0.710543018579483, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5b235022521c3234c916f6d0cc809d784ab62d0f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..66906c1842e9ec7cf4aa372cb63953985ddd8799 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 140.9, "l1_loss": 3142.4, "l0": 3520.612548828125, "frac_variance_explained": -0.069140625, "cossim": 0.291015625, "l2_ratio": 0.3875, "relative_reconstruction_bias": 1.308203125, "loss_original": 2.440642213821411, "loss_reconstructed": 9.744282913208007, "loss_zero": 12.452932643890382, "frac_recovered": 0.2705884039402008, "frac_alive": 0.9943576455116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8b63c2eec8bee6b15f5be80f46aa14c69d18cbd3 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b54d52dfcaeea9d46af7ab8518bcfdd45a788921 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 91.6, "l1_loss": 197.6, "l0": 37.650001525878906, "frac_variance_explained": 0.503515625, "cossim": 0.78046875, "l2_ratio": 0.72265625, "relative_reconstruction_bias": 0.947265625, "loss_original": 2.440642213821411, "loss_reconstructed": 5.352246809005737, "loss_zero": 12.452932643890382, "frac_recovered": 0.709167218208313, "frac_alive": 0.1135525181889534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c07f7a2bcac62c2e21a0505c24df5fb1312dbba3 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c209d59c8a85a513124a37ab91560b6b06518767 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 72.05, "l1_loss": 252.3, "l0": 64.75000114440918, "frac_variance_explained": 0.631640625, "cossim": 0.861328125, "l2_ratio": 0.807421875, "relative_reconstruction_bias": 0.9453125, "loss_original": 2.440642213821411, "loss_reconstructed": 3.132000136375427, "loss_zero": 12.452932643890382, "frac_recovered": 0.9310269713401794, "frac_alive": 0.099500872194767, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f9ccd06eba4502f6c30c193db49fb869c917de02 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..fc80ff3e72d4bcf4abfd3c4c34bb85af7524d549 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 178.8, "l1_loss": 11526.4, "l0": 8427.72529296875, "frac_variance_explained": -0.728125, "cossim": 0.0859375, "l2_ratio": 0.751953125, "relative_reconstruction_bias": 8.265625, "loss_original": 2.440642213821411, "loss_reconstructed": 10.669744300842286, "loss_zero": 12.452932643890382, "frac_recovered": 0.17817798852920533, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2f84e0f7c4eb3267bc05ce5c12f642fd51522f63 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7da0765fb90d845c91e5cdd724ef93b4f69b89bf --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 107.6, "l1_loss": 263.7, "l0": 187.25833892822266, "frac_variance_explained": 0.18203125, "cossim": 0.65, "l2_ratio": 0.5578125, "relative_reconstruction_bias": 0.87890625, "loss_original": 2.440642213821411, "loss_reconstructed": 8.886247253417968, "loss_zero": 12.452932643890382, "frac_recovered": 0.35650283098220825, "frac_alive": 0.7111545205116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e209dc7629222f6d88651553b56c17cb881612a9 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3816e462a7e4c90e22515f89b9263e266686088e --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_3_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 79.45, "l1_loss": 212.9, "l0": 40.37083473205566, "frac_variance_explained": 0.619921875, "cossim": 0.82109375, "l2_ratio": 0.759375, "relative_reconstruction_bias": 0.948828125, "loss_original": 2.440642213821411, "loss_reconstructed": 4.25244140625, "loss_zero": 12.452932643890382, "frac_recovered": 0.8189941287040711, "frac_alive": 0.0354275181889534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d8d1c02de442457025fcded30b3cc68355597a74 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4e84f2dcc96a5943211c569815453b23153d8b07 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 225.7, "l1_loss": 13216.0, "l0": 9215.48349609375, "frac_variance_explained": -1.03046875, "cossim": 0.007458209991455078, "l2_ratio": 1.153125, "relative_reconstruction_bias": -8355.05, "loss_original": 2.440642213821411, "loss_reconstructed": 19.563122940063476, "loss_zero": 12.452932643890382, "frac_recovered": -0.710543018579483, "frac_alive": 0.9999457597732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f9d224dce8bb69cb5602bfb8610795c806f6d138 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..29014d5ed159d87a94039c874f6bc109adc10a48 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 148.3, "l1_loss": 3659.2, "l0": 3528.3126220703125, "frac_variance_explained": -0.00859375, "cossim": 0.284375, "l2_ratio": 0.3830078125, "relative_reconstruction_bias": 1.25, "loss_original": 2.440642213821411, "loss_reconstructed": 9.948379707336425, "loss_zero": 12.452932643890382, "frac_recovered": 0.2501836001873016, "frac_alive": 0.9934353232383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3c10358de5c0be70e09fc50ef907f67a27d8bdaf --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4ca6836fbeb80610bde5c7f7eb1e46c6b46e79f5 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 96.05, "l1_loss": 163.1, "l0": 23.87500057220459, "frac_variance_explained": 0.401171875, "cossim": 0.746484375, "l2_ratio": 0.683203125, "relative_reconstruction_bias": 0.932421875, "loss_original": 2.440642213821411, "loss_reconstructed": 6.304030132293701, "loss_zero": 12.452932643890382, "frac_recovered": 0.6141297578811645, "frac_alive": 0.0886501744389534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..138047db316953825992d2d803eabba7408ab75a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1cf385bf667b24ed47200a66198a27e4f2b20ce4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 79.25, "l1_loss": 188.4, "l0": 35.304167938232425, "frac_variance_explained": 0.52734375, "cossim": 0.825390625, "l2_ratio": 0.760546875, "relative_reconstruction_bias": 0.93046875, "loss_original": 2.440642213821411, "loss_reconstructed": 4.10310423374176, "loss_zero": 12.452932643890382, "frac_recovered": 0.8339592039585113, "frac_alive": 0.0388997383415699, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a76101ccb1102b2b2a4cd4dc056410be9b8bfce5 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0a19364ba17a123067ed97e2cf63790581c22c86 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 177.3, "l1_loss": 11411.2, "l0": 8449.48369140625, "frac_variance_explained": -0.7890625, "cossim": 0.080859375, "l2_ratio": 0.755859375, "relative_reconstruction_bias": 9.15, "loss_original": 2.440642213821411, "loss_reconstructed": 10.689292430877686, "loss_zero": 12.452932643890382, "frac_recovered": 0.17622570991516112, "frac_alive": 0.9999457597732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..056e4b98e90af530ae15f8241c8fd6e03ee8780d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6c2a74515e55a7026e107af0614db8922356b5ba --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 111.1, "l1_loss": 228.8, "l0": 161.11667175292968, "frac_variance_explained": 0.181640625, "cossim": 0.63359375, "l2_ratio": 0.525390625, "relative_reconstruction_bias": 0.858984375, "loss_original": 2.440642213821411, "loss_reconstructed": 9.610341358184815, "loss_zero": 12.452932643890382, "frac_recovered": 0.2841554760932922, "frac_alive": 0.7009548544883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cadccca4f95503d59212d42bf9ce4c6a3beff406 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6d4b93e4c3bb150c827a4b11cb4b746545c55268 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_4_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 87.2, "l1_loss": 171.1, "l0": 23.016667366027832, "frac_variance_explained": 0.5234375, "cossim": 0.78203125, "l2_ratio": 0.72421875, "relative_reconstruction_bias": 0.947265625, "loss_original": 2.440642213821411, "loss_reconstructed": 5.1860432624816895, "loss_zero": 12.452932643890382, "frac_recovered": 0.7257787346839905, "frac_alive": 0.01508246548473835, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e1c70993871aebe2111581d5d5ac85bccd0c2ec1 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..51d63617530737b0ffa523dc057652a734ddf76b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 229.1, "l1_loss": 13427.2, "l0": 9219.6294921875, "frac_variance_explained": -1.03515625, "cossim": 0.0065277099609375, "l2_ratio": 1.1546875, "relative_reconstruction_bias": 1310.4, "loss_original": 2.440642213821411, "loss_reconstructed": 19.563122940063476, "loss_zero": 12.452932643890382, "frac_recovered": -0.710543018579483, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ae524ce87a39afa0b633a6caa4fc6e3a309bb8e8 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1ee9c06c2a254bf8884771852fb01fdd95285188 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 142.9, "l1_loss": 3104.0, "l0": 3512.3501220703124, "frac_variance_explained": -0.062109375, "cossim": 0.2708984375, "l2_ratio": 0.3837890625, "relative_reconstruction_bias": 1.341015625, "loss_original": 2.440642213821411, "loss_reconstructed": 10.05876636505127, "loss_zero": 12.452932643890382, "frac_recovered": 0.23915767222642897, "frac_alive": 0.994140625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..78a17f16ddef88900de88a7c654937f7f7e9e630 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..74d9727336b97722561e9231b70f0e0640a53480 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 99.25, "l1_loss": 136.9, "l0": 16.15416736602783, "frac_variance_explained": 0.33515625, "cossim": 0.723046875, "l2_ratio": 0.65546875, "relative_reconstruction_bias": 0.92265625, "loss_original": 2.440642213821411, "loss_reconstructed": 7.244567346572876, "loss_zero": 12.452932643890382, "frac_recovered": 0.5202153503894806, "frac_alive": 0.0591905377805233, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e7f4c608e46f0685db2bfd111d2441d33c81665f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..93b97751ff510aa97a7c0732ab17d8ef989648e0 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 87.6, "l1_loss": 153.9, "l0": 20.15416736602783, "frac_variance_explained": 0.454296875, "cossim": 0.782421875, "l2_ratio": 0.7171875, "relative_reconstruction_bias": 0.9328125, "loss_original": 2.440642213821411, "loss_reconstructed": 5.088769006729126, "loss_zero": 12.452932643890382, "frac_recovered": 0.7355478584766388, "frac_alive": 0.013888888992369175, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e627cbeb04a52a8bf7500e9c8f85e19bb92c832a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..720eb2aaac88f2e240a209200a5b82c84680daa2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 182.1, "l1_loss": 11756.8, "l0": 8451.31689453125, "frac_variance_explained": -0.77421875, "cossim": 0.08115234375, "l2_ratio": 0.753515625, "relative_reconstruction_bias": 9.053125, "loss_original": 2.440642213821411, "loss_reconstructed": 10.701376819610596, "loss_zero": 12.452932643890382, "frac_recovered": 0.1750192239880562, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7406e04cc2d173be9d80b47a4a0280fe35164b6a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0897b2a8dc24baa6bea4841d7379b446c4f3b74c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 124.95, "l1_loss": 204.45, "l0": 160.62917098999023, "frac_variance_explained": 0.2703125, "cossim": 0.553515625, "l2_ratio": 0.316796875, "relative_reconstruction_bias": 0.708984375, "loss_original": 2.440642213821411, "loss_reconstructed": 10.599821949005127, "loss_zero": 12.452932643890382, "frac_recovered": 0.18526119142770767, "frac_alive": 0.7195637822151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e0f46ccaaa502941ed03153e855ff2ab57eda947 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 11, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_11", + "submodule_name": "resid_post_layer_11", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..feedeb21bd0358848298d5e166b9716fe5da06f4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_11_checkpoints/trainer_5_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 91.45, "l1_loss": 141.15, "l0": 15.133333778381347, "frac_variance_explained": 0.3828125, "cossim": 0.755859375, "l2_ratio": 0.683984375, "relative_reconstruction_bias": 0.92109375, "loss_original": 2.440642213821411, "loss_reconstructed": 6.110202693939209, "loss_zero": 12.452932643890382, "frac_recovered": 0.6334960699081421, "frac_alive": 0.008626301772892475, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..be34c5953f2fd5798b6b0f1b3e941e9c7f38dc48 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..df339326947b64f4f44680fdde2d697cf0df56a0 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 298.4, "l1_loss": 17523.2, "l0": 9199.8087890625, "frac_variance_explained": -1.0109375, "cossim": 0.0119964599609375, "l2_ratio": 1.14921875, "relative_reconstruction_bias": 106.025, "loss_original": 2.440642213821411, "loss_reconstructed": 19.979672050476076, "loss_zero": 12.452932643890382, "frac_recovered": -0.7520080089569092, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..127a1278e367fa4e2f29be1e71b76a66bfe455e1 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9ca6e938b4e176d224da0a09dda60e1ec31ab195 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 178.5, "l1_loss": 5555.2, "l0": 4155.38759765625, "frac_variance_explained": 0.012890625, "cossim": 0.4712890625, "l2_ratio": 0.4849609375, "relative_reconstruction_bias": 1.014453125, "loss_original": 2.440642213821411, "loss_reconstructed": 7.340534353256226, "loss_zero": 12.452932643890382, "frac_recovered": 0.510846272110939, "frac_alive": 0.9973415732383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ad30ff35cccbc2cef0614d40c7f7d0efbb1b91ea --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..fb97a1d7bfee1346116a1d5975bb9ef76fff7baf --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 88.45, "l1_loss": 678.0, "l0": 257.46667633056643, "frac_variance_explained": 0.694921875, "cossim": 0.889453125, "l2_ratio": 0.843359375, "relative_reconstruction_bias": 0.95546875, "loss_original": 2.440642213821411, "loss_reconstructed": 3.161420702934265, "loss_zero": 12.452932643890382, "frac_recovered": 0.928012239933014, "frac_alive": 0.2344835102558136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f34ba5cb45cfd4805a6095611bb07c5bf3402b73 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9dfd7f8033947689b03f7056d88fc21d3dd8d80f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 57.575, "l1_loss": 872.0, "l0": 579.3583557128907, "frac_variance_explained": 0.858203125, "cossim": 0.955078125, "l2_ratio": 0.91875, "relative_reconstruction_bias": 0.965625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.526445484161377, "loss_zero": 12.452932643890382, "frac_recovered": 0.9914665818214417, "frac_alive": 0.6834852695465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9f2fa60e5b9d3624aad134d2425e6c44c473ba01 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..51389b0414b9d446787eacb748458fd247a314a8 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 233.0, "l1_loss": 15436.8, "l0": 8506.6794921875, "frac_variance_explained": -0.73125, "cossim": 0.1138671875, "l2_ratio": 0.755859375, "relative_reconstruction_bias": 6.384375, "loss_original": 2.440642213821411, "loss_reconstructed": 10.566972827911377, "loss_zero": 12.452932643890382, "frac_recovered": 0.18852877020835876, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2bb3ec6a4f4782212769f478769757456452b4b0 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..243a3be9326483e0fd7e78f90f2e63f78bb208a4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 121.25, "l1_loss": 727.2, "l0": 390.58751220703124, "frac_variance_explained": 0.439453125, "cossim": 0.77421875, "l2_ratio": 0.70546875, "relative_reconstruction_bias": 0.923046875, "loss_original": 2.440642213821411, "loss_reconstructed": 5.248047256469727, "loss_zero": 12.452932643890382, "frac_recovered": 0.7196589469909668, "frac_alive": 0.8287760615348816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ae5c2141efdbff2c6c4c43d9d8c58f08bc5cc0b3 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..89b64d267bb6cb64066ee38f7191d7b107c829fe --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_0_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 67.425, "l1_loss": 873.2, "l0": 466.35418090820315, "frac_variance_explained": 0.8203125, "cossim": 0.93515625, "l2_ratio": 0.895703125, "relative_reconstruction_bias": 0.964453125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.608068060874939, "loss_zero": 12.452932643890382, "frac_recovered": 0.9833413660526276, "frac_alive": 0.3128255307674408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..613902cc7278d4ce8b21353e0e5de50d35cb73b8 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6c717a711d5927b8472c3938014e500c05c64ebf --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 303.8, "l1_loss": 17772.8, "l0": 9207.08798828125, "frac_variance_explained": -1.01796875, "cossim": 0.0111541748046875, "l2_ratio": 1.1515625, "relative_reconstruction_bias": 110.975, "loss_original": 2.440642213821411, "loss_reconstructed": 19.979672050476076, "loss_zero": 12.452932643890382, "frac_recovered": -0.7520080089569092, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2a9590be645e7d74c14c143ed3463338c188effe --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e2bf16932c8b50d403344d2fa53accb2afa3a8de --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 189.9, "l1_loss": 5363.2, "l0": 3789.4833984375, "frac_variance_explained": 0.009375, "cossim": 0.371875, "l2_ratio": 0.4171875, "relative_reconstruction_bias": 1.08203125, "loss_original": 2.440642213821411, "loss_reconstructed": 8.38591194152832, "loss_zero": 12.452932643890382, "frac_recovered": 0.4064041078090668, "frac_alive": 0.9968532919883728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..363e01cba7a6b007319362227c267ddf8568b707 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..23d63da2c52fbd0e597e10627b4d1720f386eade --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 101.15, "l1_loss": 418.6, "l0": 86.38333587646484, "frac_variance_explained": 0.61640625, "cossim": 0.852734375, "l2_ratio": 0.8046875, "relative_reconstruction_bias": 0.95234375, "loss_original": 2.440642213821411, "loss_reconstructed": 4.054786968231201, "loss_zero": 12.452932643890382, "frac_recovered": 0.838770842552185, "frac_alive": 0.1159396693110466, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d969770473e2d531326f1286f5796d8a65ceb212 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8d401d7cef6579b7b73b645e8462902e2bea5f19 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 72.2, "l1_loss": 592.0, "l0": 210.61667175292968, "frac_variance_explained": 0.82109375, "cossim": 0.93046875, "l2_ratio": 0.887109375, "relative_reconstruction_bias": 0.966796875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.6363804817199705, "loss_zero": 12.452932643890382, "frac_recovered": 0.9805110156536102, "frac_alive": 0.3841688334941864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c4f857d6494ef5c964c59da7db22e060dcf23dab --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..527552efb4f90e2f1028715b60e92dabfe531030 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 236.3, "l1_loss": 15417.6, "l0": 8462.312890625, "frac_variance_explained": -0.75859375, "cossim": 0.096337890625, "l2_ratio": 0.75078125, "relative_reconstruction_bias": 7.54375, "loss_original": 2.440642213821411, "loss_reconstructed": 10.713249492645264, "loss_zero": 12.452932643890382, "frac_recovered": 0.17390942722558975, "frac_alive": 0.9998915195465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..88cecce16dbe9a1ade018f9e664b46083c2e0284 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..eebff452a1fae05d0adae60d8cda1be093f32e72 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 130.5, "l1_loss": 524.4, "l0": 264.0416717529297, "frac_variance_explained": 0.33515625, "cossim": 0.73359375, "l2_ratio": 0.651953125, "relative_reconstruction_bias": 0.903125, "loss_original": 2.440642213821411, "loss_reconstructed": 6.326344060897827, "loss_zero": 12.452932643890382, "frac_recovered": 0.6120063900947571, "frac_alive": 0.763780415058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2644c9102713f0d7f006307f04816db6d073676f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..11cd366a774ab96fdcb3e9c29cd1b55c6fa71208 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_1_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 84.55, "l1_loss": 500.2, "l0": 145.20417175292968, "frac_variance_explained": 0.723046875, "cossim": 0.89296875, "l2_ratio": 0.845703125, "relative_reconstruction_bias": 0.95625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.915688180923462, "loss_zero": 12.452932643890382, "frac_recovered": 0.9526396453380584, "frac_alive": 0.184353306889534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1da0aa00e1b3cd254e173c33b938429065ca4cc2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3c3ec8741c8c5c570c4cd96d7403d51841faf754 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 303.4, "l1_loss": 17740.8, "l0": 9200.38779296875, "frac_variance_explained": -1.0109375, "cossim": 0.0105377197265625, "l2_ratio": 1.15, "relative_reconstruction_bias": 108.45, "loss_original": 2.440642213821411, "loss_reconstructed": 19.979672050476076, "loss_zero": 12.452932643890382, "frac_recovered": -0.7520080089569092, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..42dea8ea970aafb38bc9a156fd61c8b0a62eb43d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..406e7170a603f17fe61bd32f307c3c5074d42633 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 192.6, "l1_loss": 4803.2, "l0": 3740.0417236328126, "frac_variance_explained": -0.023046875, "cossim": 0.346875, "l2_ratio": 0.405859375, "relative_reconstruction_bias": 1.128515625, "loss_original": 2.440642213821411, "loss_reconstructed": 8.6781081199646, "loss_zero": 12.452932643890382, "frac_recovered": 0.3772002398967743, "frac_alive": 0.9958224892616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c0825a6db9b63a515ed1fc022b9c27d812c8b152 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4653e4d9ad5d8f0a85377657c62aaab43d670ae7 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 107.85, "l1_loss": 341.2, "l0": 60.12916870117188, "frac_variance_explained": 0.5296875, "cossim": 0.830859375, "l2_ratio": 0.77890625, "relative_reconstruction_bias": 0.94765625, "loss_original": 2.440642213821411, "loss_reconstructed": 4.502575922012329, "loss_zero": 12.452932643890382, "frac_recovered": 0.7940543353557586, "frac_alive": 0.0950520858168602, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d84d224b60301e681f7e2df643facbe09dbff482 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4b8564dabf4955130cef1cc287d066a4118a83c0 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 77.3, "l1_loss": 487.6, "l0": 137.42917175292968, "frac_variance_explained": 0.770703125, "cossim": 0.917578125, "l2_ratio": 0.875390625, "relative_reconstruction_bias": 0.957421875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.736348795890808, "loss_zero": 12.452932643890382, "frac_recovered": 0.9705508470535278, "frac_alive": 0.2677951455116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..28ef170d385132ec5746cf54729d62ac3f8ca50e --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8318e04a439bf1a10c8ae1b8e1b3c5e89185b069 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 239.2, "l1_loss": 15539.2, "l0": 8454.14189453125, "frac_variance_explained": -0.75859375, "cossim": 0.09560546875, "l2_ratio": 0.755859375, "relative_reconstruction_bias": 7.709375, "loss_original": 2.440642213821411, "loss_reconstructed": 10.751816177368164, "loss_zero": 12.452932643890382, "frac_recovered": 0.17005544751882554, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..66a088f6f6200f0080d040402b1209d74d30bf8f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..89be8c6460833f6f8e0d65d1f12b6d71f72f2245 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 133.6, "l1_loss": 482.6, "l0": 236.40001068115234, "frac_variance_explained": 0.30390625, "cossim": 0.7203125, "l2_ratio": 0.6375, "relative_reconstruction_bias": 0.89921875, "loss_original": 2.440642213821411, "loss_reconstructed": 6.817232656478882, "loss_zero": 12.452932643890382, "frac_recovered": 0.5630136430263519, "frac_alive": 0.74267578125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..11b04dd602efb3895246d694157f0eeb0129d808 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d4cea3f6bfa1c529ec8a52fbdbcefe6cb90f8d6b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_2_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 91.3, "l1_loss": 416.8, "l0": 97.5583366394043, "frac_variance_explained": 0.651953125, "cossim": 0.882421875, "l2_ratio": 0.830859375, "relative_reconstruction_bias": 0.948828125, "loss_original": 2.440642213821411, "loss_reconstructed": 3.217394709587097, "loss_zero": 12.452932643890382, "frac_recovered": 0.9225192904472351, "frac_alive": 0.1559787392616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e12cdd7dd887bb2873762fb9f077e1297d67038 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..abd4d68b14bd739992739c615958785ce1030238 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 322.4, "l1_loss": 18944.0, "l0": 9201.4171875, "frac_variance_explained": -1.05390625, "cossim": 0.0118377685546875, "l2_ratio": 1.15, "relative_reconstruction_bias": 148.85, "loss_original": 2.440642213821411, "loss_reconstructed": 19.979672050476076, "loss_zero": 12.452932643890382, "frac_recovered": -0.7520080089569092, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fe9b8676ddb4d18d1305e16186838534549944e6 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9a446c7530cd1e708d6830d6c77692171faaa35f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 193.1, "l1_loss": 4678.4, "l0": 3734.7125732421873, "frac_variance_explained": -0.078515625, "cossim": 0.315234375, "l2_ratio": 0.4, "relative_reconstruction_bias": 1.27109375, "loss_original": 2.440642213821411, "loss_reconstructed": 9.022181034088135, "loss_zero": 12.452932643890382, "frac_recovered": 0.34281357526779177, "frac_alive": 0.99560546875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..247ab07e284dc107429598af37eebb40cd082487 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..833d642a242d1dbeb1d22eb79fed589c1fb9d0c3 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 117.95, "l1_loss": 266.0, "l0": 40.29166831970215, "frac_variance_explained": 0.43046875, "cossim": 0.7953125, "l2_ratio": 0.739453125, "relative_reconstruction_bias": 0.937890625, "loss_original": 2.440642213821411, "loss_reconstructed": 5.293327379226684, "loss_zero": 12.452932643890382, "frac_recovered": 0.715108186006546, "frac_alive": 0.1028645858168602, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e5e19e13822b6134780af3ac7f8845e4764e6de --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9added1f1e7397bc39843780c5d4cd1ba7ff0865 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 92.55, "l1_loss": 368.2, "l0": 69.7333366394043, "frac_variance_explained": 0.673046875, "cossim": 0.875390625, "l2_ratio": 0.82265625, "relative_reconstruction_bias": 0.948046875, "loss_original": 2.440642213821411, "loss_reconstructed": 3.124556040763855, "loss_zero": 12.452932643890382, "frac_recovered": 0.9317654490470886, "frac_alive": 0.1222330704331398, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..305838504314e890ea2f2d3f92fddbe8c2929a6d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1f26304d05bf79ee39b11e0fafe32684de3ee31f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 239.7, "l1_loss": 15488.0, "l0": 8441.42529296875, "frac_variance_explained": -0.75859375, "cossim": 0.093603515625, "l2_ratio": 0.7578125, "relative_reconstruction_bias": 7.978125, "loss_original": 2.440642213821411, "loss_reconstructed": 10.79729642868042, "loss_zero": 12.452932643890382, "frac_recovered": 0.16551105678081512, "frac_alive": 0.9999457597732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..48dcc080e7cd569395c2004356a93e438c9d2a03 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b839765b115b3dd3b176d24a37b02c8e6d2b3403 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 143.8, "l1_loss": 381.6, "l0": 195.20000610351562, "frac_variance_explained": 0.195703125, "cossim": 0.67421875, "l2_ratio": 0.575, "relative_reconstruction_bias": 0.869140625, "loss_original": 2.440642213821411, "loss_reconstructed": 7.941213989257813, "loss_zero": 12.452932643890382, "frac_recovered": 0.45088631212711333, "frac_alive": 0.72998046875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..abe5682b1961a6baed4634eb1f23de5d6be0aef4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3268cb8f876ce5182943e8b12a3d7988232ecb74 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_3_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 106.15, "l1_loss": 306.8, "l0": 45.35833511352539, "frac_variance_explained": 0.541796875, "cossim": 0.837109375, "l2_ratio": 0.782421875, "relative_reconstruction_bias": 0.94140625, "loss_original": 2.440642213821411, "loss_reconstructed": 4.1498006820678714, "loss_zero": 12.452932643890382, "frac_recovered": 0.8292764723300934, "frac_alive": 0.0440538190305233, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7206eec54f72d898c94e30e9b590a7c898cd40ef --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2d815dc8d063d115db77c56ceb61fb061c543129 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 301.6, "l1_loss": 17689.6, "l0": 9208.08369140625, "frac_variance_explained": -1.01328125, "cossim": 0.013214111328125, "l2_ratio": 1.1515625, "relative_reconstruction_bias": 84.4, "loss_original": 2.440642213821411, "loss_reconstructed": 19.979672050476076, "loss_zero": 12.452932643890382, "frac_recovered": -0.7520080089569092, "frac_alive": 0.9999457597732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..316bf3eb85f912287495bb1b2f9f026d4346942a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e78738d98c8df47430dc162eaf7ce6952a0232c2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 193.4, "l1_loss": 4726.4, "l0": 3796.5126220703123, "frac_variance_explained": -0.0984375, "cossim": 0.2982421875, "l2_ratio": 0.4029296875, "relative_reconstruction_bias": 1.34921875, "loss_original": 2.440642213821411, "loss_reconstructed": 9.211329936981201, "loss_zero": 12.452932643890382, "frac_recovered": 0.32391943633556364, "frac_alive": 0.9954969882965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..94406e18869d45b28fbde12fbb7637ab0c36518b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d25150c0ade8d9f1b656babde847ce2234233714 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 123.85, "l1_loss": 215.0, "l0": 25.329167556762695, "frac_variance_explained": 0.397265625, "cossim": 0.75234375, "l2_ratio": 0.691796875, "relative_reconstruction_bias": 0.937109375, "loss_original": 2.440642213821411, "loss_reconstructed": 6.16484842300415, "loss_zero": 12.452932643890382, "frac_recovered": 0.6280666649341583, "frac_alive": 0.0861545130610466, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..83a08962ce079d88ef01813e26c452786b777c74 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b570ce2fde2047394ae6d6c9dfdc12875ac559e2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 104.85, "l1_loss": 291.8, "l0": 37.57083435058594, "frac_variance_explained": 0.597265625, "cossim": 0.839453125, "l2_ratio": 0.781640625, "relative_reconstruction_bias": 0.9453125, "loss_original": 2.440642213821411, "loss_reconstructed": 3.941355586051941, "loss_zero": 12.452932643890382, "frac_recovered": 0.8501846313476562, "frac_alive": 0.0476345494389534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d47c40e1b4515b97f75ca73f9213e4caeeb537da --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7426458735a0ede1ea96e750473f6b24b3b89252 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 255.0, "l1_loss": 16787.2, "l0": 8465.43369140625, "frac_variance_explained": -0.715625, "cossim": 0.093994140625, "l2_ratio": 0.753515625, "relative_reconstruction_bias": 7.95, "loss_original": 2.440642213821411, "loss_reconstructed": 10.824813652038575, "loss_zero": 12.452932643890382, "frac_recovered": 0.16276027262210846, "frac_alive": 0.9999457597732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8e09e650cf993ae8cbf671a35de684bb7319bc64 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..070a6077c657978118771cc5262057cb02aa86d1 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 155.7, "l1_loss": 330.2, "l0": 198.4083396911621, "frac_variance_explained": 0.145703125, "cossim": 0.624609375, "l2_ratio": 0.5013671875, "relative_reconstruction_bias": 0.824609375, "loss_original": 2.440642213821411, "loss_reconstructed": 9.115774059295655, "loss_zero": 12.452932643890382, "frac_recovered": 0.33352536559104917, "frac_alive": 0.7464192509651184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7853e916a382ce4419ad87a67de52387e121d198 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..212ca8cddd79e9158a76f65e39339f9be1e5cf6c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_4_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 116.55, "l1_loss": 250.6, "l0": 26.537500381469727, "frac_variance_explained": 0.5171875, "cossim": 0.7984375, "l2_ratio": 0.7390625, "relative_reconstruction_bias": 0.94375, "loss_original": 2.440642213821411, "loss_reconstructed": 5.107259798049927, "loss_zero": 12.452932643890382, "frac_recovered": 0.73365877866745, "frac_alive": 0.03043619729578495, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..02b232fc4f828da81651541a6b340362984b8ea4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4372ad0245283937dea1a2bd69cd748741d330d6 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 303.2, "l1_loss": 17728.0, "l0": 9204.1169921875, "frac_variance_explained": -1.025, "cossim": 0.0110626220703125, "l2_ratio": 1.15078125, "relative_reconstruction_bias": 117.5, "loss_original": 2.440642213821411, "loss_reconstructed": 19.979672050476076, "loss_zero": 12.452932643890382, "frac_recovered": -0.7520080089569092, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..243a3771393d88a040db2ed663cb1190594e1624 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4fedf2a18127e91fefbd6af6fa10e2119d8c9b09 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 192.4, "l1_loss": 4328.0, "l0": 3605.087548828125, "frac_variance_explained": -0.092578125, "cossim": 0.2828125, "l2_ratio": 0.387890625, "relative_reconstruction_bias": 1.35859375, "loss_original": 2.440642213821411, "loss_reconstructed": 9.340449142456055, "loss_zero": 12.452932643890382, "frac_recovered": 0.31101674735546114, "frac_alive": 0.9951714277267456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e0e9cd66ef2dbbd89ada0d7fbc3f07adf2883d5f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..35fbb70dda2503408bcaf76b76073261d79804fe --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 131.3, "l1_loss": 186.2, "l0": 16.7916672706604, "frac_variance_explained": 0.282421875, "cossim": 0.738671875, "l2_ratio": 0.6703125, "relative_reconstruction_bias": 0.918359375, "loss_original": 2.440642213821411, "loss_reconstructed": 7.088256406784057, "loss_zero": 12.452932643890382, "frac_recovered": 0.5358434379100799, "frac_alive": 0.0676540806889534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4887cebcd9a2f82ff66b2a7c7f24452dab474de5 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9dd3d87c6ef0c54c44371b97df68a792f54e998c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 116.45, "l1_loss": 223.4, "l0": 21.112500381469726, "frac_variance_explained": 0.496484375, "cossim": 0.799609375, "l2_ratio": 0.736328125, "relative_reconstruction_bias": 0.93671875, "loss_original": 2.440642213821411, "loss_reconstructed": 5.163997936248779, "loss_zero": 12.452932643890382, "frac_recovered": 0.7280074059963226, "frac_alive": 0.01513671875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..90e89e7ea50764d60474a57b9de98bc7ed989bb8 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b0cf300370e549be36cf72a6c72d25f12cb54826 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 241.9, "l1_loss": 15590.4, "l0": 8443.78798828125, "frac_variance_explained": -0.75390625, "cossim": 0.08896484375, "l2_ratio": 0.76171875, "relative_reconstruction_bias": 8.365625, "loss_original": 2.440642213821411, "loss_reconstructed": 10.843234443664551, "loss_zero": 12.452932643890382, "frac_recovered": 0.16092031002044677, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b7ca221f417acb77a64c585f7ee9f7a235be56f7 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..cf83b08e011ea1cae5fea9b8f5b0dcf6fc4f57e2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 172.3, "l1_loss": 180.2, "l0": 180.89167327880858, "frac_variance_explained": 0.0265625, "cossim": 0.5267578125, "l2_ratio": 0.2748046875, "relative_reconstruction_bias": 0.54453125, "loss_original": 2.440642213821411, "loss_reconstructed": 10.057819747924805, "loss_zero": 12.452932643890382, "frac_recovered": 0.23940212279558182, "frac_alive": 0.7567816972732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..617c732c84d4b5738ace945d58a68f0522192b13 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 15, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_15", + "submodule_name": "resid_post_layer_15", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a8f80a7bbed8a561bbc0baea2e8cef464eaf7d5f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_15_checkpoints/trainer_5_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 128.45, "l1_loss": 191.4, "l0": 14.500000286102296, "frac_variance_explained": 0.336328125, "cossim": 0.7484375, "l2_ratio": 0.681640625, "relative_reconstruction_bias": 0.920703125, "loss_original": 2.440642213821411, "loss_reconstructed": 6.227117872238159, "loss_zero": 12.452932643890382, "frac_recovered": 0.6218695878982544, "frac_alive": 0.009006076492369175, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1586df0888618284871d0c5b8be71ff0b10ae9df --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9e8b5e6ab393ba423c4d6666b830853fcceb13b6 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 456.8, "l1_loss": 26739.2, "l0": 9200.98369140625, "frac_variance_explained": -0.99375, "cossim": 0.01264495849609375, "l2_ratio": 1.15, "relative_reconstruction_bias": 118.875, "loss_original": 2.440642213821411, "loss_reconstructed": 21.83314037322998, "loss_zero": 12.452932643890382, "frac_recovered": -0.9372808158397674, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..31c51b7959e6e78e4eb5800396abf2fa66f93f11 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..70e14c3e9fea8c2d43bdc5426aee178c5de5c3c1 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 282.8, "l1_loss": 5516.8, "l0": 2661.129248046875, "frac_variance_explained": 0.019921875, "cossim": 0.4046875, "l2_ratio": 0.3876953125, "relative_reconstruction_bias": 0.926171875, "loss_original": 2.440642213821411, "loss_reconstructed": 10.32450180053711, "loss_zero": 12.452932643890382, "frac_recovered": 0.21272739246487618, "frac_alive": 0.9959309697151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2605356294c6aac12228e7a2be196e22a10de831 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..393d3b3a1f994e2151f97d10977f86a4d965e011 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 150.0, "l1_loss": 912.8, "l0": 193.70833587646484, "frac_variance_explained": 0.600390625, "cossim": 0.85625, "l2_ratio": 0.8078125, "relative_reconstruction_bias": 0.948046875, "loss_original": 2.440642213821411, "loss_reconstructed": 3.452366757392883, "loss_zero": 12.452932643890382, "frac_recovered": 0.8989889085292816, "frac_alive": 0.157497838139534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6537a8b990a38ac5eb457d1ed069485ac3a75933 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8545768ca85ba1f6940d88ddb8e79e430e3b2fa4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 93.15, "l1_loss": 1247.2, "l0": 439.21251525878904, "frac_variance_explained": 0.848828125, "cossim": 0.9484375, "l2_ratio": 0.909765625, "relative_reconstruction_bias": 0.963671875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.574883985519409, "loss_zero": 12.452932643890382, "frac_recovered": 0.9866514384746552, "frac_alive": 0.689561665058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8147583683744f29bb671b6764c0ef3ca9486b84 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6019a1344664e6d060b7e5b7b76d51e080584308 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 360.6, "l1_loss": 22272.0, "l0": 8138.291748046875, "frac_variance_explained": -0.7515625, "cossim": 0.11416015625, "l2_ratio": 0.75859375, "relative_reconstruction_bias": 6.46875, "loss_original": 2.440642213821411, "loss_reconstructed": 11.07071237564087, "loss_zero": 12.452932643890382, "frac_recovered": 0.13822999522089957, "frac_alive": 0.9999457597732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f6fc4c05f36f0fb39cce56b36f778ae55b82ee13 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f600e2b9559187676c51fa1f84ba75db6dc4de93 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 205.7, "l1_loss": 823.6, "l0": 219.75834045410156, "frac_variance_explained": 0.278515625, "cossim": 0.7140625, "l2_ratio": 0.648046875, "relative_reconstruction_bias": 0.912890625, "loss_original": 2.440642213821411, "loss_reconstructed": 5.884131956100464, "loss_zero": 12.452932643890382, "frac_recovered": 0.656288456916809, "frac_alive": 0.687608540058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fcc5a2220edd92a3f8d089023a6c906957f64002 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f550a7b4f5827aa72b83ec51ca88e00c93b06f83 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_0_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 111.2, "l1_loss": 1262.4, "l0": 390.74168090820314, "frac_variance_explained": 0.776171875, "cossim": 0.918359375, "l2_ratio": 0.87109375, "relative_reconstruction_bias": 0.953125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.674443221092224, "loss_zero": 12.452932643890382, "frac_recovered": 0.9767174541950225, "frac_alive": 0.298828125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..71893d4f1c953776bcc0092523ffe9842a3d9c7b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..697502e91d9ed1c7c4b998bff1ffe5b3c249478c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 459.4, "l1_loss": 26841.6, "l0": 9198.6794921875, "frac_variance_explained": -1.00390625, "cossim": 0.0110626220703125, "l2_ratio": 1.1546875, "relative_reconstruction_bias": 106.475, "loss_original": 2.440642213821411, "loss_reconstructed": 21.83314037322998, "loss_zero": 12.452932643890382, "frac_recovered": -0.9372808158397674, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0438b9c3d3013bbc6ca5e2579449c1948e5ee58a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7d3b107bc83e44e3802ad779e4704d09613896b9 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 291.0, "l1_loss": 3848.0, "l0": 2397.4667724609376, "frac_variance_explained": -0.06171875, "cossim": 0.2849609375, "l2_ratio": 0.324609375, "relative_reconstruction_bias": 1.17890625, "loss_original": 2.440642213821411, "loss_reconstructed": 13.03815517425537, "loss_zero": 12.452932643890382, "frac_recovered": -0.05841167252510786, "frac_alive": 0.9943034052848816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..74fc55be075ab384d3fa8471680adee741dfdde5 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..64d473525371513da2d49706507035bf586d5e4d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 175.5, "l1_loss": 518.2, "l0": 64.67916831970214, "frac_variance_explained": 0.458203125, "cossim": 0.79609375, "l2_ratio": 0.7453125, "relative_reconstruction_bias": 0.941796875, "loss_original": 2.440642213821411, "loss_reconstructed": 4.30438346862793, "loss_zero": 12.452932643890382, "frac_recovered": 0.8138806521892548, "frac_alive": 0.1017252579331398, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..862066f1e37b65696fe5d8c4076e7f7f2dd63251 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2ed04cf7220705f47a1b7f938f3fa865081a37ad --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 114.05, "l1_loss": 835.6, "l0": 168.62083740234374, "frac_variance_explained": 0.773828125, "cossim": 0.91796875, "l2_ratio": 0.870703125, "relative_reconstruction_bias": 0.9515625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.6944114208221435, "loss_zero": 12.452932643890382, "frac_recovered": 0.9747312247753144, "frac_alive": 0.4045138955116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0d469c3cc2cb475b3909a41ddc68809b32ef108c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6a651c73acd408a8270f5af30d02cbb6657cf7e5 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 373.0, "l1_loss": 22899.2, "l0": 8096.754345703125, "frac_variance_explained": -0.721875, "cossim": 0.09970703125, "l2_ratio": 0.7578125, "relative_reconstruction_bias": 7.8, "loss_original": 2.440642213821411, "loss_reconstructed": 11.22905330657959, "loss_zero": 12.452932643890382, "frac_recovered": 0.12240497246384621, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b60fd23ddda64bb02512536e21478fe534be8e30 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4c0aeb83ac5497f32065bd47d441796a4ee23312 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 214.9, "l1_loss": 610.8, "l0": 154.8208381652832, "frac_variance_explained": 0.208203125, "cossim": 0.66796875, "l2_ratio": 0.594140625, "relative_reconstruction_bias": 0.897265625, "loss_original": 2.440642213821411, "loss_reconstructed": 7.198544979095459, "loss_zero": 12.452932643890382, "frac_recovered": 0.5251240968704224, "frac_alive": 0.6703016757965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a62381c1ea1a62cae751d9b0278ba9de95c95c9 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..41968cc556ec6790fc5f43e565f981c080cde9c1 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_1_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 138.1, "l1_loss": 720.8, "l0": 117.22916870117187, "frac_variance_explained": 0.654296875, "cossim": 0.874609375, "l2_ratio": 0.82578125, "relative_reconstruction_bias": 0.949609375, "loss_original": 2.440642213821411, "loss_reconstructed": 3.0530463218688966, "loss_zero": 12.452932643890382, "frac_recovered": 0.9389393389225006, "frac_alive": 0.1564127653837204, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2a09ba467e251ec5a0df94b5828b89ac288a3c1b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..220034d776274de39e19589fd8fca6fa3b7cf8a7 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 471.2, "l1_loss": 27635.2, "l0": 9202.92529296875, "frac_variance_explained": -1.02890625, "cossim": 0.013226318359375, "l2_ratio": 1.15, "relative_reconstruction_bias": 108.975, "loss_original": 2.440642213821411, "loss_reconstructed": 21.83314037322998, "loss_zero": 12.452932643890382, "frac_recovered": -0.9372808158397674, "frac_alive": 0.9999457597732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b447675563eee98089aa13e596a87ad5805aa100 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..74ab70396734c219317e68c24b01b3bfbaafd09f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 297.2, "l1_loss": 3924.8, "l0": 2422.7375732421874, "frac_variance_explained": -0.07578125, "cossim": 0.25576171875, "l2_ratio": 0.323828125, "relative_reconstruction_bias": 1.32890625, "loss_original": 2.440642213821411, "loss_reconstructed": 13.276070308685302, "loss_zero": 12.452932643890382, "frac_recovered": -0.08219222165644169, "frac_alive": 0.9962565302848816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..266657feb7f3e47ce2e9a089de00560d7327a9df --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c3c2efda5add16a0b9b71f1cfbc7150a18a153d2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 184.3, "l1_loss": 441.0, "l0": 45.12500076293945, "frac_variance_explained": 0.42109375, "cossim": 0.778515625, "l2_ratio": 0.730078125, "relative_reconstruction_bias": 0.944140625, "loss_original": 2.440642213821411, "loss_reconstructed": 4.669199419021607, "loss_zero": 12.452932643890382, "frac_recovered": 0.7774573624134063, "frac_alive": 0.0773654505610466, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4f050aa098b6a8cb2d2a75c74fddcce5153f5c76 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ba7a98141dc6aaa9665bc81e7c6ffdc9f0d18078 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 123.55, "l1_loss": 733.2, "l0": 121.07083587646484, "frac_variance_explained": 0.729296875, "cossim": 0.90234375, "l2_ratio": 0.8515625, "relative_reconstruction_bias": 0.947265625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.783217740058899, "loss_zero": 12.452932643890382, "frac_recovered": 0.9658669233322144, "frac_alive": 0.3011067807674408, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5fc62d61ef67728f01de880620731271b3e2cf00 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3f00a40b57f9618d41d2359129a68dc818a71fd9 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 354.4, "l1_loss": 21363.2, "l0": 8065.021044921875, "frac_variance_explained": -0.75078125, "cossim": 0.100146484375, "l2_ratio": 0.76015625, "relative_reconstruction_bias": 7.503125, "loss_original": 2.440642213821411, "loss_reconstructed": 11.266265773773194, "loss_zero": 12.452932643890382, "frac_recovered": 0.11868658438324928, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bc23f32c5eae7675e762ef80ba31611b97b0722c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..60230143f5d7a53c1e36967610455d8dca1d3e59 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 223.9, "l1_loss": 669.4, "l0": 151.6916702270508, "frac_variance_explained": 0.216796875, "cossim": 0.648828125, "l2_ratio": 0.569921875, "relative_reconstruction_bias": 0.89375, "loss_original": 2.440642213821411, "loss_reconstructed": 8.074299097061157, "loss_zero": 12.452932643890382, "frac_recovered": 0.43777204751968385, "frac_alive": 0.6750759482383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3f9ac82659a1cd525c7f4212e419a59d3242ba4d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..480fbd1fa1c9a9bb0fd3d3b7b07652252ccbab83 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_2_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 149.1, "l1_loss": 603.6, "l0": 74.62916946411133, "frac_variance_explained": 0.609375, "cossim": 0.85546875, "l2_ratio": 0.80078125, "relative_reconstruction_bias": 0.941796875, "loss_original": 2.440642213821411, "loss_reconstructed": 3.3274339199066163, "loss_zero": 12.452932643890382, "frac_recovered": 0.9115424156188965, "frac_alive": 0.104871965944767, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c387de73fb2121e0a088f098002561b2a1005af1 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..dba442587db2080d33fb8a32d7f34aca81449f9c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 492.0, "l1_loss": 28902.4, "l0": 9206.10029296875, "frac_variance_explained": -1.05859375, "cossim": 0.0141143798828125, "l2_ratio": 1.15234375, "relative_reconstruction_bias": 135.975, "loss_original": 2.440642213821411, "loss_reconstructed": 21.83314037322998, "loss_zero": 12.452932643890382, "frac_recovered": -0.9372808158397674, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..644b50bf16c1583b282ea471c7960849d96b971f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8aade5be0fc78af290dd354b97f45242cb143d04 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 294.6, "l1_loss": 3740.8, "l0": 2362.9333984375, "frac_variance_explained": -0.0828125, "cossim": 0.22666015625, "l2_ratio": 0.3154296875, "relative_reconstruction_bias": 1.46328125, "loss_original": 2.440642213821411, "loss_reconstructed": 13.42322359085083, "loss_zero": 12.452932643890382, "frac_recovered": -0.09688852950930596, "frac_alive": 0.9954969882965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a06902ecec0c22967e427de19e3a517226a5bdc0 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f042f5cf634efaeb7dc584c94d85d251228fac57 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 196.6, "l1_loss": 331.8, "l0": 23.65000057220459, "frac_variance_explained": 0.341015625, "cossim": 0.7390625, "l2_ratio": 0.68203125, "relative_reconstruction_bias": 0.93125, "loss_original": 2.440642213821411, "loss_reconstructed": 5.629452657699585, "loss_zero": 12.452932643890382, "frac_recovered": 0.6815829992294311, "frac_alive": 0.0403103306889534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ef7ff5d187b27dcff6bf779d26dcbb96d7deef5a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..da6b906e64da44837999f04c445440853ed06041 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 143.5, "l1_loss": 539.4, "l0": 59.45833473205566, "frac_variance_explained": 0.636328125, "cossim": 0.86328125, "l2_ratio": 0.80703125, "relative_reconstruction_bias": 0.93984375, "loss_original": 2.440642213821411, "loss_reconstructed": 3.1445400953292846, "loss_zero": 12.452932643890382, "frac_recovered": 0.9298228561878205, "frac_alive": 0.1376410573720932, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e0bc45eb6c308b46060bad8f47831696a87d12f1 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..578c93a688811402ebd531660547173aed950742 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 361.8, "l1_loss": 21772.8, "l0": 8057.366748046875, "frac_variance_explained": -0.76953125, "cossim": 0.096533203125, "l2_ratio": 0.75703125, "relative_reconstruction_bias": 7.746875, "loss_original": 2.440642213821411, "loss_reconstructed": 11.305345058441162, "loss_zero": 12.452932643890382, "frac_recovered": 0.11478075385093689, "frac_alive": 0.9998915195465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6b997ccf9a319f5086be1a3c5a4457f29fe1df44 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8fba1e809e343fddc128098442df674b196e1833 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 232.6, "l1_loss": 414.6, "l0": 121.14583587646484, "frac_variance_explained": 0.0875, "cossim": 0.603515625, "l2_ratio": 0.5189453125, "relative_reconstruction_bias": 0.8609375, "loss_original": 2.440642213821411, "loss_reconstructed": 9.284746646881104, "loss_zero": 12.452932643890382, "frac_recovered": 0.316985610127449, "frac_alive": 0.6366102695465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3983dfcc581ef9359d16f603ea5322e6b20cc0dd --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..71e38250bef91d5af4177e69f6cb9fa5732c7944 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_3_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 172.8, "l1_loss": 424.2, "l0": 37.96250114440918, "frac_variance_explained": 0.470703125, "cossim": 0.80234375, "l2_ratio": 0.744140625, "relative_reconstruction_bias": 0.93515625, "loss_original": 2.440642213821411, "loss_reconstructed": 4.1068400859832765, "loss_zero": 12.452932643890382, "frac_recovered": 0.8336953580379486, "frac_alive": 0.0579969622194767, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..449523a3faffcc4672087c6471ba332245b86812 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9dd5406ea2e425abd1770186432918e33f05f74e --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 455.2, "l1_loss": 26636.8, "l0": 9201.50048828125, "frac_variance_explained": -1.003125, "cossim": 0.0144805908203125, "l2_ratio": 1.153125, "relative_reconstruction_bias": 85.825, "loss_original": 2.440642213821411, "loss_reconstructed": 21.83314037322998, "loss_zero": 12.452932643890382, "frac_recovered": -0.9372808158397674, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..79589f5b0dbde709096f8a0d038315499c4dcf41 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e2e68f6033660817e4cddbd3c6242d877b3d079f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 306.0, "l1_loss": 4676.8, "l0": 2369.533447265625, "frac_variance_explained": -0.033203125, "cossim": 0.223046875, "l2_ratio": 0.310546875, "relative_reconstruction_bias": 1.38125, "loss_original": 2.440642213821411, "loss_reconstructed": 13.474344730377197, "loss_zero": 12.452932643890382, "frac_recovered": -0.10197329260408879, "frac_alive": 0.9935438632965088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f51d35e9ba5e37bb9eea0de603f578beaed9f182 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c1d5efbdca1c8792ef7e862652b116607e43d98f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 207.0, "l1_loss": 265.2, "l0": 16.579166984558107, "frac_variance_explained": 0.265234375, "cossim": 0.701171875, "l2_ratio": 0.635546875, "relative_reconstruction_bias": 0.91484375, "loss_original": 2.440642213821411, "loss_reconstructed": 6.506928968429565, "loss_zero": 12.452932643890382, "frac_recovered": 0.5939813435077668, "frac_alive": 0.0509982630610466, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d857dd7fa43c293f4e220da2fb51907a1cc1dc88 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e8bc0ac0b4138df870a408c42f05e93df090dd0a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 166.3, "l1_loss": 409.6, "l0": 33.14166774749756, "frac_variance_explained": 0.5109375, "cossim": 0.820703125, "l2_ratio": 0.75703125, "relative_reconstruction_bias": 0.92734375, "loss_original": 2.440642213821411, "loss_reconstructed": 3.8747305154800413, "loss_zero": 12.452932643890382, "frac_recovered": 0.8568949043750763, "frac_alive": 0.0560438372194767, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2f882175eac54816e4d118a193e3c4b3a0444fbf --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..109c60f8d24a9ef5abee92045e52ecfd83f6b614 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 355.4, "l1_loss": 21427.2, "l0": 8079.012646484375, "frac_variance_explained": -0.765625, "cossim": 0.093310546875, "l2_ratio": 0.76015625, "relative_reconstruction_bias": 8.040625, "loss_original": 2.440642213821411, "loss_reconstructed": 11.328641510009765, "loss_zero": 12.452932643890382, "frac_recovered": 0.11245149970054627, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b58e1579df91ad56c5c6ebfa75fe473cad9bcf56 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a6d9fab7e12c40bac3e0b2c3e92cd65a39fb4269 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 253.3, "l1_loss": 465.8, "l0": 116.95000228881835, "frac_variance_explained": 0.110546875, "cossim": 0.56640625, "l2_ratio": 0.4189453125, "relative_reconstruction_bias": 0.761328125, "loss_original": 2.440642213821411, "loss_reconstructed": 9.601817798614501, "loss_zero": 12.452932643890382, "frac_recovered": 0.28501268923282624, "frac_alive": 0.6189778447151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9a4a26a4f0947f56d9c11f5838bd93d7ba79ec32 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..445c2b53e8280d10bf87ef871a9ebc62c57210b2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_4_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 190.2, "l1_loss": 321.6, "l0": 19.60833396911621, "frac_variance_explained": 0.374609375, "cossim": 0.761328125, "l2_ratio": 0.702734375, "relative_reconstruction_bias": 0.93046875, "loss_original": 2.440642213821411, "loss_reconstructed": 5.200517892837524, "loss_zero": 12.452932643890382, "frac_recovered": 0.7243833899497986, "frac_alive": 0.0176866315305233, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..284a769e4ee5f56bc60e1780588cff965695879c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..53ae69017b27705067cd9eb8707f5cf0271bfa03 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 462.8, "l1_loss": 27148.8, "l0": 9207.91279296875, "frac_variance_explained": -1.00546875, "cossim": 0.0137664794921875, "l2_ratio": 1.1484375, "relative_reconstruction_bias": 96.0, "loss_original": 2.440642213821411, "loss_reconstructed": 21.83314037322998, "loss_zero": 12.452932643890382, "frac_recovered": -0.9372808158397674, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ed48d42fd47048cb44c5666f9e4eb7efd864270d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..92c766f4cf74caada033ca862c1480df2d99b7c0 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 300.0, "l1_loss": 4672.0, "l0": 2403.2584228515625, "frac_variance_explained": -0.040234375, "cossim": 0.21103515625, "l2_ratio": 0.3154296875, "relative_reconstruction_bias": 1.47265625, "loss_original": 2.440642213821411, "loss_reconstructed": 13.505611896514893, "loss_zero": 12.452932643890382, "frac_recovered": -0.10507980175316334, "frac_alive": 0.9969618320465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ee9fbefe68562b3255766c5383e520e40aa73fe0 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..478f0ffd3c921aa5679b817890d46572a42e7898 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 215.6, "l1_loss": 233.1, "l0": 13.266666984558105, "frac_variance_explained": 0.1921875, "cossim": 0.67265625, "l2_ratio": 0.60390625, "relative_reconstruction_bias": 0.905859375, "loss_original": 2.440642213821411, "loss_reconstructed": 7.342954540252686, "loss_zero": 12.452932643890382, "frac_recovered": 0.510615000128746, "frac_alive": 0.0594075508415699, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2bd21ec97e1c1aad0bb48614f827d925154d190c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..dd590129d42cf8605fb5255fdd8bb8f95d226427 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 189.1, "l1_loss": 314.8, "l0": 18.09166736602783, "frac_variance_explained": 0.400390625, "cossim": 0.765234375, "l2_ratio": 0.70234375, "relative_reconstruction_bias": 0.92578125, "loss_original": 2.440642213821411, "loss_reconstructed": 4.85347580909729, "loss_zero": 12.452932643890382, "frac_recovered": 0.759119176864624, "frac_alive": 0.0201280377805233, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..94429babfb593a52cb098df0c82a3982a2649a26 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..58455e39e5294dff1190d92fe6f765cfc3060fee --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 359.4, "l1_loss": 21632.0, "l0": 8069.337744140625, "frac_variance_explained": -0.76328125, "cossim": 0.09365234375, "l2_ratio": 0.7609375, "relative_reconstruction_bias": 8.084375, "loss_original": 2.440642213821411, "loss_reconstructed": 11.344709587097167, "loss_zero": 12.452932643890382, "frac_recovered": 0.11084590703248978, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ba372bf98fa0b0363c3f87eff1b9ed6370cdbe86 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ad0200b826697901dc6d6a7260c7c49b3f3550d6 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 291.0, "l1_loss": 155.9, "l0": 107.54167022705079, "frac_variance_explained": 0.00546875, "cossim": 0.305859375, "l2_ratio": 0.2009765625, "relative_reconstruction_bias": 0.669140625, "loss_original": 2.440642213821411, "loss_reconstructed": 17.138036727905273, "loss_zero": 12.452932643890382, "frac_recovered": -0.46756131052970884, "frac_alive": 0.6649848222732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..aca3bf18d4c71a7bb987de5c2b606ac46b324e54 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 19, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_19", + "submodule_name": "resid_post_layer_19", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0b81daff69e68999c0f387ddcd1ccd5a6317b23f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_19_checkpoints/trainer_5_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 205.4, "l1_loss": 269.6, "l0": 12.545833587646484, "frac_variance_explained": 0.34140625, "cossim": 0.711328125, "l2_ratio": 0.649609375, "relative_reconstruction_bias": 0.9296875, "loss_original": 2.440642213821411, "loss_reconstructed": 6.169500207901001, "loss_zero": 12.452932643890382, "frac_recovered": 0.6276720285415649, "frac_alive": 0.014431423507630825, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1fda1c95e9fa5a16d2ee8cb4a561b73a77e99f48 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d92992bc0c1d8668cd99c24da53d8974294f57fe --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 125.7, "l1_loss": 7436.8, "l0": 9314.20029296875, "frac_variance_explained": -1.015625, "cossim": 0.0109375, "l2_ratio": 1.16015625, "relative_reconstruction_bias": 119.65, "loss_original": 2.440642213821411, "loss_reconstructed": 11.83780632019043, "loss_zero": 12.452932643890382, "frac_recovered": 0.06137360595166683, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..47cae5e8453c8b97241a985c8964076ae5162e52 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5f9acdd45713da28dd5e4674a6809b15b7ce3f5a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 74.35, "l1_loss": 1423.2, "l0": 3034.9625732421873, "frac_variance_explained": -0.0203125, "cossim": 0.425, "l2_ratio": 0.4044921875, "relative_reconstruction_bias": 1.062890625, "loss_original": 2.440642213821411, "loss_reconstructed": 8.541642379760741, "loss_zero": 12.452932643890382, "frac_recovered": 0.39075923562049864, "frac_alive": 0.9972330927848816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..09f93a42363f1bb86f7ac5aecd3efa64d98f2114 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7107fbc05f69e0a816325df093f68a5f977f4870 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 35.375, "l1_loss": 277.6, "l0": 236.23333587646485, "frac_variance_explained": 0.68359375, "cossim": 0.89375, "l2_ratio": 0.841796875, "relative_reconstruction_bias": 0.93828125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.9637906551361084, "loss_zero": 12.452932643890382, "frac_recovered": 0.9477954745292664, "frac_alive": 0.2267252653837204, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..13fe605cfd48a60a585af82dfb724c66f5f64be1 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..922f32c659235e061b54233cbdf50518cf2bd23d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 24.9625, "l1_loss": 291.6, "l0": 355.1000091552734, "frac_variance_explained": 0.8484375, "cossim": 0.95078125, "l2_ratio": 0.926953125, "relative_reconstruction_bias": 0.973046875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5234706163406373, "loss_zero": 12.452932643890382, "frac_recovered": 0.9917698383331299, "frac_alive": 0.8058268427848816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..64fc61cd1119b41b55ab80c98d8ac255b967e9cb --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9e67d257b7d23df51362902d95d61b815fbd0509 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 100.4, "l1_loss": 6348.8, "l0": 8365.72119140625, "frac_variance_explained": -0.7578125, "cossim": 0.117138671875, "l2_ratio": 0.78125, "relative_reconstruction_bias": 7.171875, "loss_original": 2.440642213821411, "loss_reconstructed": 9.779939556121827, "loss_zero": 12.452932643890382, "frac_recovered": 0.26706644892692566, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..40f56959dec32c97f65d8d61453262d313e95ff3 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e85e981993887a1ad5b8a804269824e12daaac3b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 51.575, "l1_loss": 241.0, "l0": 266.0958419799805, "frac_variance_explained": 0.359765625, "cossim": 0.758203125, "l2_ratio": 0.688671875, "relative_reconstruction_bias": 0.908203125, "loss_original": 2.440642213821411, "loss_reconstructed": 6.005924892425537, "loss_zero": 12.452932643890382, "frac_recovered": 0.6439538896083832, "frac_alive": 0.7278103232383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a691abd64a438e410a5abaeced51a405b9723f6c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..589c6161d0f91a9a1482b2de3faeeb87e8b5e28f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_0_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 28.275, "l1_loss": 311.6, "l0": 347.51250915527345, "frac_variance_explained": 0.800390625, "cossim": 0.93359375, "l2_ratio": 0.900390625, "relative_reconstruction_bias": 0.959765625, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5743848085403442, "loss_zero": 12.452932643890382, "frac_recovered": 0.9866903483867645, "frac_alive": 0.42919921875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bef9dbba51e67832715521ad58a228a440b0a2be --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4be1dac0e04995c18ee14abea3c30cb56aee16fe --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 126.65, "l1_loss": 7497.6, "l0": 9318.3044921875, "frac_variance_explained": -1.0125, "cossim": 0.012921142578125, "l2_ratio": 1.16015625, "relative_reconstruction_bias": 100.1, "loss_original": 2.440642213821411, "loss_reconstructed": 11.83780632019043, "loss_zero": 12.452932643890382, "frac_recovered": 0.06137360595166683, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c609f216fbbb0d68ea4f119a1f13160f4fa82e27 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..046bb51c41275b3f1b81493b3561fcfc6bd95085 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 78.45, "l1_loss": 1267.2, "l0": 2796.7500732421877, "frac_variance_explained": -0.04296875, "cossim": 0.3470703125, "l2_ratio": 0.3572265625, "relative_reconstruction_bias": 1.16328125, "loss_original": 2.440642213821411, "loss_reconstructed": 9.005312633514404, "loss_zero": 12.452932643890382, "frac_recovered": 0.3444338977336884, "frac_alive": 0.9959309697151184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..66776ef1621740a4064e1f69d95f0c32d1dc63aa --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b5a0bc552c567d7fecf67f41eaccaebfcb7293e3 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 40.35, "l1_loss": 175.7, "l0": 88.10416946411132, "frac_variance_explained": 0.61796875, "cossim": 0.86015625, "l2_ratio": 0.808203125, "relative_reconstruction_bias": 0.94453125, "loss_original": 2.440642213821411, "loss_reconstructed": 3.556185221672058, "loss_zero": 12.452932643890382, "frac_recovered": 0.88861163854599, "frac_alive": 0.1337348073720932, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5579f94de4587f0878f894907a46f4600e19fa3d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..552136380e0e0d6eb3a9d5f69a2c62bc6214a608 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 27.8, "l1_loss": 190.6, "l0": 132.23333816528321, "frac_variance_explained": 0.810546875, "cossim": 0.9375, "l2_ratio": 0.91015625, "relative_reconstruction_bias": 0.97109375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.590347933769226, "loss_zero": 12.452932643890382, "frac_recovered": 0.9851083040237427, "frac_alive": 0.4578993022441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..31b8f9995fea615363784989e26fc6ccff346c28 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..42e4ae105498d90e84e54149a52b0870ffd22716 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 100.5, "l1_loss": 6284.8, "l0": 8338.02119140625, "frac_variance_explained": -0.7625, "cossim": 0.1099609375, "l2_ratio": 0.781640625, "relative_reconstruction_bias": 7.709375, "loss_original": 2.440642213821411, "loss_reconstructed": 9.845651054382325, "loss_zero": 12.452932643890382, "frac_recovered": 0.2604978531599045, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..eeb990d88d277a3dee641de4098b6090cb4bc2b5 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6dec9aa0d0954ccfa7c60b9db715a9da031bfa18 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 58.8, "l1_loss": 164.2, "l0": 156.95000457763672, "frac_variance_explained": 0.22265625, "cossim": 0.691796875, "l2_ratio": 0.60859375, "relative_reconstruction_bias": 0.879296875, "loss_original": 2.440642213821411, "loss_reconstructed": 7.364314270019531, "loss_zero": 12.452932643890382, "frac_recovered": 0.5083250373601913, "frac_alive": 0.6274956464767456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1b5982c3a75b4fed106cd63e2f7eef025ed0adf4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9a4e291ccb9b32f562e4de023411811f8e802b11 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_1_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 35.5625, "l1_loss": 197.7, "l0": 124.70417098999023, "frac_variance_explained": 0.69453125, "cossim": 0.89375, "l2_ratio": 0.848828125, "relative_reconstruction_bias": 0.948828125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.741474914550781, "loss_zero": 12.452932643890382, "frac_recovered": 0.9700230419635772, "frac_alive": 0.2506510317325592, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4f9e30658b26d79ef097b1e4e05697cb6103f878 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..531012ea16f055cbbd12d9d25d0fed17a2fcdbd3 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 125.5, "l1_loss": 7424.0, "l0": 9313.92958984375, "frac_variance_explained": -1.0140625, "cossim": 0.0107269287109375, "l2_ratio": 1.16015625, "relative_reconstruction_bias": 147.15, "loss_original": 2.440642213821411, "loss_reconstructed": 11.83780632019043, "loss_zero": 12.452932643890382, "frac_recovered": 0.06137360595166683, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..44c95788812fc1291ec478c533718bdf23134ddd --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a57a157fbb1635dffc8b85934d4b0d545d816e91 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 77.75, "l1_loss": 1216.0, "l0": 2727.2584228515625, "frac_variance_explained": -0.05078125, "cossim": 0.312890625, "l2_ratio": 0.35234375, "relative_reconstruction_bias": 1.2609375, "loss_original": 2.440642213821411, "loss_reconstructed": 9.135836124420166, "loss_zero": 12.452932643890382, "frac_recovered": 0.3314054012298584, "frac_alive": 0.99755859375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..675863cf51756b71bb7f70a69006c012cfeac4b1 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..04ecf262865495a3c303c3dc6d962032e8b18b10 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 44.2, "l1_loss": 145.3, "l0": 62.200001907348636, "frac_variance_explained": 0.524609375, "cossim": 0.83125, "l2_ratio": 0.778515625, "relative_reconstruction_bias": 0.9359375, "loss_original": 2.440642213821411, "loss_reconstructed": 3.927385950088501, "loss_zero": 12.452932643890382, "frac_recovered": 0.8515277445316315, "frac_alive": 0.103895403444767, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e77ceb78b10926db77772e6f9dbf9b469f738d9c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e94c9dd24879cc2f14257fa533899e051b5117f7 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 31.5875, "l1_loss": 166.8, "l0": 92.54166946411132, "frac_variance_explained": 0.783203125, "cossim": 0.917578125, "l2_ratio": 0.880078125, "relative_reconstruction_bias": 0.9609375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.635545754432678, "loss_zero": 12.452932643890382, "frac_recovered": 0.9805962979793549, "frac_alive": 0.3427191972732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d10977117655359c5ff6a78d57569433da385f2d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a41d3d5fb7a319c54d745f7b3c985fb60659a17c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 99.35, "l1_loss": 6192.0, "l0": 8333.63779296875, "frac_variance_explained": -0.76484375, "cossim": 0.10498046875, "l2_ratio": 0.7796875, "relative_reconstruction_bias": 8.059375, "loss_original": 2.440642213821411, "loss_reconstructed": 9.861291313171387, "loss_zero": 12.452932643890382, "frac_recovered": 0.2589359924197197, "frac_alive": 0.9999457597732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..81fdc8b62c8ea6d934df83a4c056793db7551d33 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bf8fbbf1c11ae2666f8ad5659d19d173f94f6079 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 59.45, "l1_loss": 205.6, "l0": 163.7791732788086, "frac_variance_explained": 0.327734375, "cossim": 0.675390625, "l2_ratio": 0.589453125, "relative_reconstruction_bias": 0.89609375, "loss_original": 2.440642213821411, "loss_reconstructed": 7.893911695480346, "loss_zero": 12.452932643890382, "frac_recovered": 0.4554441511631012, "frac_alive": 0.6577690839767456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..58c4c4e1ef15f7d61bf4666cd5dd535a0190198f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..cccbc7959ee242fa21d9617a70270b78000638a1 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_2_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 37.2, "l1_loss": 173.3, "l0": 87.1208366394043, "frac_variance_explained": 0.666015625, "cossim": 0.886328125, "l2_ratio": 0.842578125, "relative_reconstruction_bias": 0.94921875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.8570242881774903, "loss_zero": 12.452932643890382, "frac_recovered": 0.9584941565990448, "frac_alive": 0.1792534738779068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3209bda417e2cd1daeb54b1a0d17590f5005f8b0 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b6d3eecd55a3a544801803a63b6a599fa4375496 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 127.8, "l1_loss": 7564.8, "l0": 9314.02529296875, "frac_variance_explained": -1.0171875, "cossim": 0.01275634765625, "l2_ratio": 1.16015625, "relative_reconstruction_bias": 122.45, "loss_original": 2.440642213821411, "loss_reconstructed": 11.83780632019043, "loss_zero": 12.452932643890382, "frac_recovered": 0.06137360595166683, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d720d7d3b4e8b87850d6fe2f82f6393b2097254f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7d6c1d8095e2a3a127d362acf9fb2b7dbea2fa25 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 79.85, "l1_loss": 1197.6, "l0": 2669.2417236328124, "frac_variance_explained": -0.05625, "cossim": 0.2978515625, "l2_ratio": 0.3412109375, "relative_reconstruction_bias": 1.2796875, "loss_original": 2.440642213821411, "loss_reconstructed": 9.308492755889892, "loss_zero": 12.452932643890382, "frac_recovered": 0.31416334211826324, "frac_alive": 0.9962565302848816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..485532c92f122f590c1fb1f04908a6bbc5a4798f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..beb0297f6a897be2de3bccd6869a53efd3ded54b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 48.3, "l1_loss": 110.1, "l0": 37.441668319702146, "frac_variance_explained": 0.44140625, "cossim": 0.797265625, "l2_ratio": 0.7421875, "relative_reconstruction_bias": 0.930859375, "loss_original": 2.440642213821411, "loss_reconstructed": 4.812506294250488, "loss_zero": 12.452932643890382, "frac_recovered": 0.7631361186504364, "frac_alive": 0.0867513045668602, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..066a8c1909ffb2fe82118cf42067e6caced7930d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f41156fd9402d389de8f7474f89948ffb855a070 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 35.425, "l1_loss": 137.65, "l0": 55.01666831970215, "frac_variance_explained": 0.6875, "cossim": 0.894921875, "l2_ratio": 0.85546875, "relative_reconstruction_bias": 0.95546875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.7959484815597535, "loss_zero": 12.452932643890382, "frac_recovered": 0.964591920375824, "frac_alive": 0.1822916716337204, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4f016b8abe30d722f4bd72332926f40969f70d5c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e801ef6acded6d4a4a047bd7f80ff10851c05436 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 101.35, "l1_loss": 6284.8, "l0": 8309.35869140625, "frac_variance_explained": -0.7703125, "cossim": 0.10263671875, "l2_ratio": 0.784375, "relative_reconstruction_bias": 8.259375, "loss_original": 2.440642213821411, "loss_reconstructed": 9.877824306488037, "loss_zero": 12.452932643890382, "frac_recovered": 0.25728367567062377, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..38cfe16afef257a06c0e3d562ebeaf5b6b9ff802 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f4cda6ecf6b2ef02e238aa2baf6544ded8241af8 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 65.075, "l1_loss": 116.4, "l0": 120.4708381652832, "frac_variance_explained": 0.123828125, "cossim": 0.6140625, "l2_ratio": 0.51796875, "relative_reconstruction_bias": 0.84765625, "loss_original": 2.440642213821411, "loss_reconstructed": 9.09445219039917, "loss_zero": 12.452932643890382, "frac_recovered": 0.3355876266956329, "frac_alive": 0.6110568642616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..105852e64392c6d8c8186dce81bb2796fb1b3836 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..960d967f373f9c5bf0cb7348208a5be86db9b698 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_3_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 40.875, "l1_loss": 129.15, "l0": 43.89166793823242, "frac_variance_explained": 0.599609375, "cossim": 0.860546875, "l2_ratio": 0.811328125, "relative_reconstruction_bias": 0.94140625, "loss_original": 2.440642213821411, "loss_reconstructed": 3.269835376739502, "loss_zero": 12.452932643890382, "frac_recovered": 0.9172515153884888, "frac_alive": 0.0827907994389534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7a4f6b0cad6243e73995efe586782a4697dda0c9 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5ca3a55a507470699c6f04d59f046f475ea47352 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 126.5, "l1_loss": 7500.8, "l0": 9319.98779296875, "frac_variance_explained": -1.0171875, "cossim": 0.011639404296875, "l2_ratio": 1.15859375, "relative_reconstruction_bias": 133.8, "loss_original": 2.440642213821411, "loss_reconstructed": 11.83780632019043, "loss_zero": 12.452932643890382, "frac_recovered": 0.06137360595166683, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..632818fedbb4d2bb5a1bec31333c05d8d9328f6b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ed9221afa7777a57f114f7b4d6313cfc7e23eca4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 81.25, "l1_loss": 1140.0, "l0": 2571.616748046875, "frac_variance_explained": -0.06171875, "cossim": 0.27763671875, "l2_ratio": 0.3271484375, "relative_reconstruction_bias": 1.29296875, "loss_original": 2.440642213821411, "loss_reconstructed": 9.41802282333374, "loss_zero": 12.452932643890382, "frac_recovered": 0.3032310396432877, "frac_alive": 0.9944118857383728, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..83b5d165c0b749acbe284430b37f841136a2b1a8 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0a0838765d8064e0f8846cea15cdaa37b75d6de3 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 55.0, "l1_loss": 81.8, "l0": 19.595834159851073, "frac_variance_explained": 0.32421875, "cossim": 0.73984375, "l2_ratio": 0.6765625, "relative_reconstruction_bias": 0.913671875, "loss_original": 2.440642213821411, "loss_reconstructed": 6.068083238601685, "loss_zero": 12.452932643890382, "frac_recovered": 0.6377541959285736, "frac_alive": 0.0453559048473835, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d70e2a60123477436df2bcaac823392caa0d9809 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b9c978df5b6dd842d9ddcc71fcb960e5e2a9e9a0 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 39.5, "l1_loss": 111.95, "l0": 31.587500762939452, "frac_variance_explained": 0.622265625, "cossim": 0.869140625, "l2_ratio": 0.8140625, "relative_reconstruction_bias": 0.934765625, "loss_original": 2.440642213821411, "loss_reconstructed": 3.200065755844116, "loss_zero": 12.452932643890382, "frac_recovered": 0.9242486178874969, "frac_alive": 0.0857204869389534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e7b647a59c55caf1ffedbeeacb20bbf61d95ef21 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d41241f155a62f4fa279428f14e6e6272b09a531 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 98.55, "l1_loss": 6134.4, "l0": 8311.712890625, "frac_variance_explained": -0.76953125, "cossim": 0.104541015625, "l2_ratio": 0.779296875, "relative_reconstruction_bias": 8.1625, "loss_original": 2.440642213821411, "loss_reconstructed": 9.888659572601318, "loss_zero": 12.452932643890382, "frac_recovered": 0.25620109885931014, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f04b3ae7eb512773a6c8c50000755bf51b814a87 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..116cdfc2aa1c5a31114afc1018720fb339c8e19a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 68.6, "l1_loss": 82.75, "l0": 96.02500228881836, "frac_variance_explained": 0.0515625, "cossim": 0.544921875, "l2_ratio": 0.427734375, "relative_reconstruction_bias": 0.780078125, "loss_original": 2.440642213821411, "loss_reconstructed": 10.85888614654541, "loss_zero": 12.452932643890382, "frac_recovered": 0.15934529900550842, "frac_alive": 0.5725911259651184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8e6939a3fd3a2cde20ab98b3aebb41f39671fceb --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..915fc25168ea87477d3c6b1c8af0d233baf548a1 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_4_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 47.825, "l1_loss": 101.25, "l0": 24.93750057220459, "frac_variance_explained": 0.47265625, "cossim": 0.80390625, "l2_ratio": 0.744921875, "relative_reconstruction_bias": 0.927734375, "loss_original": 2.440642213821411, "loss_reconstructed": 4.213635277748108, "loss_zero": 12.452932643890382, "frac_recovered": 0.8229243338108063, "frac_alive": 0.0367838554084301, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3ff6ac8d8a10e95e77ad1dc9a65415c3eb92c4f2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6ffd699012330c86417414602461cee3f630db14 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 129.15, "l1_loss": 7664.0, "l0": 9313.76279296875, "frac_variance_explained": -1.0265625, "cossim": 0.012237548828125, "l2_ratio": 1.1609375, "relative_reconstruction_bias": 115.35, "loss_original": 2.440642213821411, "loss_reconstructed": 11.83780632019043, "loss_zero": 12.452932643890382, "frac_recovered": 0.06137360595166683, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9997200c2d586e5450d8406518aa1ce34b24d139 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..92cf600437b16af2b459bbf442e4885247323348 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 78.75, "l1_loss": 1153.2, "l0": 2625.962548828125, "frac_variance_explained": -0.059375, "cossim": 0.276171875, "l2_ratio": 0.3359375, "relative_reconstruction_bias": 1.346875, "loss_original": 2.440642213821411, "loss_reconstructed": 9.482363605499268, "loss_zero": 12.452932643890382, "frac_recovered": 0.2968075037002563, "frac_alive": 0.9965277910232544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3d8e9b8c7edd842305d9cc85a468be850546dd9e --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c14dda20acd7611604db617936e093dd7dbef03b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 59.25, "l1_loss": 68.225, "l0": 12.900000381469727, "frac_variance_explained": 0.28828125, "cossim": 0.687890625, "l2_ratio": 0.614453125, "relative_reconstruction_bias": 0.906640625, "loss_original": 2.440642213821411, "loss_reconstructed": 7.193751859664917, "loss_zero": 12.452932643890382, "frac_recovered": 0.525319117307663, "frac_alive": 0.0420464426279068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8f0d37a15e562915c5ddd368c87387082b9f6481 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..774b878ea0a5c3c1d5f3474ac3a7246a528d957c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 47.15, "l1_loss": 88.15, "l0": 19.012500953674316, "frac_variance_explained": 0.474609375, "cossim": 0.80625, "l2_ratio": 0.746484375, "relative_reconstruction_bias": 0.92734375, "loss_original": 2.440642213821411, "loss_reconstructed": 4.166976833343506, "loss_zero": 12.452932643890382, "frac_recovered": 0.8276787519454956, "frac_alive": 0.0343967005610466, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ec497ce20e42c4e6fddbfba7769ea35db70e15cc --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1fe2f88207cb753eb9a4412994756c05794df651 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 101.55, "l1_loss": 6374.4, "l0": 8310.27958984375, "frac_variance_explained": -0.7375, "cossim": 0.102978515625, "l2_ratio": 0.776953125, "relative_reconstruction_bias": 8.00625, "loss_original": 2.440642213821411, "loss_reconstructed": 9.89528980255127, "loss_zero": 12.452932643890382, "frac_recovered": 0.2555403396487236, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..082644f72000401538134339729a991128de42a9 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..54068f2546ef0fd776cb0530237ecf64a3097809 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 73.5, "l1_loss": 52.2, "l0": 84.69166946411133, "frac_variance_explained": 0.021875, "cossim": 0.4447265625, "l2_ratio": 0.2865234375, "relative_reconstruction_bias": 0.670703125, "loss_original": 2.440642213821411, "loss_reconstructed": 10.837434673309327, "loss_zero": 12.452932643890382, "frac_recovered": 0.16138963997364045, "frac_alive": 0.5611979365348816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..793051521668ed8a33b74a5bad9714993f595664 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 3, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_3", + "submodule_name": "resid_post_layer_3", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4c50842fc4fdd611862b0e18c231542a385d94ae --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_3_checkpoints/trainer_5_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 51.85, "l1_loss": 76.65, "l0": 13.975000286102295, "frac_variance_explained": 0.3515625, "cossim": 0.76015625, "l2_ratio": 0.6984375, "relative_reconstruction_bias": 0.92109375, "loss_original": 2.440642213821411, "loss_reconstructed": 5.676401519775391, "loss_zero": 12.452932643890382, "frac_recovered": 0.6768557488918304, "frac_alive": 0.01513671875, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3e5e28413a12761c13c147440ea9e35f68e68b56 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..84213d46f140f5295ad8856f18de0596516ed630 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 156.2, "l1_loss": 9100.8, "l0": 9204.91708984375, "frac_variance_explained": -1.0046875, "cossim": 0.009985601902008057, "l2_ratio": 1.15546875, "relative_reconstruction_bias": -817.8, "loss_original": 2.440642213821411, "loss_reconstructed": 14.571032238006591, "loss_zero": 12.452932643890382, "frac_recovered": -0.21182486265897751, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..488526b8397f6caf4ee66eed416d3bad63409cf9 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..84842483e6266a885d641862f79951bd62a616aa --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 96.5, "l1_loss": 2459.2, "l0": 3289.9584228515623, "frac_variance_explained": 0.05859375, "cossim": 0.4255859375, "l2_ratio": 0.4216796875, "relative_reconstruction_bias": 0.98984375, "loss_original": 2.440642213821411, "loss_reconstructed": 8.404415035247803, "loss_zero": 12.452932643890382, "frac_recovered": 0.4044595301151276, "frac_alive": 0.9961479902267456, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3dbf197e6519e241045b00d1c90b2fbe5171cd81 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bb73bf4b33cc77ca58ddaa584af635561a6905a2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 48.025, "l1_loss": 379.8, "l0": 254.94584045410156, "frac_variance_explained": 0.660546875, "cossim": 0.88671875, "l2_ratio": 0.83359375, "relative_reconstruction_bias": 0.940234375, "loss_original": 2.440642213821411, "loss_reconstructed": 3.0283466815948485, "loss_zero": 12.452932643890382, "frac_recovered": 0.9413172245025635, "frac_alive": 0.1541341096162796, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3b8c2a33aaa4354107822e30b00d04a9007fda29 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0c99470671b48dfb2fa9d4f2e56596b5d9627a36 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 31.7875, "l1_loss": 436.4, "l0": 480.0208465576172, "frac_variance_explained": 0.852734375, "cossim": 0.951953125, "l2_ratio": 0.915234375, "relative_reconstruction_bias": 0.96328125, "loss_original": 2.440642213821411, "loss_reconstructed": 2.500810742378235, "loss_zero": 12.452932643890382, "frac_recovered": 0.9940264821052551, "frac_alive": 0.6815863847732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..293f0f94d516a571cde51bf4c29b71db1d1b404b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..3344e73a6263edf0ac88836320321b0290d4bbf0 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 126.3, "l1_loss": 7971.2, "l0": 8312.05859375, "frac_variance_explained": -0.75859375, "cossim": 0.10849609375, "l2_ratio": 0.763671875, "relative_reconstruction_bias": 7.278125, "loss_original": 2.440642213821411, "loss_reconstructed": 9.854583644866944, "loss_zero": 12.452932643890382, "frac_recovered": 0.25967336595058443, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..49c0c5b96c2bef8533d0adf7c1033152038c4aaa --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7ec7bac08308a5db1c242ded4289c6bf1f9d5512 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 65.225, "l1_loss": 397.8, "l0": 295.57500915527345, "frac_variance_explained": 0.43515625, "cossim": 0.769921875, "l2_ratio": 0.696875, "relative_reconstruction_bias": 0.915234375, "loss_original": 2.440642213821411, "loss_reconstructed": 5.345195007324219, "loss_zero": 12.452932643890382, "frac_recovered": 0.7099828541278839, "frac_alive": 0.7333984375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..057536e32aa93dd5df6f09ea86876a5b97f74456 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.025, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..aaa578ff2318087b9212fda6297c52d025371dd4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_0_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 37.275, "l1_loss": 442.8, "l0": 412.65000915527344, "frac_variance_explained": 0.794921875, "cossim": 0.93046875, "l2_ratio": 0.890625, "relative_reconstruction_bias": 0.957421875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5505842685699465, "loss_zero": 12.452932643890382, "frac_recovered": 0.9890749871730804, "frac_alive": 0.2987196147441864, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..380c0f024f3260b12b19cf36526d405eb828f8c1 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..b0147993c55fd6256a50b911cc65e4529c5130ff --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 166.6, "l1_loss": 9728.0, "l0": 9209.291796875, "frac_variance_explained": -1.0359375, "cossim": 0.0111297607421875, "l2_ratio": 1.15546875, "relative_reconstruction_bias": 126.65, "loss_original": 2.440642213821411, "loss_reconstructed": 14.571032238006591, "loss_zero": 12.452932643890382, "frac_recovered": -0.21182486265897751, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..22c528a97be5fe7a1cf3404cabdbc3d78363f689 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0170e0840a04e67d10b996ef1bb6b0b89a50396d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 99.65, "l1_loss": 2208.8, "l0": 3060.0959228515626, "frac_variance_explained": 0.019921875, "cossim": 0.3361328125, "l2_ratio": 0.371875, "relative_reconstruction_bias": 1.13671875, "loss_original": 2.440642213821411, "loss_reconstructed": 8.953547191619872, "loss_zero": 12.452932643890382, "frac_recovered": 0.3495545297861099, "frac_alive": 0.9943576455116272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..61056f3c176f8cafcf873a49cbdb1d75c2141436 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..dd97bfc6cf9940a93ed30fb492ea3de96853e599 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 55.525, "l1_loss": 228.8, "l0": 92.31250228881837, "frac_variance_explained": 0.5546875, "cossim": 0.842578125, "l2_ratio": 0.791796875, "relative_reconstruction_bias": 0.941015625, "loss_original": 2.440642213821411, "loss_reconstructed": 3.7567901372909547, "loss_zero": 12.452932643890382, "frac_recovered": 0.8685382425785064, "frac_alive": 0.099989153444767, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9900a57ba385df1d6153e587d218dc8dc7f11ce9 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1f56c26a56dd7ae95ba80855f8da92232c108d9f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 38.125, "l1_loss": 292.4, "l0": 180.3541732788086, "frac_variance_explained": 0.79296875, "cossim": 0.928515625, "l2_ratio": 0.891796875, "relative_reconstruction_bias": 0.96484375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.5693552017211916, "loss_zero": 12.452932643890382, "frac_recovered": 0.987200129032135, "frac_alive": 0.3882921040058136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..05356552fd80fe33fdbea60b4484fe419b6a3b3a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f61a1e4b7af5136de0fad278f4eaef537ae38564 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 126.95, "l1_loss": 7913.6, "l0": 8299.075390625, "frac_variance_explained": -0.759375, "cossim": 0.096142578125, "l2_ratio": 0.766796875, "relative_reconstruction_bias": 8.090625, "loss_original": 2.440642213821411, "loss_reconstructed": 9.946970176696777, "loss_zero": 12.452932643890382, "frac_recovered": 0.25044662654399874, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..af584a6ff110764398370924417d188725299cc3 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..7343f098eea7485bdbf946a36f2bf938551b7b6f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 73.05, "l1_loss": 233.5, "l0": 181.07917022705078, "frac_variance_explained": 0.26015625, "cossim": 0.707421875, "l2_ratio": 0.620703125, "relative_reconstruction_bias": 0.880859375, "loss_original": 2.440642213821411, "loss_reconstructed": 6.592868709564209, "loss_zero": 12.452932643890382, "frac_recovered": 0.5854110896587372, "frac_alive": 0.653917133808136, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ffcbb8fdd2e95a5238b024f9166916b4473d113e --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.035, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..93c1bf1ffdea001c2821935b646ea463c00cda0c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_1_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 46.25, "l1_loss": 269.4, "l0": 138.33333740234374, "frac_variance_explained": 0.685546875, "cossim": 0.891796875, "l2_ratio": 0.840234375, "relative_reconstruction_bias": 0.94296875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.7839150190353394, "loss_zero": 12.452932643890382, "frac_recovered": 0.9657946586608886, "frac_alive": 0.14453125, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7c075ebd4743c2f4010ae8643ee24f82c62a26a2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6f1b2a7a9a4d03232b81c81b4032b13f8ad5ad19 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 157.7, "l1_loss": 9177.6, "l0": 9206.95458984375, "frac_variance_explained": -1.0140625, "cossim": 0.00892791748046875, "l2_ratio": 1.1546875, "relative_reconstruction_bias": 278.1, "loss_original": 2.440642213821411, "loss_reconstructed": 14.571032238006591, "loss_zero": 12.452932643890382, "frac_recovered": -0.21182486265897751, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8e60c055227f8bae38af1b67c90a5d6ca3b964e4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e59ddcbca1990cf0eead1de0fbd23b519b135bb8 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 99.2, "l1_loss": 1861.6, "l0": 3093.6417724609373, "frac_variance_explained": -0.06953125, "cossim": 0.3095703125, "l2_ratio": 0.3720703125, "relative_reconstruction_bias": 1.27578125, "loss_original": 2.440642213821411, "loss_reconstructed": 9.071795177459716, "loss_zero": 12.452932643890382, "frac_recovered": 0.3377668857574463, "frac_alive": 0.9960395097732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..78061fdff19f1a469fed9959fa50216c7bcb0a13 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..937d976033990f450d64ef4368b0169e9100b11b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 60.15, "l1_loss": 184.8, "l0": 60.16666831970215, "frac_variance_explained": 0.49140625, "cossim": 0.81484375, "l2_ratio": 0.758203125, "relative_reconstruction_bias": 0.933984375, "loss_original": 2.440642213821411, "loss_reconstructed": 4.2475543975830075, "loss_zero": 12.452932643890382, "frac_recovered": 0.8195143282413483, "frac_alive": 0.0748697891831398, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8c0d32079e8bcf181523b7b3b072dd3e4fe4e6e6 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bda72868b38e398d527178d5a11f4e6e44ec51e7 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 42.6, "l1_loss": 255.2, "l0": 123.80416946411133, "frac_variance_explained": 0.74453125, "cossim": 0.910546875, "l2_ratio": 0.865625, "relative_reconstruction_bias": 0.95234375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.6330320835113525, "loss_zero": 12.452932643890382, "frac_recovered": 0.9808517336845398, "frac_alive": 0.2724066972732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3bb552e996f88d97b3f8487ded1f2cdf130984e8 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6a10acf188a369053e9a8b0bf4a06f16e4db62b4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 127.0, "l1_loss": 7939.2, "l0": 8307.60029296875, "frac_variance_explained": -0.7765625, "cossim": 0.097900390625, "l2_ratio": 0.767578125, "relative_reconstruction_bias": 8.23125, "loss_original": 2.440642213821411, "loss_reconstructed": 9.970696353912354, "loss_zero": 12.452932643890382, "frac_recovered": 0.24807583093643187, "frac_alive": 0.9999457597732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f4575e2d691b305559bceff170f50300e4184016 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..34586eec6314913b23432072e6821ca63a6f73cd --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 74.5, "l1_loss": 217.1, "l0": 177.02916870117187, "frac_variance_explained": 0.218359375, "cossim": 0.69765625, "l2_ratio": 0.60625, "relative_reconstruction_bias": 0.872265625, "loss_original": 2.440642213821411, "loss_reconstructed": 7.28716459274292, "loss_zero": 12.452932643890382, "frac_recovered": 0.5160931020975112, "frac_alive": 0.6647678017616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f44b15e462c128e33295398d024f7075631a45da --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.04, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..de05f36a9b7e8d16713a713289beea9250afef46 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_2_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 49.65, "l1_loss": 235.6, "l0": 95.78333587646485, "frac_variance_explained": 0.653125, "cossim": 0.87890625, "l2_ratio": 0.8265625, "relative_reconstruction_bias": 0.94296875, "loss_original": 2.440642213821411, "loss_reconstructed": 2.977662134170532, "loss_zero": 12.452932643890382, "frac_recovered": 0.9464470505714416, "frac_alive": 0.1045464426279068, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7ceb6fb59f68decc4810aab9b37e35d8f77fcc66 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bdb3a7523b6b3d66d0568437ae2bf5408ec00adb --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 159.1, "l1_loss": 9286.4, "l0": 9211.675390625, "frac_variance_explained": -1.00859375, "cossim": 0.010518646240234375, "l2_ratio": 1.15546875, "relative_reconstruction_bias": 39.65, "loss_original": 2.440642213821411, "loss_reconstructed": 14.571032238006591, "loss_zero": 12.452932643890382, "frac_recovered": -0.21182486265897751, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c589957f9e5e1fc5e3be935b5e152205aa2b3942 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..45b510c0ae3a9753e5c49a40cc7c6badaf096494 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 101.55, "l1_loss": 1732.8, "l0": 2908.891748046875, "frac_variance_explained": -0.06796875, "cossim": 0.2841796875, "l2_ratio": 0.3509765625, "relative_reconstruction_bias": 1.30859375, "loss_original": 2.440642213821411, "loss_reconstructed": 9.223418617248536, "loss_zero": 12.452932643890382, "frac_recovered": 0.3226252317428589, "frac_alive": 0.9947916865348816, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6197470dd9f1da09d317d11719317cb61fc62106 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9ad4a8f000550397b3e4d87a442400bc7185a81d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 64.8, "l1_loss": 130.85, "l0": 31.262500953674316, "frac_variance_explained": 0.38984375, "cossim": 0.775390625, "l2_ratio": 0.713671875, "relative_reconstruction_bias": 0.921484375, "loss_original": 2.440642213821411, "loss_reconstructed": 5.239665079116821, "loss_zero": 12.452932643890382, "frac_recovered": 0.7204530894756317, "frac_alive": 0.0592990443110466, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..fd961dacbb2cd0026d3b23d5971910b11af792c0 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..f87bb7b9db5e8581e1ad0fff4c24bbf1ef79b67b --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 50.35, "l1_loss": 202.0, "l0": 67.2666690826416, "frac_variance_explained": 0.671875, "cossim": 0.875, "l2_ratio": 0.81640625, "relative_reconstruction_bias": 0.940234375, "loss_original": 2.440642213821411, "loss_reconstructed": 2.9362952709198, "loss_zero": 12.452932643890382, "frac_recovered": 0.9505707383155823, "frac_alive": 0.1246744766831398, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..65d8ca9054ed506ee85e2ca630d5b99dc04822de --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..443a27d6df80bc1570061ee786aff30e2a5f5d22 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 125.75, "l1_loss": 7833.6, "l0": 8293.233642578125, "frac_variance_explained": -0.76875, "cossim": 0.094921875, "l2_ratio": 0.768359375, "relative_reconstruction_bias": 8.31875, "loss_original": 2.440642213821411, "loss_reconstructed": 9.998102855682372, "loss_zero": 12.452932643890382, "frac_recovered": 0.245339997112751, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c2a03510d040c5f7a263154b18c831a56c553498 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..af6d3add4fd278891a689dea235c4b9591d1d8f2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 83.65, "l1_loss": 156.1, "l0": 130.44166946411133, "frac_variance_explained": 0.10234375, "cossim": 0.6140625, "l2_ratio": 0.511328125, "relative_reconstruction_bias": 0.83515625, "loss_original": 2.440642213821411, "loss_reconstructed": 8.593794536590575, "loss_zero": 12.452932643890382, "frac_recovered": 0.38564318120479585, "frac_alive": 0.6356879472732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..0770bd9c047db3340924b681cd31dcbff1bcb4f7 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.05, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1281e0e900b7e804045871a19f02bab6de02d09c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_3_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 56.275, "l1_loss": 177.5, "l0": 46.90416831970215, "frac_variance_explained": 0.623046875, "cossim": 0.8375, "l2_ratio": 0.78125, "relative_reconstruction_bias": 0.944140625, "loss_original": 2.440642213821411, "loss_reconstructed": 3.767059564590454, "loss_zero": 12.452932643890382, "frac_recovered": 0.8675203025341034, "frac_alive": 0.0457899309694767, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..04f74656b9a88b60908e1d0e0bf559a235414d96 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..0845fd7b19f811cd8cb0ab51b98def405d0380a8 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 156.7, "l1_loss": 9132.8, "l0": 9200.85439453125, "frac_variance_explained": -1.00859375, "cossim": 0.01195831298828125, "l2_ratio": 1.1546875, "relative_reconstruction_bias": 116.4, "loss_original": 2.440642213821411, "loss_reconstructed": 14.571032238006591, "loss_zero": 12.452932643890382, "frac_recovered": -0.21182486265897751, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..006c49916c9a3d2612d8bfb46750924542e8bdd2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d8438f038d27fdc60d3f765aeba8664e0d1ba8b3 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 103.95, "l1_loss": 1843.2, "l0": 3018.029248046875, "frac_variance_explained": -0.090625, "cossim": 0.2638671875, "l2_ratio": 0.3619140625, "relative_reconstruction_bias": 1.475, "loss_original": 2.440642213821411, "loss_reconstructed": 9.312376308441163, "loss_zero": 12.452932643890382, "frac_recovered": 0.3137417733669281, "frac_alive": 0.9969618320465088, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..491613a96eaa5afdf233b1c42a90143a6623e47e --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6c6b75911c81784657ba9cdf378b26b9999206cc --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 71.0, "l1_loss": 105.7, "l0": 18.870833778381346, "frac_variance_explained": 0.3015625, "cossim": 0.730078125, "l2_ratio": 0.6671875, "relative_reconstruction_bias": 0.92265625, "loss_original": 2.440642213821411, "loss_reconstructed": 6.353987121582032, "loss_zero": 12.452932643890382, "frac_recovered": 0.6091614365577698, "frac_alive": 0.05322265625, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7b2c301326ff60c22d5745fd28bd60295c32f85c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..392230e258dfe5e53151311a8cef84cc7c7e986f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 56.475, "l1_loss": 152.2, "l0": 37.69583435058594, "frac_variance_explained": 0.537109375, "cossim": 0.840625, "l2_ratio": 0.782421875, "relative_reconstruction_bias": 0.934375, "loss_original": 2.440642213821411, "loss_reconstructed": 3.5936453342437744, "loss_zero": 12.452932643890382, "frac_recovered": 0.8849301040172577, "frac_alive": 0.0482313372194767, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1f4eff2913160dc734de9c0320592ae1caae13c2 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..83654a69d38babeeec35cc954656540e30821aea --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 133.4, "l1_loss": 8348.8, "l0": 8286.41708984375, "frac_variance_explained": -0.734375, "cossim": 0.08974609375, "l2_ratio": 0.76796875, "relative_reconstruction_bias": 8.73125, "loss_original": 2.440642213821411, "loss_reconstructed": 10.01290521621704, "loss_zero": 12.452932643890382, "frac_recovered": 0.2438603311777115, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..518802f8ffe2e2a8e33af6807a78cf1ee2514a76 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2a49bf85242e24810f1cd5a3c0a33d94d19fdc5f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 87.2, "l1_loss": 122.55, "l0": 144.45000305175782, "frac_variance_explained": 0.03203125, "cossim": 0.559375, "l2_ratio": 0.4369140625, "relative_reconstruction_bias": 0.787109375, "loss_original": 2.440642213821411, "loss_reconstructed": 9.534128379821777, "loss_zero": 12.452932643890382, "frac_recovered": 0.2917644441127777, "frac_alive": 0.6735568642616272, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..63319307330ff4de2396d95baa9dd2f5eb59bddf --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.06, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..6085b8e58bf578c0d0963abc74cd6d450d23341a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_4_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 65.225, "l1_loss": 126.75, "l0": 24.633334159851074, "frac_variance_explained": 0.41171875, "cossim": 0.786328125, "l2_ratio": 0.723046875, "relative_reconstruction_bias": 0.9265625, "loss_original": 2.440642213821411, "loss_reconstructed": 4.907446241378784, "loss_zero": 12.452932643890382, "frac_recovered": 0.7536444127559662, "frac_alive": 0.0179578997194767, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_0/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_0/config.json new file mode 100644 index 0000000000000000000000000000000000000000..85c60d8804b50fbeb400371466fe821231081225 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_0/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "0" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_0/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_0/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..9443e8433152a5daaf4561fa633a1e0b67e8863d --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_0/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 156.6, "l1_loss": 9120.0, "l0": 9217.45029296875, "frac_variance_explained": -1.0078125, "cossim": 0.00892333984375, "l2_ratio": 1.15546875, "relative_reconstruction_bias": 153.9, "loss_original": 2.440642213821411, "loss_reconstructed": 14.571032238006591, "loss_zero": 12.452932643890382, "frac_recovered": -0.21182486265897751, "frac_alive": 1.0, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_154/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_154/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a6c87886360f48b2c59a969da69d99fdc0f2bb72 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_154/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "154" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_154/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_154/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..c353f70ac8ffe898648ff03e6402fd75ec3f54c5 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_154/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 101.0, "l1_loss": 1752.8, "l0": 2944.745947265625, "frac_variance_explained": -0.0921875, "cossim": 0.26025390625, "l2_ratio": 0.35078125, "relative_reconstruction_bias": 1.4578125, "loss_original": 2.440642213821411, "loss_reconstructed": 9.352425670623779, "loss_zero": 12.452932643890382, "frac_recovered": 0.30973615646362307, "frac_alive": 0.9954426884651184, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_1544/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_1544/config.json new file mode 100644 index 0000000000000000000000000000000000000000..97258bf3552aa6cad1874129ed77c10a5edb7e4f --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_1544/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "1544" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_1544/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_1544/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..4e6b462127842a9a8d991ccba4066abecc84abac --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_1544/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 76.4, "l1_loss": 89.05, "l0": 12.591667079925537, "frac_variance_explained": 0.2296875, "cossim": 0.690234375, "l2_ratio": 0.62109375, "relative_reconstruction_bias": 0.9078125, "loss_original": 2.440642213821411, "loss_reconstructed": 7.287093782424927, "loss_zero": 12.452932643890382, "frac_recovered": 0.5160394996404648, "frac_alive": 0.0379231758415699, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_15440/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_15440/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4d03d35e74f8299722c37f8d7670f8a743bacc9e --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_15440/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "15440" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_15440/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_15440/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..a93fa1f03d63c048826a21744b4e5a1d1f6e2b67 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_15440/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 61.8, "l1_loss": 110.2, "l0": 20.550000381469726, "frac_variance_explained": 0.4359375, "cossim": 0.790234375, "l2_ratio": 0.719921875, "relative_reconstruction_bias": 0.912890625, "loss_original": 2.440642213821411, "loss_reconstructed": 4.692216539382935, "loss_zero": 12.452932643890382, "frac_recovered": 0.7752011001110077, "frac_alive": 0.0178493931889534, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_48/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ddc15ab13f44c45a2f6b3e69bbf380e02222d105 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_48/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "48" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_48/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_48/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..5d93e6a17354c131b75a1f7265a12b756033fb1a --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_48/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 127.05, "l1_loss": 7900.8, "l0": 8297.4294921875, "frac_variance_explained": -0.775, "cossim": 0.090673828125, "l2_ratio": 0.768359375, "relative_reconstruction_bias": 8.70625, "loss_original": 2.440642213821411, "loss_reconstructed": 10.02195405960083, "loss_zero": 12.452932643890382, "frac_recovered": 0.24295330047607422, "frac_alive": 0.9999457597732544, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_488/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_488/config.json new file mode 100644 index 0000000000000000000000000000000000000000..36e412f2c9d6e9fa8290b597303c435cf38f0cf4 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_488/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "488" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_488/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_488/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1475061a333590f3178322a107e1d9c885dbb260 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_488/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 103.5, "l1_loss": 49.0625, "l0": 136.8000045776367, "frac_variance_explained": 0.018359375, "cossim": 0.21806640625, "l2_ratio": 0.0637939453125, "relative_reconstruction_bias": 0.39658203125, "loss_original": 2.440642213821411, "loss_reconstructed": 10.39458122253418, "loss_zero": 12.452932643890382, "frac_recovered": 0.205659119784832, "frac_alive": 0.662109375, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/config.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/config.json new file mode 100644 index 0000000000000000000000000000000000000000..774760e71ca65a94dcbb0773a1b3b9c0f82c0d9c --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/config.json @@ -0,0 +1,27 @@ +{ + "trainer": { + "dict_class": "AutoEncoder", + "trainer_class": "StandardTrainer", + "activation_dim": 2304, + "dict_size": 18432, + "lr": 0.0003, + "l1_penalty": 0.07, + "warmup_steps": 1000, + "resample_steps": null, + "device": "cuda:0", + "layer": 7, + "lm_name": "google/gemma-2-2b", + "wandb_name": "StandardTrainer-google/gemma-2-2b-resid_post_layer_7", + "submodule_name": "resid_post_layer_7", + "steps": "4882" + }, + "buffer": { + "d_submodule": 2304, + "io": "out", + "n_ctxs": 2000, + "ctx_len": 128, + "refresh_batch_size": 24, + "out_batch_size": 4096, + "device": "cuda:0" + } +} \ No newline at end of file diff --git a/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/eval_results.json b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..ae57f286322cdd60e7d3745219d1524da1a61336 --- /dev/null +++ b/gemma-2-2b_sweep_standard_ctx128_ef8_0824/resid_post_layer_7_checkpoints/trainer_5_step_4882/eval_results.json @@ -0,0 +1 @@ +{"l2_loss": 66.8, "l1_loss": 95.9, "l0": 14.125000286102296, "frac_variance_explained": 0.337890625, "cossim": 0.7375, "l2_ratio": 0.669140625, "relative_reconstruction_bias": 0.91484375, "loss_original": 2.440642213821411, "loss_reconstructed": 5.975473880767822, "loss_zero": 12.452932643890382, "frac_recovered": 0.6469856142997742, "frac_alive": 0.014377170242369175, "hyperparameters": {"n_inputs": 250, "context_length": 128}} \ No newline at end of file