adamkarvonen commited on
Commit
d359938
·
verified ·
1 Parent(s): 454f3d9

Add files using upload-large-folder tool

Browse files
Files changed (43) hide show
  1. .gitattributes +6 -0
  2. old_relu_eval_results/absorption/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_0_custom_sae_eval_results.json +268 -0
  3. old_relu_eval_results/absorption/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_1_custom_sae_eval_results.json +268 -0
  4. old_relu_eval_results/absorption/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_2_custom_sae_eval_results.json +268 -0
  5. old_relu_eval_results/absorption/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_3_custom_sae_eval_results.json +268 -0
  6. old_relu_eval_results/absorption/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_4_custom_sae_eval_results.json +268 -0
  7. old_relu_eval_results/absorption/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_5_custom_sae_eval_results.json +268 -0
  8. old_relu_eval_results/autointerp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_0_custom_sae_eval_results.json +3 -0
  9. old_relu_eval_results/autointerp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_1_custom_sae_eval_results.json +3 -0
  10. old_relu_eval_results/autointerp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_2_custom_sae_eval_results.json +3 -0
  11. old_relu_eval_results/autointerp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_3_custom_sae_eval_results.json +3 -0
  12. old_relu_eval_results/autointerp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_4_custom_sae_eval_results.json +3 -0
  13. old_relu_eval_results/autointerp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_5_custom_sae_eval_results.json +3 -0
  14. old_relu_eval_results/core/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_0_custom_sae_eval_results.json +0 -0
  15. old_relu_eval_results/core/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_1_custom_sae_eval_results.json +0 -0
  16. old_relu_eval_results/core/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_2_custom_sae_eval_results.json +0 -0
  17. old_relu_eval_results/core/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_3_custom_sae_eval_results.json +0 -0
  18. old_relu_eval_results/core/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_4_custom_sae_eval_results.json +0 -0
  19. old_relu_eval_results/core/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_5_custom_sae_eval_results.json +0 -0
  20. old_relu_eval_results/scr/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_0_custom_sae_eval_results.json +323 -0
  21. old_relu_eval_results/scr/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_1_custom_sae_eval_results.json +323 -0
  22. old_relu_eval_results/scr/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_2_custom_sae_eval_results.json +323 -0
  23. old_relu_eval_results/scr/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_3_custom_sae_eval_results.json +323 -0
  24. old_relu_eval_results/scr/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_4_custom_sae_eval_results.json +323 -0
  25. old_relu_eval_results/scr/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_5_custom_sae_eval_results.json +323 -0
  26. old_relu_eval_results/sparse_probing/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_0_custom_sae_eval_results.json +670 -0
  27. old_relu_eval_results/sparse_probing/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_1_custom_sae_eval_results.json +670 -0
  28. old_relu_eval_results/sparse_probing/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_2_custom_sae_eval_results.json +670 -0
  29. old_relu_eval_results/sparse_probing/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_3_custom_sae_eval_results.json +670 -0
  30. old_relu_eval_results/sparse_probing/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_4_custom_sae_eval_results.json +670 -0
  31. old_relu_eval_results/sparse_probing/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_5_custom_sae_eval_results.json +670 -0
  32. old_relu_eval_results/tpp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_0_custom_sae_eval_results.json +414 -0
  33. old_relu_eval_results/tpp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_1_custom_sae_eval_results.json +414 -0
  34. old_relu_eval_results/tpp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_2_custom_sae_eval_results.json +414 -0
  35. old_relu_eval_results/tpp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_3_custom_sae_eval_results.json +414 -0
  36. old_relu_eval_results/tpp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_4_custom_sae_eval_results.json +414 -0
  37. old_relu_eval_results/tpp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_5_custom_sae_eval_results.json +414 -0
  38. old_relu_eval_results/unlearning/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_0_custom_sae_eval_results.json +74 -0
  39. old_relu_eval_results/unlearning/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_1_custom_sae_eval_results.json +74 -0
  40. old_relu_eval_results/unlearning/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_2_custom_sae_eval_results.json +74 -0
  41. old_relu_eval_results/unlearning/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_3_custom_sae_eval_results.json +74 -0
  42. old_relu_eval_results/unlearning/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_4_custom_sae_eval_results.json +74 -0
  43. old_relu_eval_results/unlearning/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_5_custom_sae_eval_results.json +74 -0
.gitattributes CHANGED
@@ -38,3 +38,9 @@ random_seed_eval_results/autointerp/temp_random_seeds_google_gemma-2-2b_top_k_re
38
  random_seed_eval_results/autointerp/temp_random_seeds_google_gemma-2-2b_top_k_resid_post_layer_12_trainer_3_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
39
  random_seed_eval_results/autointerp/temp_random_seeds_google_gemma-2-2b_top_k_resid_post_layer_12_trainer_4_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
40
  random_seed_eval_results/autointerp/temp_random_seeds_google_gemma-2-2b_top_k_resid_post_layer_12_trainer_2_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
38
  random_seed_eval_results/autointerp/temp_random_seeds_google_gemma-2-2b_top_k_resid_post_layer_12_trainer_3_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
39
  random_seed_eval_results/autointerp/temp_random_seeds_google_gemma-2-2b_top_k_resid_post_layer_12_trainer_4_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
40
  random_seed_eval_results/autointerp/temp_random_seeds_google_gemma-2-2b_top_k_resid_post_layer_12_trainer_2_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
41
+ old_relu_eval_results/autointerp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_3_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
42
+ old_relu_eval_results/autointerp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_0_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
43
+ old_relu_eval_results/autointerp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_1_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
44
+ old_relu_eval_results/autointerp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_5_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
45
+ old_relu_eval_results/autointerp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_2_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
46
+ old_relu_eval_results/autointerp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_4_custom_sae_eval_results.json filter=lfs diff=lfs merge=lfs -text
old_relu_eval_results/absorption/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_0_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "431e48c6-55b1-4fe2-a188-fdf282380ea2",
17
+ "datetime_epoch_millis": 1738783485930,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.09751356079257899,
21
+ "mean_full_absorption_score": 0.004661694890776431,
22
+ "mean_num_split_features": 1.0384615384615385,
23
+ "std_dev_absorption_fraction_score": 0.10468056684076392,
24
+ "std_dev_full_absorption_score": 0.00923750174821687,
25
+ "std_dev_num_split_features": 0.19611613513818404
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.10952248859777117,
32
+ "full_absorption_rate": 0.0007757951900698216,
33
+ "num_full_absorption": 2,
34
+ "num_probe_true_positives": 2578,
35
+ "num_split_features": 1
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.037033530481398055,
40
+ "full_absorption_rate": 0.0,
41
+ "num_full_absorption": 0,
42
+ "num_probe_true_positives": 1664,
43
+ "num_split_features": 1
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.32412380384249956,
48
+ "full_absorption_rate": 0.012851684612712747,
49
+ "num_full_absorption": 37,
50
+ "num_probe_true_positives": 2879,
51
+ "num_split_features": 1
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.2652438933350776,
56
+ "full_absorption_rate": 0.015439429928741092,
57
+ "num_full_absorption": 26,
58
+ "num_probe_true_positives": 1684,
59
+ "num_split_features": 1
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.029511883739283127,
64
+ "full_absorption_rate": 0.005829015544041451,
65
+ "num_full_absorption": 9,
66
+ "num_probe_true_positives": 1544,
67
+ "num_split_features": 1
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.03661148893601744,
72
+ "full_absorption_rate": 0.0008598452278589854,
73
+ "num_full_absorption": 1,
74
+ "num_probe_true_positives": 1163,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.07005964729193298,
80
+ "full_absorption_rate": 0.0027223230490018148,
81
+ "num_full_absorption": 3,
82
+ "num_probe_true_positives": 1102,
83
+ "num_split_features": 1
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.025913905709330304,
88
+ "full_absorption_rate": 0.0019821605550049554,
89
+ "num_full_absorption": 2,
90
+ "num_probe_true_positives": 1009,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.16489142397701123,
96
+ "full_absorption_rate": 0.005441354292623942,
97
+ "num_full_absorption": 9,
98
+ "num_probe_true_positives": 1654,
99
+ "num_split_features": 1
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.017172634110049358,
104
+ "full_absorption_rate": 0.0,
105
+ "num_full_absorption": 0,
106
+ "num_probe_true_positives": 406,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.016921317745917687,
112
+ "full_absorption_rate": 0.0,
113
+ "num_full_absorption": 0,
114
+ "num_probe_true_positives": 665,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.043985977343370744,
120
+ "full_absorption_rate": 0.0,
121
+ "num_full_absorption": 0,
122
+ "num_probe_true_positives": 1195,
123
+ "num_split_features": 1
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.02707755806490339,
128
+ "full_absorption_rate": 0.0011025358324145535,
129
+ "num_full_absorption": 2,
130
+ "num_probe_true_positives": 1814,
131
+ "num_split_features": 1
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.04764539422889375,
136
+ "full_absorption_rate": 0.003778337531486146,
137
+ "num_full_absorption": 3,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.11199046697502817,
144
+ "full_absorption_rate": 0.002857142857142857,
145
+ "num_full_absorption": 3,
146
+ "num_probe_true_positives": 1050,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.2957251949612795,
152
+ "full_absorption_rate": 0.002926421404682274,
153
+ "num_full_absorption": 7,
154
+ "num_probe_true_positives": 2392,
155
+ "num_split_features": 1
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.00767947180411824,
160
+ "full_absorption_rate": 0.0,
161
+ "num_full_absorption": 0,
162
+ "num_probe_true_positives": 174,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.06558605221479297,
168
+ "full_absorption_rate": 0.0006016847172081829,
169
+ "num_full_absorption": 1,
170
+ "num_probe_true_positives": 1662,
171
+ "num_split_features": 1
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.24032658294094394,
176
+ "full_absorption_rate": 0.015951790145338533,
177
+ "num_full_absorption": 45,
178
+ "num_probe_true_positives": 2821,
179
+ "num_split_features": 1
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.04682942316731627,
184
+ "full_absorption_rate": 0.0,
185
+ "num_full_absorption": 0,
186
+ "num_probe_true_positives": 1677,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.33437220839551257,
192
+ "full_absorption_rate": 0.043824701195219126,
193
+ "num_full_absorption": 33,
194
+ "num_probe_true_positives": 753,
195
+ "num_split_features": 1
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.017881854389667527,
200
+ "full_absorption_rate": 0.0011876484560570072,
201
+ "num_full_absorption": 1,
202
+ "num_probe_true_positives": 842,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.03906620984144505,
208
+ "full_absorption_rate": 0.0030721966205837174,
209
+ "num_full_absorption": 2,
210
+ "num_probe_true_positives": 651,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.11287436426111006,
216
+ "full_absorption_rate": 0.0,
217
+ "num_full_absorption": 0,
218
+ "num_probe_true_positives": 107,
219
+ "num_split_features": 2
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.030323895956477616,
224
+ "full_absorption_rate": 0.0,
225
+ "num_full_absorption": 0,
226
+ "num_probe_true_positives": 193,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.016981908295905283,
232
+ "full_absorption_rate": 0.0,
233
+ "num_full_absorption": 0,
234
+ "num_probe_true_positives": 239,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_0",
241
+ "sae_lens_version": "5.4.1",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 16384,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "standard",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
old_relu_eval_results/absorption/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_1_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "2f7686cc-05f0-4e61-bd5d-1e2c4da21721",
17
+ "datetime_epoch_millis": 1738785879962,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.12074450732711571,
21
+ "mean_full_absorption_score": 0.013239901484226383,
22
+ "mean_num_split_features": 1.1153846153846154,
23
+ "std_dev_absorption_fraction_score": 0.1336485927173351,
24
+ "std_dev_full_absorption_score": 0.026396743147366758,
25
+ "std_dev_num_split_features": 0.3258125936084211
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.1828675228579153,
32
+ "full_absorption_rate": 0.004266873545384018,
33
+ "num_full_absorption": 11,
34
+ "num_probe_true_positives": 2578,
35
+ "num_split_features": 1
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.06300828457462988,
40
+ "full_absorption_rate": 0.003605769230769231,
41
+ "num_full_absorption": 6,
42
+ "num_probe_true_positives": 1664,
43
+ "num_split_features": 1
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.37049847095340266,
48
+ "full_absorption_rate": 0.04307051059395624,
49
+ "num_full_absorption": 124,
50
+ "num_probe_true_positives": 2879,
51
+ "num_split_features": 1
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.4320868702385805,
56
+ "full_absorption_rate": 0.083729216152019,
57
+ "num_full_absorption": 141,
58
+ "num_probe_true_positives": 1684,
59
+ "num_split_features": 1
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.04056307305862558,
64
+ "full_absorption_rate": 0.007772020725388601,
65
+ "num_full_absorption": 12,
66
+ "num_probe_true_positives": 1544,
67
+ "num_split_features": 2
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.0644915910448518,
72
+ "full_absorption_rate": 0.0025795356835769563,
73
+ "num_full_absorption": 3,
74
+ "num_probe_true_positives": 1163,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.06433309126945641,
80
+ "full_absorption_rate": 0.0027223230490018148,
81
+ "num_full_absorption": 3,
82
+ "num_probe_true_positives": 1102,
83
+ "num_split_features": 2
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.04143727370568341,
88
+ "full_absorption_rate": 0.0019821605550049554,
89
+ "num_full_absorption": 2,
90
+ "num_probe_true_positives": 1009,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.18541141915011472,
96
+ "full_absorption_rate": 0.009673518742442563,
97
+ "num_full_absorption": 16,
98
+ "num_probe_true_positives": 1654,
99
+ "num_split_features": 1
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.0066235494743313895,
104
+ "full_absorption_rate": 0.0,
105
+ "num_full_absorption": 0,
106
+ "num_probe_true_positives": 406,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.010681486258684182,
112
+ "full_absorption_rate": 0.0,
113
+ "num_full_absorption": 0,
114
+ "num_probe_true_positives": 665,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.09145893836903148,
120
+ "full_absorption_rate": 0.00502092050209205,
121
+ "num_full_absorption": 6,
122
+ "num_probe_true_positives": 1195,
123
+ "num_split_features": 1
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.06668737220718245,
128
+ "full_absorption_rate": 0.004410143329658214,
129
+ "num_full_absorption": 8,
130
+ "num_probe_true_positives": 1814,
131
+ "num_split_features": 1
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.0602452762340166,
136
+ "full_absorption_rate": 0.003778337531486146,
137
+ "num_full_absorption": 3,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.12143825786919218,
144
+ "full_absorption_rate": 0.009523809523809525,
145
+ "num_full_absorption": 10,
146
+ "num_probe_true_positives": 1050,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.37822836353215283,
152
+ "full_absorption_rate": 0.022993311036789296,
153
+ "num_full_absorption": 55,
154
+ "num_probe_true_positives": 2392,
155
+ "num_split_features": 1
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.006096479671690135,
160
+ "full_absorption_rate": 0.0,
161
+ "num_full_absorption": 0,
162
+ "num_probe_true_positives": 174,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.09546917920176193,
168
+ "full_absorption_rate": 0.007821901323706379,
169
+ "num_full_absorption": 13,
170
+ "num_probe_true_positives": 1662,
171
+ "num_split_features": 2
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.44667699863373395,
176
+ "full_absorption_rate": 0.10953562566465792,
177
+ "num_full_absorption": 309,
178
+ "num_probe_true_positives": 2821,
179
+ "num_split_features": 1
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.09178269157296917,
184
+ "full_absorption_rate": 0.0011926058437686344,
185
+ "num_full_absorption": 2,
186
+ "num_probe_true_positives": 1677,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.08318646522525573,
192
+ "full_absorption_rate": 0.0013280212483399733,
193
+ "num_full_absorption": 1,
194
+ "num_probe_true_positives": 753,
195
+ "num_split_features": 1
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.01805468943015878,
200
+ "full_absorption_rate": 0.0011876484560570072,
201
+ "num_full_absorption": 1,
202
+ "num_probe_true_positives": 842,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.046187567611688574,
208
+ "full_absorption_rate": 0.007680491551459293,
209
+ "num_full_absorption": 5,
210
+ "num_probe_true_positives": 651,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.1272381587024434,
216
+ "full_absorption_rate": 0.0,
217
+ "num_full_absorption": 0,
218
+ "num_probe_true_positives": 107,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.030348097021386293,
224
+ "full_absorption_rate": 0.010362694300518135,
225
+ "num_full_absorption": 2,
226
+ "num_probe_true_positives": 193,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.014256022636069204,
232
+ "full_absorption_rate": 0.0,
233
+ "num_full_absorption": 0,
234
+ "num_probe_true_positives": 239,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_1",
241
+ "sae_lens_version": "5.4.1",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 16384,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "standard",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
old_relu_eval_results/absorption/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_2_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "8ebb674d-9f15-4d5c-8f47-48448dd21b35",
17
+ "datetime_epoch_millis": 1738793140191,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.19926936403983087,
21
+ "mean_full_absorption_score": 0.05967727904676934,
22
+ "mean_num_split_features": 1.0769230769230769,
23
+ "std_dev_absorption_fraction_score": 0.200193825605343,
24
+ "std_dev_full_absorption_score": 0.10500077608288533,
25
+ "std_dev_num_split_features": 0.271746488194703
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.37783256022605965,
32
+ "full_absorption_rate": 0.024437548487199378,
33
+ "num_full_absorption": 63,
34
+ "num_probe_true_positives": 2578,
35
+ "num_split_features": 1
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.15587574948959265,
40
+ "full_absorption_rate": 0.007211538461538462,
41
+ "num_full_absorption": 12,
42
+ "num_probe_true_positives": 1664,
43
+ "num_split_features": 1
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.5719603969783236,
48
+ "full_absorption_rate": 0.2580757207363668,
49
+ "num_full_absorption": 743,
50
+ "num_probe_true_positives": 2879,
51
+ "num_split_features": 2
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.5621089686749688,
56
+ "full_absorption_rate": 0.2672209026128266,
57
+ "num_full_absorption": 450,
58
+ "num_probe_true_positives": 1684,
59
+ "num_split_features": 1
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.16834370820991082,
64
+ "full_absorption_rate": 0.05699481865284974,
65
+ "num_full_absorption": 88,
66
+ "num_probe_true_positives": 1544,
67
+ "num_split_features": 1
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.11667156140480404,
72
+ "full_absorption_rate": 0.018056749785038694,
73
+ "num_full_absorption": 21,
74
+ "num_probe_true_positives": 1163,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.06481540727629757,
80
+ "full_absorption_rate": 0.0054446460980036296,
81
+ "num_full_absorption": 6,
82
+ "num_probe_true_positives": 1102,
83
+ "num_split_features": 2
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.06261331028463099,
88
+ "full_absorption_rate": 0.0009910802775024777,
89
+ "num_full_absorption": 1,
90
+ "num_probe_true_positives": 1009,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.24185250920050555,
96
+ "full_absorption_rate": 0.039298669891172915,
97
+ "num_full_absorption": 65,
98
+ "num_probe_true_positives": 1654,
99
+ "num_split_features": 1
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.016041299343090145,
104
+ "full_absorption_rate": 0.0024630541871921183,
105
+ "num_full_absorption": 1,
106
+ "num_probe_true_positives": 406,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.012157821213304845,
112
+ "full_absorption_rate": 0.0,
113
+ "num_full_absorption": 0,
114
+ "num_probe_true_positives": 665,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.13470793723735,
120
+ "full_absorption_rate": 0.015899581589958158,
121
+ "num_full_absorption": 19,
122
+ "num_probe_true_positives": 1195,
123
+ "num_split_features": 1
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.20217104889211931,
128
+ "full_absorption_rate": 0.017089305402425578,
129
+ "num_full_absorption": 31,
130
+ "num_probe_true_positives": 1814,
131
+ "num_split_features": 1
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.06165938260438851,
136
+ "full_absorption_rate": 0.007556675062972292,
137
+ "num_full_absorption": 6,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.17666919527397562,
144
+ "full_absorption_rate": 0.05142857142857143,
145
+ "num_full_absorption": 54,
146
+ "num_probe_true_positives": 1050,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.602060568057986,
152
+ "full_absorption_rate": 0.20108695652173914,
153
+ "num_full_absorption": 481,
154
+ "num_probe_true_positives": 2392,
155
+ "num_split_features": 1
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.00665463288277255,
160
+ "full_absorption_rate": 0.0,
161
+ "num_full_absorption": 0,
162
+ "num_probe_true_positives": 174,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.2618176100631538,
168
+ "full_absorption_rate": 0.05174488567990373,
169
+ "num_full_absorption": 86,
170
+ "num_probe_true_positives": 1662,
171
+ "num_split_features": 1
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.6729389684863054,
176
+ "full_absorption_rate": 0.4143920595533499,
177
+ "num_full_absorption": 1169,
178
+ "num_probe_true_positives": 2821,
179
+ "num_split_features": 1
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.212218166643232,
184
+ "full_absorption_rate": 0.015503875968992248,
185
+ "num_full_absorption": 26,
186
+ "num_probe_true_positives": 1677,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.2805675145009456,
192
+ "full_absorption_rate": 0.07702523240371846,
193
+ "num_full_absorption": 58,
194
+ "num_probe_true_positives": 753,
195
+ "num_split_features": 1
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.02793276716008858,
200
+ "full_absorption_rate": 0.004750593824228029,
201
+ "num_full_absorption": 4,
202
+ "num_probe_true_positives": 842,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.0646989645046496,
208
+ "full_absorption_rate": 0.010752688172043012,
209
+ "num_full_absorption": 7,
210
+ "num_probe_true_positives": 651,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.08669041060559739,
216
+ "full_absorption_rate": 0.0,
217
+ "num_full_absorption": 0,
218
+ "num_probe_true_positives": 107,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.030923398913955746,
224
+ "full_absorption_rate": 0.0,
225
+ "num_full_absorption": 0,
226
+ "num_probe_true_positives": 193,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.009019606907593795,
232
+ "full_absorption_rate": 0.0041841004184100415,
233
+ "num_full_absorption": 1,
234
+ "num_probe_true_positives": 239,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_2",
241
+ "sae_lens_version": "5.4.1",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 16384,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "standard",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
old_relu_eval_results/absorption/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_3_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "8ac82cb6-4d80-4788-b030-d3ff9ecabc15",
17
+ "datetime_epoch_millis": 1738790694222,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.27380462363873964,
21
+ "mean_full_absorption_score": 0.11174761587082688,
22
+ "mean_num_split_features": 1.3846153846153846,
23
+ "std_dev_absorption_fraction_score": 0.2085371119401205,
24
+ "std_dev_full_absorption_score": 0.10395305649996002,
25
+ "std_dev_num_split_features": 0.8978607053178383
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.6334103628494663,
32
+ "full_absorption_rate": 0.28316524437548485,
33
+ "num_full_absorption": 730,
34
+ "num_probe_true_positives": 2578,
35
+ "num_split_features": 1
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.5612972872403076,
40
+ "full_absorption_rate": 0.19831730769230768,
41
+ "num_full_absorption": 330,
42
+ "num_probe_true_positives": 1664,
43
+ "num_split_features": 1
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.4456334749181992,
48
+ "full_absorption_rate": 0.13928447377561654,
49
+ "num_full_absorption": 401,
50
+ "num_probe_true_positives": 2879,
51
+ "num_split_features": 5
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.4663763563664617,
56
+ "full_absorption_rate": 0.1680522565320665,
57
+ "num_full_absorption": 283,
58
+ "num_probe_true_positives": 1684,
59
+ "num_split_features": 2
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.28617098631064275,
64
+ "full_absorption_rate": 0.1794041450777202,
65
+ "num_full_absorption": 277,
66
+ "num_probe_true_positives": 1544,
67
+ "num_split_features": 1
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.31944391642843717,
72
+ "full_absorption_rate": 0.13929492691315562,
73
+ "num_full_absorption": 162,
74
+ "num_probe_true_positives": 1163,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.326763907694398,
80
+ "full_absorption_rate": 0.1705989110707804,
81
+ "num_full_absorption": 188,
82
+ "num_probe_true_positives": 1102,
83
+ "num_split_features": 1
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.11808222242991506,
88
+ "full_absorption_rate": 0.018830525272547076,
89
+ "num_full_absorption": 19,
90
+ "num_probe_true_positives": 1009,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.33518338041432005,
96
+ "full_absorption_rate": 0.1505441354292624,
97
+ "num_full_absorption": 249,
98
+ "num_probe_true_positives": 1654,
99
+ "num_split_features": 2
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.03867125135736307,
104
+ "full_absorption_rate": 0.0024630541871921183,
105
+ "num_full_absorption": 1,
106
+ "num_probe_true_positives": 406,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.02708629091329992,
112
+ "full_absorption_rate": 0.009022556390977444,
113
+ "num_full_absorption": 6,
114
+ "num_probe_true_positives": 665,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.27172567963607736,
120
+ "full_absorption_rate": 0.11799163179916318,
121
+ "num_full_absorption": 141,
122
+ "num_probe_true_positives": 1195,
123
+ "num_split_features": 1
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.500443622143782,
128
+ "full_absorption_rate": 0.19570011025358325,
129
+ "num_full_absorption": 355,
130
+ "num_probe_true_positives": 1814,
131
+ "num_split_features": 2
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.13894620345252578,
136
+ "full_absorption_rate": 0.03904282115869018,
137
+ "num_full_absorption": 31,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.20516155603410746,
144
+ "full_absorption_rate": 0.11619047619047619,
145
+ "num_full_absorption": 122,
146
+ "num_probe_true_positives": 1050,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.6877451457818394,
152
+ "full_absorption_rate": 0.3520066889632107,
153
+ "num_full_absorption": 842,
154
+ "num_probe_true_positives": 2392,
155
+ "num_split_features": 1
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.017662373425384282,
160
+ "full_absorption_rate": 0.0,
161
+ "num_full_absorption": 0,
162
+ "num_probe_true_positives": 174,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.31924702002139765,
168
+ "full_absorption_rate": 0.07220216606498195,
169
+ "num_full_absorption": 120,
170
+ "num_probe_true_positives": 1662,
171
+ "num_split_features": 2
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.5966041750062926,
176
+ "full_absorption_rate": 0.3342786246012052,
177
+ "num_full_absorption": 943,
178
+ "num_probe_true_positives": 2821,
179
+ "num_split_features": 3
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.33112850242087766,
184
+ "full_absorption_rate": 0.11150864639236732,
185
+ "num_full_absorption": 187,
186
+ "num_probe_true_positives": 1677,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.1736379175397297,
192
+ "full_absorption_rate": 0.0451527224435591,
193
+ "num_full_absorption": 34,
194
+ "num_probe_true_positives": 753,
195
+ "num_split_features": 1
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.06615323067748713,
200
+ "full_absorption_rate": 0.020190023752969122,
201
+ "num_full_absorption": 17,
202
+ "num_probe_true_positives": 842,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.11299054531700574,
208
+ "full_absorption_rate": 0.027649769585253458,
209
+ "num_full_absorption": 18,
210
+ "num_probe_true_positives": 651,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.09675091301510899,
216
+ "full_absorption_rate": 0.0,
217
+ "num_full_absorption": 0,
218
+ "num_probe_true_positives": 107,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.04066091772139128,
224
+ "full_absorption_rate": 0.010362694300518135,
225
+ "num_full_absorption": 2,
226
+ "num_probe_true_positives": 193,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.0019429754914134399,
232
+ "full_absorption_rate": 0.0041841004184100415,
233
+ "num_full_absorption": 1,
234
+ "num_probe_true_positives": 239,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_3",
241
+ "sae_lens_version": "5.4.1",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 16384,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "standard",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
old_relu_eval_results/absorption/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_4_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "a6c157c2-8266-4539-a364-d4f3d4b95e63",
17
+ "datetime_epoch_millis": 1738795756067,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.2970629494295943,
21
+ "mean_full_absorption_score": 0.16060743819078988,
22
+ "mean_num_split_features": 1.6923076923076923,
23
+ "std_dev_absorption_fraction_score": 0.21833356962674993,
24
+ "std_dev_full_absorption_score": 0.13683720697531063,
25
+ "std_dev_num_split_features": 1.2575923272422036
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.4413904461749635,
32
+ "full_absorption_rate": 0.16446858029480219,
33
+ "num_full_absorption": 424,
34
+ "num_probe_true_positives": 2578,
35
+ "num_split_features": 5
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.450685431712029,
40
+ "full_absorption_rate": 0.18088942307692307,
41
+ "num_full_absorption": 301,
42
+ "num_probe_true_positives": 1664,
43
+ "num_split_features": 1
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.5821904564134256,
48
+ "full_absorption_rate": 0.30670371656825285,
49
+ "num_full_absorption": 883,
50
+ "num_probe_true_positives": 2879,
51
+ "num_split_features": 5
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.5742337169635329,
56
+ "full_absorption_rate": 0.32185273159144895,
57
+ "num_full_absorption": 542,
58
+ "num_probe_true_positives": 1684,
59
+ "num_split_features": 2
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.18981298174844446,
64
+ "full_absorption_rate": 0.13471502590673576,
65
+ "num_full_absorption": 208,
66
+ "num_probe_true_positives": 1544,
67
+ "num_split_features": 3
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.4303705672075229,
72
+ "full_absorption_rate": 0.24333619948409285,
73
+ "num_full_absorption": 283,
74
+ "num_probe_true_positives": 1163,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.39300790034337457,
80
+ "full_absorption_rate": 0.26406533575317603,
81
+ "num_full_absorption": 291,
82
+ "num_probe_true_positives": 1102,
83
+ "num_split_features": 1
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.1473633705426197,
88
+ "full_absorption_rate": 0.04162537165510406,
89
+ "num_full_absorption": 42,
90
+ "num_probe_true_positives": 1009,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.2615839241768494,
96
+ "full_absorption_rate": 0.14691656590084642,
97
+ "num_full_absorption": 243,
98
+ "num_probe_true_positives": 1654,
99
+ "num_split_features": 2
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.05117406667896947,
104
+ "full_absorption_rate": 0.007389162561576354,
105
+ "num_full_absorption": 3,
106
+ "num_probe_true_positives": 406,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.03448794297537904,
112
+ "full_absorption_rate": 0.01804511278195489,
113
+ "num_full_absorption": 12,
114
+ "num_probe_true_positives": 665,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.528743346032827,
120
+ "full_absorption_rate": 0.37489539748953976,
121
+ "num_full_absorption": 448,
122
+ "num_probe_true_positives": 1195,
123
+ "num_split_features": 1
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.38150848462448217,
128
+ "full_absorption_rate": 0.1703417861080485,
129
+ "num_full_absorption": 309,
130
+ "num_probe_true_positives": 1814,
131
+ "num_split_features": 3
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.21705599444161572,
136
+ "full_absorption_rate": 0.0818639798488665,
137
+ "num_full_absorption": 65,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.19684352710791495,
144
+ "full_absorption_rate": 0.14476190476190476,
145
+ "num_full_absorption": 152,
146
+ "num_probe_true_positives": 1050,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.756351767767991,
152
+ "full_absorption_rate": 0.4744983277591973,
153
+ "num_full_absorption": 1135,
154
+ "num_probe_true_positives": 2392,
155
+ "num_split_features": 1
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.022738719106524842,
160
+ "full_absorption_rate": 0.005747126436781609,
161
+ "num_full_absorption": 1,
162
+ "num_probe_true_positives": 174,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.5716800098523727,
168
+ "full_absorption_rate": 0.32912154031287605,
169
+ "num_full_absorption": 547,
170
+ "num_probe_true_positives": 1662,
171
+ "num_split_features": 2
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.5491532507334174,
176
+ "full_absorption_rate": 0.3548387096774194,
177
+ "num_full_absorption": 1001,
178
+ "num_probe_true_positives": 2821,
179
+ "num_split_features": 4
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.4160762059426235,
184
+ "full_absorption_rate": 0.2081097197376267,
185
+ "num_full_absorption": 349,
186
+ "num_probe_true_positives": 1677,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.17104284092706945,
192
+ "full_absorption_rate": 0.07171314741035857,
193
+ "num_full_absorption": 54,
194
+ "num_probe_true_positives": 753,
195
+ "num_split_features": 1
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.08154865944312527,
200
+ "full_absorption_rate": 0.032066508313539195,
201
+ "num_full_absorption": 27,
202
+ "num_probe_true_positives": 842,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.14250459566400003,
208
+ "full_absorption_rate": 0.05837173579109063,
209
+ "num_full_absorption": 38,
210
+ "num_probe_true_positives": 651,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.06885068971289977,
216
+ "full_absorption_rate": 0.0,
217
+ "num_full_absorption": 0,
218
+ "num_probe_true_positives": 107,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.061906340273470535,
224
+ "full_absorption_rate": 0.031088082901554404,
225
+ "num_full_absorption": 6,
226
+ "num_probe_true_positives": 193,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.0013314486020077486,
232
+ "full_absorption_rate": 0.008368200836820083,
233
+ "num_full_absorption": 2,
234
+ "num_probe_true_positives": 239,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_4",
241
+ "sae_lens_version": "5.4.1",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 16384,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "standard",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
old_relu_eval_results/absorption/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_5_custom_sae_eval_results.json ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "absorption_first_letter",
3
+ "eval_config": {
4
+ "model_name": "gemma-2-2b",
5
+ "random_seed": 42,
6
+ "f1_jump_threshold": 0.03,
7
+ "max_k_value": 10,
8
+ "prompt_template": "{word} has the first letter:",
9
+ "prompt_token_pos": -6,
10
+ "llm_batch_size": 32,
11
+ "llm_dtype": "bfloat16",
12
+ "k_sparse_probe_l1_decay": 0.01,
13
+ "k_sparse_probe_batch_size": 4096,
14
+ "k_sparse_probe_num_epochs": 50
15
+ },
16
+ "eval_id": "59329e73-810f-472d-8ab5-7d0b59ee8032",
17
+ "datetime_epoch_millis": 1738788312573,
18
+ "eval_result_metrics": {
19
+ "mean": {
20
+ "mean_absorption_fraction_score": 0.27616667840053044,
21
+ "mean_full_absorption_score": 0.18311710219845018,
22
+ "mean_num_split_features": 2.0,
23
+ "std_dev_absorption_fraction_score": 0.17955401555761985,
24
+ "std_dev_full_absorption_score": 0.12841765476462225,
25
+ "std_dev_num_split_features": 1.6
26
+ }
27
+ },
28
+ "eval_result_details": [
29
+ {
30
+ "first_letter": "a",
31
+ "mean_absorption_fraction": 0.4135742482669448,
32
+ "full_absorption_rate": 0.21644685802948022,
33
+ "num_full_absorption": 558,
34
+ "num_probe_true_positives": 2578,
35
+ "num_split_features": 4
36
+ },
37
+ {
38
+ "first_letter": "b",
39
+ "mean_absorption_fraction": 0.4015526296583575,
40
+ "full_absorption_rate": 0.20072115384615385,
41
+ "num_full_absorption": 334,
42
+ "num_probe_true_positives": 1664,
43
+ "num_split_features": 3
44
+ },
45
+ {
46
+ "first_letter": "c",
47
+ "mean_absorption_fraction": 0.4404130651991934,
48
+ "full_absorption_rate": 0.2549496352900313,
49
+ "num_full_absorption": 734,
50
+ "num_probe_true_positives": 2879,
51
+ "num_split_features": 6
52
+ },
53
+ {
54
+ "first_letter": "d",
55
+ "mean_absorption_fraction": 0.4588083161124332,
56
+ "full_absorption_rate": 0.28028503562945367,
57
+ "num_full_absorption": 472,
58
+ "num_probe_true_positives": 1684,
59
+ "num_split_features": 2
60
+ },
61
+ {
62
+ "first_letter": "e",
63
+ "mean_absorption_fraction": 0.1447774581665048,
64
+ "full_absorption_rate": 0.13147668393782383,
65
+ "num_full_absorption": 203,
66
+ "num_probe_true_positives": 1544,
67
+ "num_split_features": 3
68
+ },
69
+ {
70
+ "first_letter": "f",
71
+ "mean_absorption_fraction": 0.3873865408321871,
72
+ "full_absorption_rate": 0.2656921754084265,
73
+ "num_full_absorption": 309,
74
+ "num_probe_true_positives": 1163,
75
+ "num_split_features": 1
76
+ },
77
+ {
78
+ "first_letter": "g",
79
+ "mean_absorption_fraction": 0.2375981200227858,
80
+ "full_absorption_rate": 0.16696914700544466,
81
+ "num_full_absorption": 184,
82
+ "num_probe_true_positives": 1102,
83
+ "num_split_features": 2
84
+ },
85
+ {
86
+ "first_letter": "h",
87
+ "mean_absorption_fraction": 0.18660967928208133,
88
+ "full_absorption_rate": 0.0802775024777007,
89
+ "num_full_absorption": 81,
90
+ "num_probe_true_positives": 1009,
91
+ "num_split_features": 1
92
+ },
93
+ {
94
+ "first_letter": "i",
95
+ "mean_absorption_fraction": 0.3631443822738456,
96
+ "full_absorption_rate": 0.29081015719467956,
97
+ "num_full_absorption": 481,
98
+ "num_probe_true_positives": 1654,
99
+ "num_split_features": 1
100
+ },
101
+ {
102
+ "first_letter": "j",
103
+ "mean_absorption_fraction": 0.07115954525673361,
104
+ "full_absorption_rate": 0.017241379310344827,
105
+ "num_full_absorption": 7,
106
+ "num_probe_true_positives": 406,
107
+ "num_split_features": 1
108
+ },
109
+ {
110
+ "first_letter": "k",
111
+ "mean_absorption_fraction": 0.04879204937766494,
112
+ "full_absorption_rate": 0.031578947368421054,
113
+ "num_full_absorption": 21,
114
+ "num_probe_true_positives": 665,
115
+ "num_split_features": 1
116
+ },
117
+ {
118
+ "first_letter": "l",
119
+ "mean_absorption_fraction": 0.4822787469830486,
120
+ "full_absorption_rate": 0.401673640167364,
121
+ "num_full_absorption": 480,
122
+ "num_probe_true_positives": 1195,
123
+ "num_split_features": 1
124
+ },
125
+ {
126
+ "first_letter": "m",
127
+ "mean_absorption_fraction": 0.38377290557583654,
128
+ "full_absorption_rate": 0.2552370452039691,
129
+ "num_full_absorption": 463,
130
+ "num_probe_true_positives": 1814,
131
+ "num_split_features": 3
132
+ },
133
+ {
134
+ "first_letter": "n",
135
+ "mean_absorption_fraction": 0.17978290229285845,
136
+ "full_absorption_rate": 0.10327455919395466,
137
+ "num_full_absorption": 82,
138
+ "num_probe_true_positives": 794,
139
+ "num_split_features": 1
140
+ },
141
+ {
142
+ "first_letter": "o",
143
+ "mean_absorption_fraction": 0.19812942766943445,
144
+ "full_absorption_rate": 0.17047619047619048,
145
+ "num_full_absorption": 179,
146
+ "num_probe_true_positives": 1050,
147
+ "num_split_features": 1
148
+ },
149
+ {
150
+ "first_letter": "p",
151
+ "mean_absorption_fraction": 0.6056222996145454,
152
+ "full_absorption_rate": 0.3448996655518395,
153
+ "num_full_absorption": 825,
154
+ "num_probe_true_positives": 2392,
155
+ "num_split_features": 5
156
+ },
157
+ {
158
+ "first_letter": "q",
159
+ "mean_absorption_fraction": 0.03644851552466968,
160
+ "full_absorption_rate": 0.022988505747126436,
161
+ "num_full_absorption": 4,
162
+ "num_probe_true_positives": 174,
163
+ "num_split_features": 1
164
+ },
165
+ {
166
+ "first_letter": "r",
167
+ "mean_absorption_fraction": 0.5460252773593768,
168
+ "full_absorption_rate": 0.3802647412755716,
169
+ "num_full_absorption": 632,
170
+ "num_probe_true_positives": 1662,
171
+ "num_split_features": 2
172
+ },
173
+ {
174
+ "first_letter": "s",
175
+ "mean_absorption_fraction": 0.43503717196696245,
176
+ "full_absorption_rate": 0.2977667493796526,
177
+ "num_full_absorption": 840,
178
+ "num_probe_true_positives": 2821,
179
+ "num_split_features": 6
180
+ },
181
+ {
182
+ "first_letter": "t",
183
+ "mean_absorption_fraction": 0.37168244953794066,
184
+ "full_absorption_rate": 0.24388789505068575,
185
+ "num_full_absorption": 409,
186
+ "num_probe_true_positives": 1677,
187
+ "num_split_features": 1
188
+ },
189
+ {
190
+ "first_letter": "u",
191
+ "mean_absorption_fraction": 0.4015811028782945,
192
+ "full_absorption_rate": 0.3930942895086321,
193
+ "num_full_absorption": 296,
194
+ "num_probe_true_positives": 753,
195
+ "num_split_features": 1
196
+ },
197
+ {
198
+ "first_letter": "v",
199
+ "mean_absorption_fraction": 0.09352294964084032,
200
+ "full_absorption_rate": 0.060570071258907364,
201
+ "num_full_absorption": 51,
202
+ "num_probe_true_positives": 842,
203
+ "num_split_features": 1
204
+ },
205
+ {
206
+ "first_letter": "w",
207
+ "mean_absorption_fraction": 0.16147162387821953,
208
+ "full_absorption_rate": 0.08294930875576037,
209
+ "num_full_absorption": 54,
210
+ "num_probe_true_positives": 651,
211
+ "num_split_features": 1
212
+ },
213
+ {
214
+ "first_letter": "x",
215
+ "mean_absorption_fraction": 0.08565750731693882,
216
+ "full_absorption_rate": 0.018691588785046728,
217
+ "num_full_absorption": 2,
218
+ "num_probe_true_positives": 107,
219
+ "num_split_features": 1
220
+ },
221
+ {
222
+ "first_letter": "y",
223
+ "mean_absorption_fraction": 0.04526030996735048,
224
+ "full_absorption_rate": 0.03626943005181347,
225
+ "num_full_absorption": 7,
226
+ "num_probe_true_positives": 193,
227
+ "num_split_features": 1
228
+ },
229
+ {
230
+ "first_letter": "z",
231
+ "mean_absorption_fraction": 0.00024441375874335945,
232
+ "full_absorption_rate": 0.012552301255230125,
233
+ "num_full_absorption": 3,
234
+ "num_probe_true_positives": 239,
235
+ "num_split_features": 1
236
+ }
237
+ ],
238
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
239
+ "sae_lens_id": "custom_sae",
240
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_5",
241
+ "sae_lens_version": "5.4.1",
242
+ "sae_cfg_dict": {
243
+ "model_name": "gemma-2-2b",
244
+ "d_in": 2304,
245
+ "d_sae": 16384,
246
+ "hook_layer": 12,
247
+ "hook_name": "blocks.12.hook_resid_post",
248
+ "context_size": null,
249
+ "hook_head_index": null,
250
+ "architecture": "standard",
251
+ "apply_b_dec_to_input": null,
252
+ "finetuning_scaling_factor": null,
253
+ "activation_fn_str": "",
254
+ "prepend_bos": true,
255
+ "normalize_activations": "none",
256
+ "dtype": "bfloat16",
257
+ "device": "",
258
+ "dataset_path": "",
259
+ "dataset_trust_remote_code": true,
260
+ "seqpos_slice": [
261
+ null
262
+ ],
263
+ "training_tokens": -100000,
264
+ "sae_lens_training_version": null,
265
+ "neuronpedia_id": null
266
+ },
267
+ "eval_result_unstructured": null
268
+ }
old_relu_eval_results/autointerp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_0_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fade7ed161c5db9c7c7ac163c599bd3859bb24ecc70c0a1c824a46d9e37fda6d
3
+ size 27824414
old_relu_eval_results/autointerp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_1_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5adce21b197509292cdd2f4da8895a5762bf93e1f0b2fbcd3ba2daf6d93f8e9d
3
+ size 27583837
old_relu_eval_results/autointerp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_2_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2e3c88b391388eccb6fa9d88e5f912d3862401c3acef072ed8a20527e0ac372
3
+ size 27435249
old_relu_eval_results/autointerp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_3_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f9d90013affb818d37b90201097e3c443522b80e5e3832fe9715633decb95ec
3
+ size 27194850
old_relu_eval_results/autointerp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_4_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8acd6328873671a729f89408de8ca580ec5b368493dabd9363953487b74019b8
3
+ size 27102881
old_relu_eval_results/autointerp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_5_custom_sae_eval_results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a72d03c8276dce0adb0b65cdecd992edbff8cc4f97a61d17ad63b7e936c53cb
3
+ size 26823026
old_relu_eval_results/core/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_0_custom_sae_eval_results.json ADDED
The diff for this file is too large to render. See raw diff
 
old_relu_eval_results/core/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_1_custom_sae_eval_results.json ADDED
The diff for this file is too large to render. See raw diff
 
old_relu_eval_results/core/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_2_custom_sae_eval_results.json ADDED
The diff for this file is too large to render. See raw diff
 
old_relu_eval_results/core/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_3_custom_sae_eval_results.json ADDED
The diff for this file is too large to render. See raw diff
 
old_relu_eval_results/core/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_4_custom_sae_eval_results.json ADDED
The diff for this file is too large to render. See raw diff
 
old_relu_eval_results/core/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_5_custom_sae_eval_results.json ADDED
The diff for this file is too large to render. See raw diff
 
old_relu_eval_results/scr/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_0_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "f84c237b-b63d-44d6-9184-83af39eb778e",
73
+ "datetime_epoch_millis": 1738800834845,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.23234423354969463,
77
+ "scr_metric_threshold_2": 0.09820290124961623,
78
+ "scr_dir2_threshold_2": 0.10051744442943668,
79
+ "scr_dir1_threshold_5": 0.22836236745436214,
80
+ "scr_metric_threshold_5": 0.1612618897051168,
81
+ "scr_dir2_threshold_5": 0.16833831095315674,
82
+ "scr_dir1_threshold_10": 0.23399130656918807,
83
+ "scr_metric_threshold_10": 0.21393385636206694,
84
+ "scr_dir2_threshold_10": 0.2210026206525063,
85
+ "scr_dir1_threshold_20": 0.24236966949927263,
86
+ "scr_metric_threshold_20": 0.26432320579946816,
87
+ "scr_dir2_threshold_20": 0.26846685069449183,
88
+ "scr_dir1_threshold_50": 0.21470674119765001,
89
+ "scr_metric_threshold_50": 0.3265154510614142,
90
+ "scr_dir2_threshold_50": 0.3172547508429693,
91
+ "scr_dir1_threshold_100": 0.20032932591386368,
92
+ "scr_metric_threshold_100": 0.38983340792408966,
93
+ "scr_dir2_threshold_100": 0.38098404313994894,
94
+ "scr_dir1_threshold_500": 0.037709607495273495,
95
+ "scr_metric_threshold_500": 0.3428859758427782,
96
+ "scr_dir2_threshold_500": 0.34688913659267395
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.4687504074535596,
103
+ "scr_metric_threshold_2": 0.012315258641572036,
104
+ "scr_dir2_threshold_2": 0.012315258641572036,
105
+ "scr_dir1_threshold_5": 0.5156247962732202,
106
+ "scr_metric_threshold_5": 0.022167494916722333,
107
+ "scr_dir2_threshold_5": 0.022167494916722333,
108
+ "scr_dir1_threshold_10": 0.578124912688523,
109
+ "scr_metric_threshold_10": 0.03201958438240929,
110
+ "scr_dir2_threshold_10": 0.03201958438240929,
111
+ "scr_dir1_threshold_20": 0.4843752037267798,
112
+ "scr_metric_threshold_20": 0.06403931557428191,
113
+ "scr_dir2_threshold_20": 0.06403931557428191,
114
+ "scr_dir1_threshold_50": 0.5,
115
+ "scr_metric_threshold_50": 0.08128076575816078,
116
+ "scr_dir2_threshold_50": 0.08128076575816078,
117
+ "scr_dir1_threshold_100": 0.43749988358469727,
118
+ "scr_metric_threshold_100": 0.10098509149899802,
119
+ "scr_dir2_threshold_100": 0.10098509149899802,
120
+ "scr_dir1_threshold_500": 0.3593749708961743,
121
+ "scr_metric_threshold_500": 0.16748757624916502,
122
+ "scr_dir2_threshold_500": 0.16748757624916502
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.4205609298770807,
127
+ "scr_metric_threshold_2": 0.1239194296368033,
128
+ "scr_dir2_threshold_2": 0.1239194296368033,
129
+ "scr_dir1_threshold_5": 0.35514009320607276,
130
+ "scr_metric_threshold_5": 0.1556196529738595,
131
+ "scr_dir2_threshold_5": 0.1556196529738595,
132
+ "scr_dir1_threshold_10": 0.30841116289226267,
133
+ "scr_metric_threshold_10": 0.23342947946078924,
134
+ "scr_dir2_threshold_10": 0.23342947946078924,
135
+ "scr_dir1_threshold_20": 0.2616822325784525,
136
+ "scr_metric_threshold_20": 0.2881845043727822,
137
+ "scr_dir2_threshold_20": 0.2881845043727822,
138
+ "scr_dir1_threshold_50": 0.17757004660303638,
139
+ "scr_metric_threshold_50": 0.37463975262183136,
140
+ "scr_dir2_threshold_50": 0.37463975262183136,
141
+ "scr_dir1_threshold_100": 0.08411218597541614,
142
+ "scr_metric_threshold_100": 0.4783861879377377,
143
+ "scr_dir2_threshold_100": 0.4783861879377377,
144
+ "scr_dir1_threshold_500": -0.6168223257845253,
145
+ "scr_metric_threshold_500": 0.18155626180283607,
146
+ "scr_dir2_threshold_500": 0.18155626180283607
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.5156247962732202,
151
+ "scr_metric_threshold_2": 0.03544318161412681,
152
+ "scr_dir2_threshold_2": 0.03544318161412681,
153
+ "scr_dir1_threshold_5": 0.4843752037267798,
154
+ "scr_metric_threshold_5": 0.05063300880745302,
155
+ "scr_dir2_threshold_5": 0.05063300880745302,
156
+ "scr_dir1_threshold_10": 0.4843752037267798,
157
+ "scr_metric_threshold_10": 0.09620264128525105,
158
+ "scr_dir2_threshold_10": 0.09620264128525105,
159
+ "scr_dir1_threshold_20": 0.5156247962732202,
160
+ "scr_metric_threshold_20": 0.12911405928564068,
161
+ "scr_dir2_threshold_20": 0.12911405928564068,
162
+ "scr_dir1_threshold_50": 0.39062549476503666,
163
+ "scr_metric_threshold_50": 0.1797469171952743,
164
+ "scr_dir2_threshold_50": 0.1797469171952743,
165
+ "scr_dir1_threshold_100": 0.32812537834973393,
166
+ "scr_metric_threshold_100": 0.23797476415048074,
167
+ "scr_dir2_threshold_100": 0.23797476415048074,
168
+ "scr_dir1_threshold_500": -0.12499930150818353,
169
+ "scr_metric_threshold_500": 0.07848105047818764,
170
+ "scr_dir2_threshold_500": 0.07848105047818764
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.34645683334217237,
175
+ "scr_metric_threshold_2": 0.06528194057151832,
176
+ "scr_dir2_threshold_2": 0.06528194057151832,
177
+ "scr_dir1_threshold_5": 0.26771677194471133,
178
+ "scr_metric_threshold_5": 0.13946602022454502,
179
+ "scr_dir2_threshold_5": 0.13946602022454502,
180
+ "scr_dir1_threshold_10": 0.21259844736981667,
181
+ "scr_metric_threshold_10": 0.1869436826330466,
182
+ "scr_dir2_threshold_10": 0.1869436826330466,
183
+ "scr_dir1_threshold_20": 0.18110232894527498,
184
+ "scr_metric_threshold_20": 0.2670624037614951,
185
+ "scr_dir2_threshold_20": 0.2670624037614951,
186
+ "scr_dir1_threshold_50": -0.07086614912327223,
187
+ "scr_metric_threshold_50": 0.3412463065461462,
188
+ "scr_dir2_threshold_50": 0.3412463065461462,
189
+ "scr_dir1_threshold_100": -0.14960621052073325,
190
+ "scr_metric_threshold_100": 0.42433234841230566,
191
+ "scr_dir2_threshold_100": 0.42433234841230566,
192
+ "scr_dir1_threshold_500": -0.4724408377125527,
193
+ "scr_metric_threshold_500": 0.08011872112844853,
194
+ "scr_dir2_threshold_500": 0.08011872112844853
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.021739172687599125,
199
+ "scr_metric_threshold_2": 0.17647042323973455,
200
+ "scr_dir2_threshold_2": 0.17647042323973455,
201
+ "scr_dir1_threshold_5": 0.010869424374669583,
202
+ "scr_metric_threshold_5": 0.38039217519558505,
203
+ "scr_dir2_threshold_5": 0.38039217519558505,
204
+ "scr_dir1_threshold_10": -0.021739172687599125,
205
+ "scr_metric_threshold_10": 0.5058823941900663,
206
+ "scr_dir2_threshold_10": 0.5058823941900663,
207
+ "scr_dir1_threshold_20": -0.005434712187334791,
208
+ "scr_metric_threshold_20": 0.5647058686033112,
209
+ "scr_dir2_threshold_20": 0.5647058686033112,
210
+ "scr_dir1_threshold_50": -0.07608694243746696,
211
+ "scr_metric_threshold_50": 0.6470586860331121,
212
+ "scr_dir2_threshold_50": 0.6470586860331121,
213
+ "scr_dir1_threshold_100": -0.059782805875462586,
214
+ "scr_metric_threshold_100": 0.6941176058099336,
215
+ "scr_dir2_threshold_100": 0.6941176058099336,
216
+ "scr_dir1_threshold_500": 0.010869424374669583,
217
+ "scr_metric_threshold_500": 0.7411765255867552,
218
+ "scr_dir2_threshold_500": 0.7411765255867552
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.015463717977866399,
223
+ "scr_metric_threshold_2": 0.08467752208135552,
224
+ "scr_dir2_threshold_2": 0.08467752208135552,
225
+ "scr_dir1_threshold_5": 0.08247408426990457,
226
+ "scr_metric_threshold_5": 0.12096785435318151,
227
+ "scr_dir2_threshold_5": 0.12096785435318151,
228
+ "scr_dir1_threshold_10": 0.1340204253436484,
229
+ "scr_metric_threshold_10": 0.15322574055158325,
230
+ "scr_dir2_threshold_10": 0.15322574055158325,
231
+ "scr_dir1_threshold_20": 0.2061853642949754,
232
+ "scr_metric_threshold_20": 0.20161293036111277,
233
+ "scr_dir2_threshold_20": 0.20161293036111277,
234
+ "scr_dir1_threshold_50": 0.24742256005014163,
235
+ "scr_metric_threshold_50": 0.2661291834404855,
236
+ "scr_dir2_threshold_50": 0.2661291834404855,
237
+ "scr_dir1_threshold_100": 0.2886597558053079,
238
+ "scr_metric_threshold_100": 0.3508064651805564,
239
+ "scr_dir2_threshold_100": 0.3508064651805564,
240
+ "scr_dir1_threshold_500": 0.2886597558053079,
241
+ "scr_metric_threshold_500": 0.5564516012738088,
242
+ "scr_dir2_threshold_500": 0.5564516012738088
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.03153139365860286,
247
+ "scr_metric_threshold_2": 0.2488888370843636,
248
+ "scr_dir2_threshold_2": 0.2488888370843636,
249
+ "scr_dir1_threshold_5": 0.07207194871208326,
250
+ "scr_metric_threshold_5": 0.38222229404213226,
251
+ "scr_dir2_threshold_5": 0.38222229404213226,
252
+ "scr_dir1_threshold_10": 0.09459444946519599,
253
+ "scr_metric_threshold_10": 0.42222230463851246,
254
+ "scr_dir2_threshold_10": 0.42222230463851246,
255
+ "scr_dir1_threshold_20": 0.16666666666666666,
256
+ "scr_metric_threshold_20": 0.4711110887409752,
257
+ "scr_dir2_threshold_20": 0.4711110887409752,
258
+ "scr_dir1_threshold_50": 0.35585583408644605,
259
+ "scr_metric_threshold_50": 0.5288889112590248,
260
+ "scr_dir2_threshold_50": 0.5288889112590248,
261
+ "scr_dir1_threshold_100": 0.45045055204102946,
262
+ "scr_metric_threshold_100": 0.6088889324517851,
263
+ "scr_dir2_threshold_100": 0.6088889324517851,
264
+ "scr_dir1_threshold_500": 0.6081080573128186,
265
+ "scr_metric_threshold_500": 0.6888889536445455,
266
+ "scr_dir2_threshold_500": 0.6888889536445455
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.03862661712745569,
271
+ "scr_metric_threshold_2": 0.03862661712745569,
272
+ "scr_dir2_threshold_2": 0.05714296256601923,
273
+ "scr_dir1_threshold_5": 0.03862661712745569,
274
+ "scr_metric_threshold_5": 0.03862661712745569,
275
+ "scr_dir2_threshold_5": 0.09523798711177515,
276
+ "scr_dir1_threshold_10": 0.08154502375487709,
277
+ "scr_metric_threshold_10": 0.08154502375487709,
278
+ "scr_dir2_threshold_10": 0.13809513807839202,
279
+ "scr_dir1_threshold_20": 0.12875547569614632,
280
+ "scr_metric_threshold_20": 0.12875547569614632,
281
+ "scr_dir2_threshold_20": 0.1619046348563358,
282
+ "scr_dir1_threshold_50": 0.19313308563727843,
283
+ "scr_metric_threshold_50": 0.19313308563727843,
284
+ "scr_dir2_threshold_50": 0.11904748388971893,
285
+ "scr_dir1_threshold_100": 0.22317586795092056,
286
+ "scr_metric_threshold_100": 0.22317586795092056,
287
+ "scr_dir2_threshold_100": 0.1523809496777944,
288
+ "scr_dir1_threshold_500": 0.2489271165784791,
289
+ "scr_metric_threshold_500": 0.2489271165784791,
290
+ "scr_dir2_threshold_500": 0.280952402577645
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_0",
296
+ "sae_lens_version": "5.4.1",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 16384,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "standard",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
old_relu_eval_results/scr/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_1_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "e55f5870-c524-4a98-bbbd-3745c4e6ba5f",
73
+ "datetime_epoch_millis": 1738801279714,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.23710567889490614,
77
+ "scr_metric_threshold_2": 0.09984043514036153,
78
+ "scr_dir2_threshold_2": 0.10096448219391552,
79
+ "scr_dir1_threshold_5": 0.23277434004406478,
80
+ "scr_metric_threshold_5": 0.1533444054899716,
81
+ "scr_dir2_threshold_5": 0.1564892090403169,
82
+ "scr_dir1_threshold_10": 0.25301255397775474,
83
+ "scr_metric_threshold_10": 0.21059082421971573,
84
+ "scr_dir2_threshold_10": 0.2181373055614877,
85
+ "scr_dir1_threshold_20": 0.24499730767158928,
86
+ "scr_metric_threshold_20": 0.2566161420240508,
87
+ "scr_dir2_threshold_20": 0.2605247568722085,
88
+ "scr_dir1_threshold_50": 0.22983255987764495,
89
+ "scr_metric_threshold_50": 0.32999437667348414,
90
+ "scr_dir2_threshold_50": 0.3275827600927625,
91
+ "scr_dir1_threshold_100": 0.1800306346905109,
92
+ "scr_metric_threshold_100": 0.38054020119475096,
93
+ "scr_dir2_threshold_100": 0.37907635825688035,
94
+ "scr_dir1_threshold_500": 0.0590895362527501,
95
+ "scr_metric_threshold_500": 0.3194485925466525,
96
+ "scr_dir2_threshold_500": 0.33138143776464996
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.4531256111803394,
103
+ "scr_metric_threshold_2": 0.017241303374415515,
104
+ "scr_dir2_threshold_2": 0.017241303374415515,
105
+ "scr_dir1_threshold_5": 0.4687504074535596,
106
+ "scr_metric_threshold_5": 0.02709353964956581,
107
+ "scr_dir2_threshold_5": 0.02709353964956581,
108
+ "scr_dir1_threshold_10": 0.5,
109
+ "scr_metric_threshold_10": 0.039408798291137845,
110
+ "scr_dir2_threshold_10": 0.039408798291137845,
111
+ "scr_dir1_threshold_20": 0.5312505238688624,
112
+ "scr_metric_threshold_20": 0.0467980121998664,
113
+ "scr_dir2_threshold_20": 0.0467980121998664,
114
+ "scr_dir1_threshold_50": 0.4843752037267798,
115
+ "scr_metric_threshold_50": 0.08374378812458251,
116
+ "scr_dir2_threshold_50": 0.08374378812458251,
117
+ "scr_dir1_threshold_100": 0.3125005820765137,
118
+ "scr_metric_threshold_100": 0.09113300203331107,
119
+ "scr_dir2_threshold_100": 0.09113300203331107,
120
+ "scr_dir1_threshold_500": 0.15624982537704588,
121
+ "scr_metric_threshold_500": 0.16009850914989981,
122
+ "scr_dir2_threshold_500": 0.16009850914989981
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.411214698172087,
127
+ "scr_metric_threshold_2": 0.1268012368908431,
128
+ "scr_dir2_threshold_2": 0.1268012368908431,
129
+ "scr_dir1_threshold_5": 0.3271025121966708,
130
+ "scr_metric_threshold_5": 0.19308366259030446,
131
+ "scr_dir2_threshold_5": 0.19308366259030446,
132
+ "scr_dir1_threshold_10": 0.25233655792624843,
133
+ "scr_metric_threshold_10": 0.2881845043727822,
134
+ "scr_dir2_threshold_10": 0.2881845043727822,
135
+ "scr_dir1_threshold_20": 0.1588786972986282,
136
+ "scr_metric_threshold_20": 0.311239305947719,
137
+ "scr_dir2_threshold_20": 0.311239305947719,
138
+ "scr_dir1_threshold_50": 0.19626139590744457,
139
+ "scr_metric_threshold_50": 0.4034583404761569,
140
+ "scr_dir2_threshold_50": 0.4034583404761569,
141
+ "scr_dir1_threshold_100": 0.056074604966014206,
142
+ "scr_metric_threshold_100": 0.4755043806836979,
143
+ "scr_dir2_threshold_100": 0.4755043806836979,
144
+ "scr_dir1_threshold_500": 0.09345786062762024,
145
+ "scr_metric_threshold_500": 0.1556196529738595,
146
+ "scr_dir2_threshold_500": 0.1556196529738595
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.5312505238688624,
151
+ "scr_metric_threshold_2": 0.030379805284471813,
152
+ "scr_dir2_threshold_2": 0.030379805284471813,
153
+ "scr_dir1_threshold_5": 0.5156247962732202,
154
+ "scr_metric_threshold_5": 0.058227846955206435,
155
+ "scr_dir2_threshold_5": 0.058227846955206435,
156
+ "scr_dir1_threshold_10": 0.5156247962732202,
157
+ "scr_metric_threshold_10": 0.09873425400116885,
158
+ "scr_dir2_threshold_10": 0.09873425400116885,
159
+ "scr_dir1_threshold_20": 0.4843752037267798,
160
+ "scr_metric_threshold_20": 0.12911405928564068,
161
+ "scr_dir2_threshold_20": 0.12911405928564068,
162
+ "scr_dir1_threshold_50": 0.39062549476503666,
163
+ "scr_metric_threshold_50": 0.1848101426271099,
164
+ "scr_dir2_threshold_50": 0.1848101426271099,
165
+ "scr_dir1_threshold_100": 0.17187555297268803,
166
+ "scr_metric_threshold_100": 0.23797476415048074,
167
+ "scr_dir2_threshold_100": 0.23797476415048074,
168
+ "scr_dir1_threshold_500": -0.5468743888196606,
169
+ "scr_metric_threshold_500": 0.09873425400116885,
170
+ "scr_dir2_threshold_500": 0.09873425400116885
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.33858292106798354,
175
+ "scr_metric_threshold_2": 0.09198818094766784,
176
+ "scr_dir2_threshold_2": 0.09198818094766784,
177
+ "scr_dir1_threshold_5": 0.259842390342736,
178
+ "scr_metric_threshold_5": 0.15430280078147524,
179
+ "scr_dir2_threshold_5": 0.15430280078147524,
180
+ "scr_dir1_threshold_10": 0.2834645964930889,
181
+ "scr_metric_threshold_10": 0.2136499230091961,
182
+ "scr_dir2_threshold_10": 0.2136499230091961,
183
+ "scr_dir1_threshold_20": 0.16535450439689736,
184
+ "scr_metric_threshold_20": 0.249258302466854,
185
+ "scr_dir2_threshold_20": 0.249258302466854,
186
+ "scr_dir1_threshold_50": 0.03149611842454171,
187
+ "scr_metric_threshold_50": 0.3531157663653655,
188
+ "scr_dir2_threshold_50": 0.3531157663653655,
189
+ "scr_dir1_threshold_100": -0.03149611842454171,
190
+ "scr_metric_threshold_100": 0.4154303861991729,
191
+ "scr_dir2_threshold_100": 0.4154303861991729,
192
+ "scr_dir1_threshold_500": -0.10236179822002743,
193
+ "scr_metric_threshold_500": 0.09198818094766784,
194
+ "scr_dir2_threshold_500": 0.09198818094766784
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.03260859706226871,
199
+ "scr_metric_threshold_2": 0.18431369340772616,
200
+ "scr_dir2_threshold_2": 0.18431369340772616,
201
+ "scr_dir1_threshold_5": 0.0380433092496035,
202
+ "scr_metric_threshold_5": 0.25490195620110373,
203
+ "scr_dir2_threshold_5": 0.25490195620110373,
204
+ "scr_dir1_threshold_10": 0.07065223025013216,
205
+ "scr_metric_threshold_10": 0.4274508612286972,
206
+ "scr_dir2_threshold_10": 0.4274508612286972,
207
+ "scr_dir1_threshold_20": 0.1195652878126652,
208
+ "scr_metric_threshold_20": 0.5294117372066224,
209
+ "scr_dir2_threshold_20": 0.5294117372066224,
210
+ "scr_dir1_threshold_50": 0.016304136562004374,
211
+ "scr_metric_threshold_50": 0.623529343016556,
212
+ "scr_dir2_threshold_50": 0.623529343016556,
213
+ "scr_dir1_threshold_100": 0.010869424374669583,
214
+ "scr_metric_threshold_100": 0.6784312992176598,
215
+ "scr_dir2_threshold_100": 0.6784312992176598,
216
+ "scr_dir1_threshold_500": 0.11413025168707046,
217
+ "scr_metric_threshold_500": 0.6980391240220748,
218
+ "scr_dir2_threshold_500": 0.6980391240220748
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.04639146117402712,
223
+ "scr_metric_threshold_2": 0.060483807005948444,
224
+ "scr_dir2_threshold_2": 0.060483807005948444,
225
+ "scr_dir1_threshold_5": 0.0979381094881989,
226
+ "scr_metric_threshold_5": 0.14112912335516434,
227
+ "scr_dir2_threshold_5": 0.14112912335516434,
228
+ "scr_dir1_threshold_10": 0.18041219375810347,
229
+ "scr_metric_threshold_10": 0.15322574055158325,
230
+ "scr_dir2_threshold_10": 0.15322574055158325,
231
+ "scr_dir1_threshold_20": 0.22680396217255852,
232
+ "scr_metric_threshold_20": 0.21774199363095595,
233
+ "scr_dir2_threshold_20": 0.21774199363095595,
234
+ "scr_dir1_threshold_50": 0.23711341473156403,
235
+ "scr_metric_threshold_50": 0.2862904524424683,
236
+ "scr_dir2_threshold_50": 0.2862904524424683,
237
+ "scr_dir1_threshold_100": 0.26804115792772476,
238
+ "scr_metric_threshold_100": 0.3306451961785736,
239
+ "scr_dir2_threshold_100": 0.3306451961785736,
240
+ "scr_dir1_threshold_500": 0.32989664432004623,
241
+ "scr_metric_threshold_500": 0.5040322057321397,
242
+ "scr_dir2_threshold_500": 0.5040322057321397
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.045045001506225466,
247
+ "scr_metric_threshold_2": 0.2488888370843636,
248
+ "scr_dir2_threshold_2": 0.2488888370843636,
249
+ "scr_dir1_threshold_5": 0.09909916440732847,
250
+ "scr_metric_threshold_5": 0.3422222834457521,
251
+ "scr_dir2_threshold_5": 0.3422222834457521,
252
+ "scr_dir1_threshold_10": 0.148648612366299,
253
+ "scr_metric_threshold_10": 0.39111106754821484,
254
+ "scr_dir2_threshold_10": 0.39111106754821484,
255
+ "scr_dir1_threshold_20": 0.1621622202139216,
256
+ "scr_metric_threshold_20": 0.45777766357234706,
257
+ "scr_dir2_threshold_20": 0.45777766357234706,
258
+ "scr_dir1_threshold_50": 0.3108108325802206,
259
+ "scr_metric_threshold_50": 0.5333332980120661,
260
+ "scr_dir2_threshold_50": 0.5333332980120661,
261
+ "scr_dir1_threshold_100": 0.4549549984937745,
262
+ "scr_metric_threshold_100": 0.6177777059578677,
263
+ "scr_dir2_threshold_100": 0.6177777059578677,
264
+ "scr_dir1_threshold_500": 0.25225222322637253,
265
+ "scr_metric_threshold_500": 0.671111141722876,
266
+ "scr_dir2_threshold_500": 0.671111141722876
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.03862661712745569,
271
+ "scr_metric_threshold_2": 0.03862661712745569,
272
+ "scr_dir2_threshold_2": 0.04761899355588758,
273
+ "scr_dir1_threshold_5": 0.05579403094120067,
274
+ "scr_metric_threshold_5": 0.05579403094120067,
275
+ "scr_dir2_threshold_5": 0.08095245934396302,
276
+ "scr_dir1_threshold_10": 0.07296144475494565,
277
+ "scr_metric_threshold_10": 0.07296144475494565,
278
+ "scr_dir2_threshold_10": 0.1333332954891213,
279
+ "scr_dir1_threshold_20": 0.11158806188240133,
280
+ "scr_metric_threshold_20": 0.11158806188240133,
281
+ "scr_dir2_threshold_20": 0.14285698066766273,
282
+ "scr_dir1_threshold_50": 0.17167388232356773,
283
+ "scr_metric_threshold_50": 0.17167388232356773,
284
+ "scr_dir2_threshold_50": 0.1523809496777944,
285
+ "scr_dir1_threshold_100": 0.19742487513724416,
286
+ "scr_metric_threshold_100": 0.19742487513724416,
287
+ "scr_dir2_threshold_100": 0.18571413163427958,
288
+ "scr_dir1_threshold_500": 0.17596567182353345,
289
+ "scr_metric_threshold_500": 0.17596567182353345,
290
+ "scr_dir2_threshold_500": 0.27142843356751334
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_1",
296
+ "sae_lens_version": "5.4.1",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 16384,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "standard",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
old_relu_eval_results/scr/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_2_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "3c85fb5d-0428-4028-827e-87b37ec72a2d",
73
+ "datetime_epoch_millis": 1738802658127,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.2469231071310974,
77
+ "scr_metric_threshold_2": 0.10476438206495488,
78
+ "scr_dir2_threshold_2": 0.10439652483056149,
79
+ "scr_dir1_threshold_5": 0.266262331969709,
80
+ "scr_metric_threshold_5": 0.16042571052625748,
81
+ "scr_dir2_threshold_5": 0.16547375379885432,
82
+ "scr_dir1_threshold_10": 0.2695043161578262,
83
+ "scr_metric_threshold_10": 0.20632414219873058,
84
+ "scr_dir2_threshold_10": 0.21190100220122254,
85
+ "scr_dir1_threshold_20": 0.2804753590978531,
86
+ "scr_metric_threshold_20": 0.2545814385824276,
87
+ "scr_dir2_threshold_20": 0.2506855391924104,
88
+ "scr_dir1_threshold_50": 0.22659001552042826,
89
+ "scr_metric_threshold_50": 0.3220650150830612,
90
+ "scr_dir2_threshold_50": 0.3238788383620121,
91
+ "scr_dir1_threshold_100": 0.11656540795529736,
92
+ "scr_metric_threshold_100": 0.35549666827962545,
93
+ "scr_dir2_threshold_100": 0.35367262506496205,
94
+ "scr_dir1_threshold_500": -0.15991300862854543,
95
+ "scr_metric_threshold_500": 0.3376438530231709,
96
+ "scr_dir2_threshold_500": 0.3442118974127407
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.42187508731147705,
103
+ "scr_metric_threshold_2": 0.0073890670992652185,
104
+ "scr_dir2_threshold_2": 0.0073890670992652185,
105
+ "scr_dir1_threshold_5": 0.42187508731147705,
106
+ "scr_metric_threshold_5": 0.02709353964956581,
107
+ "scr_dir2_threshold_5": 0.02709353964956581,
108
+ "scr_dir1_threshold_10": 0.4531256111803394,
109
+ "scr_metric_threshold_10": 0.03448275355829437,
110
+ "scr_dir2_threshold_10": 0.03448275355829437,
111
+ "scr_dir1_threshold_20": 0.4062502910382569,
112
+ "scr_metric_threshold_20": 0.03694577592471611,
113
+ "scr_dir2_threshold_20": 0.03694577592471611,
114
+ "scr_dir1_threshold_50": 0.4062502910382569,
115
+ "scr_metric_threshold_50": 0.07881774339173903,
116
+ "scr_dir2_threshold_50": 0.07881774339173903,
117
+ "scr_dir1_threshold_100": 0.4062502910382569,
118
+ "scr_metric_threshold_100": 0.10591128304130484,
119
+ "scr_dir2_threshold_100": 0.10591128304130484,
120
+ "scr_dir1_threshold_500": 0.0937506402841651,
121
+ "scr_metric_threshold_500": 0.17241376779147183,
122
+ "scr_dir2_threshold_500": 0.17241376779147183
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.3738319995632706,
127
+ "scr_metric_threshold_2": 0.1325648513989227,
128
+ "scr_dir2_threshold_2": 0.1325648513989227,
129
+ "scr_dir1_threshold_5": 0.34579441855386867,
130
+ "scr_metric_threshold_5": 0.19884727709838407,
131
+ "scr_dir2_threshold_5": 0.19884727709838407,
132
+ "scr_dir1_threshold_10": 0.2242989769168465,
133
+ "scr_metric_threshold_10": 0.2708934890772342,
134
+ "scr_dir2_threshold_10": 0.2708934890772342,
135
+ "scr_dir1_threshold_20": 0.2149533022646424,
136
+ "scr_metric_threshold_20": 0.2997119051602506,
137
+ "scr_dir2_threshold_20": 0.2997119051602506,
138
+ "scr_dir1_threshold_50": 0.2616822325784525,
139
+ "scr_metric_threshold_50": 0.40057636145080794,
140
+ "scr_dir2_threshold_50": 0.40057636145080794,
141
+ "scr_dir1_threshold_100": -0.35514009320607276,
142
+ "scr_metric_threshold_100": 0.4495677718547213,
143
+ "scr_dir2_threshold_100": 0.4495677718547213,
144
+ "scr_dir1_threshold_500": -1.065420836671008,
145
+ "scr_metric_threshold_500": 0.24783868750229743,
146
+ "scr_dir2_threshold_500": 0.24783868750229743
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.4062502910382569,
151
+ "scr_metric_threshold_2": 0.0126583653752278,
152
+ "scr_dir2_threshold_2": 0.0126583653752278,
153
+ "scr_dir1_threshold_5": 0.5312505238688624,
154
+ "scr_metric_threshold_5": 0.030379805284471813,
155
+ "scr_dir2_threshold_5": 0.030379805284471813,
156
+ "scr_dir1_threshold_10": 0.5,
157
+ "scr_metric_threshold_10": 0.04810139609153522,
158
+ "scr_dir2_threshold_10": 0.04810139609153522,
159
+ "scr_dir1_threshold_20": 0.5468753201420825,
160
+ "scr_metric_threshold_20": 0.11139246847857727,
161
+ "scr_dir2_threshold_20": 0.11139246847857727,
162
+ "scr_dir1_threshold_50": 0.2968748544808716,
163
+ "scr_metric_threshold_50": 0.1620253263882109,
164
+ "scr_dir2_threshold_50": 0.1620253263882109,
165
+ "scr_dir1_threshold_100": 0.1250002328306055,
166
+ "scr_metric_threshold_100": 0.2050633461500911,
167
+ "scr_dir2_threshold_100": 0.2050633461500911,
168
+ "scr_dir1_threshold_500": -0.9531246798579175,
169
+ "scr_metric_threshold_500": 0.14683549919488467,
170
+ "scr_dir2_threshold_500": 0.14683549919488467
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.30708680264344185,
175
+ "scr_metric_threshold_2": 0.10089032002917621,
176
+ "scr_dir2_threshold_2": 0.10089032002917621,
177
+ "scr_dir1_threshold_5": 0.2362206535201696,
178
+ "scr_metric_threshold_5": 0.14243334096225593,
179
+ "scr_dir2_threshold_5": 0.14243334096225593,
180
+ "scr_dir1_threshold_10": 0.2362206535201696,
181
+ "scr_metric_threshold_10": 0.2017804631899768,
182
+ "scr_dir2_threshold_10": 0.2017804631899768,
183
+ "scr_dir1_threshold_20": 0.17322841667108618,
184
+ "scr_metric_threshold_20": 0.26112776228607326,
185
+ "scr_dir2_threshold_20": 0.26112776228607326,
186
+ "scr_dir1_threshold_50": -0.10236179822002743,
187
+ "scr_metric_threshold_50": 0.3560830871030764,
188
+ "scr_dir2_threshold_50": 0.3560830871030764,
189
+ "scr_dir1_threshold_100": -0.05511785524710812,
190
+ "scr_metric_threshold_100": 0.412463065461462,
191
+ "scr_dir2_threshold_100": 0.412463065461462,
192
+ "scr_dir1_threshold_500": -0.259842390342736,
193
+ "scr_metric_threshold_500": 0.1899110033707575,
194
+ "scr_dir2_threshold_500": 0.1899110033707575
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.048913057562533044,
199
+ "scr_metric_threshold_2": 0.1647058686033112,
200
+ "scr_dir2_threshold_2": 0.1647058686033112,
201
+ "scr_dir1_threshold_5": 0.08695636681213655,
202
+ "scr_metric_threshold_5": 0.3490195620110374,
203
+ "scr_dir2_threshold_5": 0.3490195620110374,
204
+ "scr_dir1_threshold_10": 0.08695636681213655,
205
+ "scr_metric_threshold_10": 0.443137167820971,
206
+ "scr_dir2_threshold_10": 0.443137167820971,
207
+ "scr_dir1_threshold_20": 0.10326082731240087,
208
+ "scr_metric_threshold_20": 0.5098039124022075,
209
+ "scr_dir2_threshold_20": 0.5098039124022075,
210
+ "scr_dir1_threshold_50": -0.005434712187334791,
211
+ "scr_metric_threshold_50": 0.6156863065922739,
212
+ "scr_dir2_threshold_50": 0.6156863065922739,
213
+ "scr_dir1_threshold_100": -0.0869566907503965,
214
+ "scr_metric_threshold_100": 0.6274508612286972,
215
+ "scr_dir2_threshold_100": 0.6274508612286972,
216
+ "scr_dir1_threshold_500": 0.0,
217
+ "scr_metric_threshold_500": 0.6901960875977925,
218
+ "scr_dir2_threshold_500": 0.6901960875977925
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.15463902322123155,
223
+ "scr_metric_threshold_2": 0.08870972781349516,
224
+ "scr_dir2_threshold_2": 0.08870972781349516,
225
+ "scr_dir1_threshold_5": 0.1958762189763978,
226
+ "scr_metric_threshold_5": 0.1290322658174608,
227
+ "scr_dir2_threshold_5": 0.1290322658174608,
228
+ "scr_dir1_threshold_10": 0.23711341473156403,
229
+ "scr_metric_threshold_10": 0.1491935348194436,
230
+ "scr_dir2_threshold_10": 0.1491935348194436,
231
+ "scr_dir1_threshold_20": 0.2886597558053079,
232
+ "scr_metric_threshold_20": 0.20967734182539205,
233
+ "scr_dir2_threshold_20": 0.20967734182539205,
234
+ "scr_dir1_threshold_50": 0.36082469475663487,
235
+ "scr_metric_threshold_50": 0.25403232590278196,
236
+ "scr_dir2_threshold_50": 0.25403232590278196,
237
+ "scr_dir1_threshold_100": 0.38659786529350676,
238
+ "scr_metric_threshold_100": 0.282258006369044,
239
+ "scr_dir2_threshold_100": 0.282258006369044,
240
+ "scr_dir1_threshold_500": 0.37113384007521244,
241
+ "scr_metric_threshold_500": 0.43951619299405154,
242
+ "scr_dir2_threshold_500": 0.43951619299405154
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.20270277526740202,
247
+ "scr_metric_threshold_2": 0.27111103575907436,
248
+ "scr_dir2_threshold_2": 0.27111103575907436,
249
+ "scr_dir1_threshold_5": 0.24774777677362747,
250
+ "scr_metric_threshold_5": 0.3422222834457521,
251
+ "scr_dir2_threshold_5": 0.3422222834457521,
252
+ "scr_dir1_threshold_10": 0.3153152790329657,
253
+ "scr_metric_threshold_10": 0.4000001059638017,
254
+ "scr_dir2_threshold_10": 0.4000001059638017,
255
+ "scr_dir1_threshold_20": 0.3603602805391911,
256
+ "scr_metric_threshold_20": 0.45777766357234706,
257
+ "scr_dir2_threshold_20": 0.45777766357234706,
258
+ "scr_dir1_threshold_50": 0.4189188898930392,
259
+ "scr_metric_threshold_50": 0.5333332980120661,
260
+ "scr_dir2_threshold_50": 0.5333332980120661,
261
+ "scr_dir1_threshold_100": 0.297297224732598,
262
+ "scr_metric_threshold_100": 0.5466667231806942,
263
+ "scr_dir2_threshold_100": 0.5466667231806942,
264
+ "scr_dir1_threshold_500": 0.3153152790329657,
265
+ "scr_metric_threshold_500": 0.595555507283157,
266
+ "scr_dir2_threshold_500": 0.595555507283157
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.060085820441166386,
271
+ "scr_metric_threshold_2": 0.060085820441166386,
272
+ "scr_dir2_threshold_2": 0.05714296256601923,
273
+ "scr_dir1_threshold_5": 0.0643776099411321,
274
+ "scr_metric_threshold_5": 0.0643776099411321,
275
+ "scr_dir2_threshold_5": 0.10476195612190681,
276
+ "scr_dir1_threshold_10": 0.10300422706858779,
277
+ "scr_metric_threshold_10": 0.10300422706858779,
278
+ "scr_dir2_threshold_10": 0.14761910708852366,
279
+ "scr_dir1_threshold_20": 0.15021467900985702,
280
+ "scr_metric_threshold_20": 0.15021467900985702,
281
+ "scr_dir2_threshold_20": 0.11904748388971893,
282
+ "scr_dir1_threshold_50": 0.17596567182353345,
283
+ "scr_metric_threshold_50": 0.17596567182353345,
284
+ "scr_dir2_threshold_50": 0.19047625805514054,
285
+ "scr_dir1_threshold_100": 0.21459228895098914,
286
+ "scr_metric_threshold_100": 0.21459228895098914,
287
+ "scr_dir2_threshold_100": 0.19999994323368195,
288
+ "scr_dir1_threshold_500": 0.21888407845095484,
289
+ "scr_metric_threshold_500": 0.21888407845095484,
290
+ "scr_dir2_threshold_500": 0.27142843356751334
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_2",
296
+ "sae_lens_version": "5.4.1",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 16384,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "standard",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
old_relu_eval_results/scr/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_3_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "941954f3-cc39-43ab-951e-ef7dac65494f",
73
+ "datetime_epoch_millis": 1738802202746,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.1998397667116698,
77
+ "scr_metric_threshold_2": 0.09773468518068165,
78
+ "scr_dir2_threshold_2": 0.1036206457814039,
79
+ "scr_dir1_threshold_5": 0.19389433803907274,
80
+ "scr_metric_threshold_5": 0.1494009315204827,
81
+ "scr_dir2_threshold_5": 0.15724885650288437,
82
+ "scr_dir1_threshold_10": 0.20252891396157224,
83
+ "scr_metric_threshold_10": 0.20638626724773887,
84
+ "scr_dir2_threshold_10": 0.21464552766444467,
85
+ "scr_dir1_threshold_20": 0.1869289133935145,
86
+ "scr_metric_threshold_20": 0.2550003527011395,
87
+ "scr_dir2_threshold_20": 0.2671248172217763,
88
+ "scr_dir1_threshold_50": 0.1218689486768515,
89
+ "scr_metric_threshold_50": 0.29936929010341096,
90
+ "scr_dir2_threshold_50": 0.3102521588192187,
91
+ "scr_dir1_threshold_100": 0.0633538494619269,
92
+ "scr_metric_threshold_100": 0.2931972361072011,
93
+ "scr_dir2_threshold_100": 0.31354517528118225,
94
+ "scr_dir1_threshold_500": -0.1510882278136979,
95
+ "scr_metric_threshold_500": 0.270963096956086,
96
+ "scr_dir2_threshold_500": 0.290943143416725
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.3749997671693945,
103
+ "scr_metric_threshold_2": 0.012315258641572036,
104
+ "scr_dir2_threshold_2": 0.012315258641572036,
105
+ "scr_dir1_threshold_5": 0.39062549476503666,
106
+ "scr_metric_threshold_5": 0.02709353964956581,
107
+ "scr_dir2_threshold_5": 0.02709353964956581,
108
+ "scr_dir1_threshold_10": 0.4062502910382569,
109
+ "scr_metric_threshold_10": 0.03694577592471611,
110
+ "scr_dir2_threshold_10": 0.03694577592471611,
111
+ "scr_dir1_threshold_20": 0.4062502910382569,
112
+ "scr_metric_threshold_20": 0.051724056932709886,
113
+ "scr_dir2_threshold_20": 0.051724056932709886,
114
+ "scr_dir1_threshold_50": 0.3593749708961743,
115
+ "scr_metric_threshold_50": 0.07635457421585395,
116
+ "scr_dir2_threshold_50": 0.07635457421585395,
117
+ "scr_dir1_threshold_100": 0.3125005820765137,
118
+ "scr_metric_threshold_100": 0.10837430540772658,
119
+ "scr_dir2_threshold_100": 0.10837430540772658,
120
+ "scr_dir1_threshold_500": -0.10937450523496334,
121
+ "scr_metric_threshold_500": 0.06157629320786018,
122
+ "scr_dir2_threshold_500": 0.06157629320786018
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.35514009320607276,
127
+ "scr_metric_threshold_2": 0.16714705376132788,
128
+ "scr_dir2_threshold_2": 0.16714705376132788,
129
+ "scr_dir1_threshold_5": 0.2429903262212547,
130
+ "scr_metric_threshold_5": 0.22190207867332085,
131
+ "scr_dir2_threshold_5": 0.22190207867332085,
132
+ "scr_dir1_threshold_10": 0.2056076276124383,
133
+ "scr_metric_threshold_10": 0.3054755196683302,
134
+ "scr_dir2_threshold_10": 0.3054755196683302,
135
+ "scr_dir1_threshold_20": 0.14018679094143036,
136
+ "scr_metric_threshold_20": 0.36599433085971195,
137
+ "scr_dir2_threshold_20": 0.36599433085971195,
138
+ "scr_dir1_threshold_50": 0.04672893031381012,
139
+ "scr_metric_threshold_50": 0.4092219549842365,
140
+ "scr_dir2_threshold_50": 0.4092219549842365,
141
+ "scr_dir1_threshold_100": -0.35514009320607276,
142
+ "scr_metric_threshold_100": 0.20749287063181265,
143
+ "scr_dir2_threshold_100": 0.20749287063181265,
144
+ "scr_dir1_threshold_500": -0.8317756280491677,
145
+ "scr_metric_threshold_500": 0.18443806905687588,
146
+ "scr_dir2_threshold_500": 0.18443806905687588
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.42187508731147705,
151
+ "scr_metric_threshold_2": 0.017721590807063405,
152
+ "scr_dir2_threshold_2": 0.017721590807063405,
153
+ "scr_dir1_threshold_5": 0.43749988358469727,
154
+ "scr_metric_threshold_5": 0.030379805284471813,
155
+ "scr_dir2_threshold_5": 0.030379805284471813,
156
+ "scr_dir1_threshold_10": 0.3749997671693945,
157
+ "scr_metric_threshold_10": 0.06582283600077923,
158
+ "scr_dir2_threshold_10": 0.06582283600077923,
159
+ "scr_dir1_threshold_20": 0.42187508731147705,
160
+ "scr_metric_threshold_20": 0.11392408119449507,
161
+ "scr_dir2_threshold_20": 0.11392408119449507,
162
+ "scr_dir1_threshold_50": 0.18750034924590825,
163
+ "scr_metric_threshold_50": 0.1544304882404575,
164
+ "scr_dir2_threshold_50": 0.1544304882404575,
165
+ "scr_dir1_threshold_100": 0.2968748544808716,
166
+ "scr_metric_threshold_100": 0.18227852991119212,
167
+ "scr_dir2_threshold_100": 0.18227852991119212,
168
+ "scr_dir1_threshold_500": -0.5312495925464404,
169
+ "scr_metric_threshold_500": 0.13417728471747628,
170
+ "scr_dir2_threshold_500": 0.13417728471747628
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.2440945657943584,
175
+ "scr_metric_threshold_2": 0.12166174206152826,
176
+ "scr_dir2_threshold_2": 0.12166174206152826,
177
+ "scr_dir1_threshold_5": 0.1968506228214391,
178
+ "scr_metric_threshold_5": 0.18100904115762476,
179
+ "scr_dir2_threshold_5": 0.18100904115762476,
180
+ "scr_dir1_threshold_10": 0.2519684780685472,
181
+ "scr_metric_threshold_10": 0.23145402430383724,
182
+ "scr_dir2_threshold_10": 0.23145402430383724,
183
+ "scr_dir1_threshold_20": 0.007874381601975306,
184
+ "scr_metric_threshold_20": 0.2640950830237842,
185
+ "scr_dir2_threshold_20": 0.2640950830237842,
186
+ "scr_dir1_threshold_50": -0.13385791664456914,
187
+ "scr_metric_threshold_50": 0.35014844562765457,
188
+ "scr_dir2_threshold_50": 0.35014844562765457,
189
+ "scr_dir1_threshold_100": -0.05511785524710812,
190
+ "scr_metric_threshold_100": 0.41839770693688383,
191
+ "scr_dir2_threshold_100": 0.41839770693688383,
192
+ "scr_dir1_threshold_500": -0.259842390342736,
193
+ "scr_metric_threshold_500": 0.17210690207611637,
194
+ "scr_dir2_threshold_500": 0.17210690207611637
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.03260859706226871,
199
+ "scr_metric_threshold_2": 0.13725477363090466,
200
+ "scr_dir2_threshold_2": 0.13725477363090466,
201
+ "scr_dir1_threshold_5": 0.021739172687599125,
202
+ "scr_metric_threshold_5": 0.2823528174298009,
203
+ "scr_dir2_threshold_5": 0.2823528174298009,
204
+ "scr_dir1_threshold_10": 0.03260859706226871,
205
+ "scr_metric_threshold_10": 0.41176455463642336,
206
+ "scr_dir2_threshold_10": 0.41176455463642336,
207
+ "scr_dir1_threshold_20": 0.05434776974986783,
208
+ "scr_metric_threshold_20": 0.5254902189944813,
209
+ "scr_dir2_threshold_20": 0.5254902189944813,
210
+ "scr_dir1_threshold_50": -0.005434712187334791,
211
+ "scr_metric_threshold_50": 0.6117645546364233,
212
+ "scr_dir2_threshold_50": 0.6117645546364233,
213
+ "scr_dir1_threshold_100": -0.125,
214
+ "scr_metric_threshold_100": 0.6156863065922739,
215
+ "scr_dir2_threshold_100": 0.6156863065922739,
216
+ "scr_dir1_threshold_500": -0.016304460500264333,
217
+ "scr_metric_threshold_500": 0.643137167820971,
218
+ "scr_dir2_threshold_500": 0.643137167820971
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.07731951161061577,
223
+ "scr_metric_threshold_2": 0.060483807005948444,
224
+ "scr_dir2_threshold_2": 0.060483807005948444,
225
+ "scr_dir1_threshold_5": 0.12886585268435963,
226
+ "scr_metric_threshold_5": 0.12096785435318151,
227
+ "scr_dir2_threshold_5": 0.12096785435318151,
228
+ "scr_dir1_threshold_10": 0.15463902322123155,
229
+ "scr_metric_threshold_10": 0.1491935348194436,
230
+ "scr_dir2_threshold_10": 0.1491935348194436,
231
+ "scr_dir1_threshold_20": 0.190721646317109,
232
+ "scr_metric_threshold_20": 0.1935485188968335,
233
+ "scr_dir2_threshold_20": 0.1935485188968335,
234
+ "scr_dir1_threshold_50": 0.18556676641739225,
235
+ "scr_metric_threshold_50": 0.2661291834404855,
236
+ "scr_dir2_threshold_50": 0.2661291834404855,
237
+ "scr_dir1_threshold_100": 0.2010307916356866,
238
+ "scr_metric_threshold_100": 0.2862904524424683,
239
+ "scr_dir2_threshold_100": 0.2862904524424683,
240
+ "scr_dir1_threshold_500": 0.25257713270943044,
241
+ "scr_metric_threshold_500": 0.3629033227182599,
242
+ "scr_dir2_threshold_500": 0.3629033227182599
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.05405389441171559,
247
+ "scr_metric_threshold_2": 0.22666663840965287,
248
+ "scr_dir2_threshold_2": 0.22666663840965287,
249
+ "scr_dir1_threshold_5": 0.0810811101069608,
250
+ "scr_metric_threshold_5": 0.2800000741746612,
251
+ "scr_dir2_threshold_5": 0.2800000741746612,
252
+ "scr_dir1_threshold_10": 0.11261250376556366,
253
+ "scr_metric_threshold_10": 0.36888886887350414,
254
+ "scr_dir2_threshold_10": 0.36888886887350414,
255
+ "scr_dir1_threshold_20": 0.17117111311941172,
256
+ "scr_metric_threshold_20": 0.42222230463851246,
257
+ "scr_dir2_threshold_20": 0.42222230463851246,
258
+ "scr_dir1_threshold_50": 0.27927917043223033,
259
+ "scr_metric_threshold_50": 0.4711110887409752,
260
+ "scr_dir2_threshold_50": 0.4711110887409752,
261
+ "scr_dir1_threshold_100": 0.18018027451428928,
262
+ "scr_metric_threshold_100": 0.4755554754940165,
263
+ "scr_dir2_threshold_100": 0.4755554754940165,
264
+ "scr_dir1_threshold_500": 0.1756755595721568,
265
+ "scr_metric_threshold_500": 0.4977776741687272,
266
+ "scr_dir2_threshold_500": 0.4977776741687272
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.03862661712745569,
271
+ "scr_metric_threshold_2": 0.03862661712745569,
272
+ "scr_dir2_threshold_2": 0.08571430193323373,
273
+ "scr_dir1_threshold_5": 0.05150224144123495,
274
+ "scr_metric_threshold_5": 0.05150224144123495,
275
+ "scr_dir2_threshold_5": 0.11428564130044823,
276
+ "scr_dir1_threshold_10": 0.08154502375487709,
277
+ "scr_metric_threshold_10": 0.08154502375487709,
278
+ "scr_dir2_threshold_10": 0.14761910708852366,
279
+ "scr_dir1_threshold_20": 0.10300422706858779,
280
+ "scr_metric_threshold_20": 0.10300422706858779,
281
+ "scr_dir2_threshold_20": 0.19999994323368195,
282
+ "scr_dir1_threshold_50": 0.05579403094120067,
283
+ "scr_metric_threshold_50": 0.05579403094120067,
284
+ "scr_dir2_threshold_50": 0.14285698066766273,
285
+ "scr_dir1_threshold_100": 0.05150224144123495,
286
+ "scr_metric_threshold_100": 0.05150224144123495,
287
+ "scr_dir2_threshold_100": 0.21428575483308432,
288
+ "scr_dir1_threshold_500": 0.11158806188240133,
289
+ "scr_metric_threshold_500": 0.11158806188240133,
290
+ "scr_dir2_threshold_500": 0.27142843356751334
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_3",
296
+ "sae_lens_version": "5.4.1",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 16384,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "standard",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
old_relu_eval_results/scr/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_4_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "c41f7329-1294-48a8-9743-0a7d0c4f653a",
73
+ "datetime_epoch_millis": 1738803092208,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.16662920298711476,
77
+ "scr_metric_threshold_2": 0.09319447246027145,
78
+ "scr_dir2_threshold_2": 0.09836765398605983,
79
+ "scr_dir1_threshold_5": 0.18595827828202774,
80
+ "scr_metric_threshold_5": 0.14338717270565676,
81
+ "scr_dir2_threshold_5": 0.14980960699387474,
82
+ "scr_dir1_threshold_10": 0.142764782657517,
83
+ "scr_metric_threshold_10": 0.18711664692078173,
84
+ "scr_dir2_threshold_10": 0.19460437162639055,
85
+ "scr_dir1_threshold_20": 0.16422745930077481,
86
+ "scr_metric_threshold_20": 0.22564147974603221,
87
+ "scr_dir2_threshold_20": 0.23467224389709979,
88
+ "scr_dir1_threshold_50": 0.07719326484466627,
89
+ "scr_metric_threshold_50": 0.27652624846177276,
90
+ "scr_dir2_threshold_50": 0.2778112550108285,
91
+ "scr_dir1_threshold_100": 0.048861431225419735,
92
+ "scr_metric_threshold_100": 0.3011672908990718,
93
+ "scr_dir2_threshold_100": 0.3098454762519983,
94
+ "scr_dir1_threshold_500": -0.16907412416599507,
95
+ "scr_metric_threshold_500": 0.2543551795776049,
96
+ "scr_dir2_threshold_500": 0.26570810838714465
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.32812537834973393,
103
+ "scr_metric_threshold_2": 0.017241303374415515,
104
+ "scr_dir2_threshold_2": 0.017241303374415515,
105
+ "scr_dir1_threshold_5": 0.3593749708961743,
106
+ "scr_metric_threshold_5": 0.03448275355829437,
107
+ "scr_dir2_threshold_5": 0.03448275355829437,
108
+ "scr_dir1_threshold_10": 0.3437501746229541,
109
+ "scr_metric_threshold_10": 0.044334989833444666,
110
+ "scr_dir2_threshold_10": 0.044334989833444666,
111
+ "scr_dir1_threshold_20": 0.3437501746229541,
112
+ "scr_metric_threshold_20": 0.06403931557428191,
113
+ "scr_dir2_threshold_20": 0.06403931557428191,
114
+ "scr_dir1_threshold_50": 0.2968748544808716,
115
+ "scr_metric_threshold_50": 0.07142852948301047,
116
+ "scr_dir2_threshold_50": 0.07142852948301047,
117
+ "scr_dir1_threshold_100": 0.2343756693879908,
118
+ "scr_metric_threshold_100": 0.09852206913257629,
119
+ "scr_dir2_threshold_100": 0.09852206913257629,
120
+ "scr_dir1_threshold_500": -0.14062502910382568,
121
+ "scr_metric_threshold_500": 0.05418707929913162,
122
+ "scr_dir2_threshold_500": 0.05418707929913162
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.2897198135878545,
127
+ "scr_metric_threshold_2": 0.1498558666944707,
128
+ "scr_dir2_threshold_2": 0.1498558666944707,
129
+ "scr_dir1_threshold_5": 0.33644874390166457,
130
+ "scr_metric_threshold_5": 0.21325648513989226,
131
+ "scr_dir2_threshold_5": 0.21325648513989226,
132
+ "scr_dir1_threshold_10": 0.16822437195083229,
133
+ "scr_metric_threshold_10": 0.23631128671482904,
134
+ "scr_dir2_threshold_10": 0.23631128671482904,
135
+ "scr_dir1_threshold_20": 0.1588786972986282,
136
+ "scr_metric_threshold_20": 0.328530321243267,
137
+ "scr_dir2_threshold_20": 0.328530321243267,
138
+ "scr_dir1_threshold_50": -0.3177573945972564,
139
+ "scr_metric_threshold_50": 0.37175794536779155,
140
+ "scr_dir2_threshold_50": 0.37175794536779155,
141
+ "scr_dir1_threshold_100": -0.2149533022646424,
142
+ "scr_metric_threshold_100": 0.4034583404761569,
143
+ "scr_dir2_threshold_100": 0.4034583404761569,
144
+ "scr_dir1_threshold_500": -0.9532710696861899,
145
+ "scr_metric_threshold_500": 0.19596546984434426,
146
+ "scr_dir2_threshold_500": 0.19596546984434426
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.3593749708961743,
151
+ "scr_metric_threshold_2": 0.025316579852636207,
152
+ "scr_dir2_threshold_2": 0.025316579852636207,
153
+ "scr_dir1_threshold_5": 0.39062549476503666,
154
+ "scr_metric_threshold_5": 0.04050640704596242,
155
+ "scr_dir2_threshold_5": 0.04050640704596242,
156
+ "scr_dir1_threshold_10": 0.2656252619344312,
157
+ "scr_metric_threshold_10": 0.06329122328486143,
158
+ "scr_dir2_threshold_10": 0.06329122328486143,
159
+ "scr_dir1_threshold_20": 0.3125005820765137,
160
+ "scr_metric_threshold_20": 0.10379747943300446,
161
+ "scr_dir2_threshold_20": 0.10379747943300446,
162
+ "scr_dir1_threshold_50": 0.21874994179234863,
163
+ "scr_metric_threshold_50": 0.14683549919488467,
164
+ "scr_dir2_threshold_50": 0.14683549919488467,
165
+ "scr_dir1_threshold_100": 0.15624982537704588,
166
+ "scr_metric_threshold_100": 0.1848101426271099,
167
+ "scr_dir2_threshold_100": 0.1848101426271099,
168
+ "scr_dir1_threshold_500": -0.4062493597158349,
169
+ "scr_metric_threshold_500": 0.10379747943300446,
170
+ "scr_dir2_threshold_500": 0.10379747943300446
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.1968506228214391,
175
+ "scr_metric_threshold_2": 0.11572710058610643,
176
+ "scr_dir2_threshold_2": 0.11572710058610643,
177
+ "scr_dir1_threshold_5": 0.22834674124598078,
178
+ "scr_metric_threshold_5": 0.16913958133840545,
179
+ "scr_dir2_threshold_5": 0.16913958133840545,
180
+ "scr_dir1_threshold_10": 0.07874006139746104,
181
+ "scr_metric_threshold_10": 0.22551938282841538,
182
+ "scr_dir2_threshold_10": 0.22551938282841538,
183
+ "scr_dir1_threshold_20": 0.08661444299943634,
184
+ "scr_metric_threshold_20": 0.26112776228607326,
185
+ "scr_dir2_threshold_20": 0.26112776228607326,
186
+ "scr_dir1_threshold_50": -0.023621736822566405,
187
+ "scr_metric_threshold_50": 0.33531166507072435,
188
+ "scr_dir2_threshold_50": 0.33531166507072435,
189
+ "scr_dir1_threshold_100": -0.09448788594583864,
190
+ "scr_metric_threshold_100": 0.3887241458230234,
191
+ "scr_dir2_threshold_100": 0.3887241458230234,
192
+ "scr_dir1_threshold_500": -0.2283462719181943,
193
+ "scr_metric_threshold_500": 0.1899110033707575,
194
+ "scr_dir2_threshold_500": 0.1899110033707575
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.027173884874933916,
199
+ "scr_metric_threshold_2": 0.12549021899448132,
200
+ "scr_dir2_threshold_2": 0.12549021899448132,
201
+ "scr_dir1_threshold_5": 0.0380433092496035,
202
+ "scr_metric_threshold_5": 0.24313716782097103,
203
+ "scr_dir2_threshold_5": 0.24313716782097103,
204
+ "scr_dir1_threshold_10": 0.0380433092496035,
205
+ "scr_metric_threshold_10": 0.3686273868154523,
206
+ "scr_dir2_threshold_10": 0.3686273868154523,
207
+ "scr_dir1_threshold_20": 0.06521719412453741,
208
+ "scr_metric_threshold_20": 0.41176455463642336,
209
+ "scr_dir2_threshold_20": 0.41176455463642336,
210
+ "scr_dir1_threshold_50": -0.07065223025013216,
211
+ "scr_metric_threshold_50": 0.5294117372066224,
212
+ "scr_dir2_threshold_50": 0.5294117372066224,
213
+ "scr_dir1_threshold_100": -0.0815219785630617,
214
+ "scr_metric_threshold_100": 0.5647058686033112,
215
+ "scr_dir2_threshold_100": 0.5647058686033112,
216
+ "scr_dir1_threshold_500": -0.03804363318786346,
217
+ "scr_metric_threshold_500": 0.5882352116198672,
218
+ "scr_dir2_threshold_500": 0.5882352116198672
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.05670091373303265,
223
+ "scr_metric_threshold_2": 0.06854845881151235,
224
+ "scr_dir2_threshold_2": 0.06854845881151235,
225
+ "scr_dir1_threshold_5": 0.08247408426990457,
226
+ "scr_metric_threshold_5": 0.14112912335516434,
227
+ "scr_dir2_threshold_5": 0.14112912335516434,
228
+ "scr_dir1_threshold_10": 0.1340204253436484,
229
+ "scr_metric_threshold_10": 0.16129039235714715,
230
+ "scr_dir2_threshold_10": 0.16129039235714715,
231
+ "scr_dir1_threshold_20": 0.14432987790265395,
232
+ "scr_metric_threshold_20": 0.18145166135912996,
233
+ "scr_dir2_threshold_20": 0.18145166135912996,
234
+ "scr_dir1_threshold_50": 0.12886585268435963,
235
+ "scr_metric_threshold_50": 0.23790326263293876,
236
+ "scr_dir2_threshold_50": 0.23790326263293876,
237
+ "scr_dir1_threshold_100": 0.12886585268435963,
238
+ "scr_metric_threshold_100": 0.2782258006369044,
239
+ "scr_dir2_threshold_100": 0.2782258006369044,
240
+ "scr_dir1_threshold_500": 0.1030926821474877,
241
+ "scr_metric_threshold_500": 0.35483867091269605,
242
+ "scr_dir2_threshold_500": 0.35483867091269605
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.045045001506225466,
247
+ "scr_metric_threshold_2": 0.21333321324102472,
248
+ "scr_dir2_threshold_2": 0.21333321324102472,
249
+ "scr_dir1_threshold_5": 0.01801805430036767,
250
+ "scr_metric_threshold_5": 0.27111103575907436,
251
+ "scr_dir2_threshold_5": 0.27111103575907436,
252
+ "scr_dir1_threshold_10": 0.045045001506225466,
253
+ "scr_metric_threshold_10": 0.32888885827712394,
254
+ "scr_dir2_threshold_10": 0.32888885827712394,
255
+ "scr_dir1_threshold_20": 0.1081080573128186,
256
+ "scr_metric_threshold_20": 0.36000009536742156,
257
+ "scr_dir2_threshold_20": 0.36000009536742156,
258
+ "scr_dir1_threshold_50": 0.24774777677362747,
259
+ "scr_metric_threshold_50": 0.38222229404213226,
260
+ "scr_dir2_threshold_50": 0.38222229404213226,
261
+ "scr_dir1_threshold_100": 0.19369361387252446,
262
+ "scr_metric_threshold_100": 0.42222230463851246,
263
+ "scr_dir2_threshold_100": 0.42222230463851246,
264
+ "scr_dir1_threshold_500": 0.22072082956776967,
265
+ "scr_metric_threshold_500": 0.45777766357234706,
266
+ "scr_dir2_threshold_500": 0.45777766357234706
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.030043038127524242,
271
+ "scr_metric_threshold_2": 0.030043038127524242,
272
+ "scr_dir2_threshold_2": 0.07142849033383136,
273
+ "scr_dir1_threshold_5": 0.03433482762748996,
274
+ "scr_metric_threshold_5": 0.03433482762748996,
275
+ "scr_dir2_threshold_5": 0.08571430193323373,
276
+ "scr_dir1_threshold_10": 0.06866965525497992,
277
+ "scr_metric_threshold_10": 0.06866965525497992,
278
+ "scr_dir2_threshold_10": 0.12857145289985059,
279
+ "scr_dir1_threshold_20": 0.09442064806865635,
280
+ "scr_metric_threshold_20": 0.09442064806865635,
281
+ "scr_dir2_threshold_20": 0.16666676127719673,
282
+ "scr_dir1_threshold_50": 0.13733905469607777,
283
+ "scr_metric_threshold_50": 0.13733905469607777,
284
+ "scr_dir2_threshold_50": 0.14761910708852366,
285
+ "scr_dir1_threshold_100": 0.06866965525497992,
286
+ "scr_metric_threshold_100": 0.06866965525497992,
287
+ "scr_dir2_threshold_100": 0.13809513807839202,
288
+ "scr_dir1_threshold_500": 0.09012885856869063,
289
+ "scr_metric_threshold_500": 0.09012885856869063,
290
+ "scr_dir2_threshold_500": 0.18095228904500887
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_4",
296
+ "sae_lens_version": "5.4.1",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 16384,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "standard",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
old_relu_eval_results/scr/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_5_custom_sae_eval_results.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "scr",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": true,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "f172b11d-e73d-4265-bcff-b86b3f4483ba",
73
+ "datetime_epoch_millis": 1738801762536,
74
+ "eval_result_metrics": {
75
+ "scr_metrics": {
76
+ "scr_dir1_threshold_2": 0.1194317497817165,
77
+ "scr_metric_threshold_2": 0.07554747805773677,
78
+ "scr_dir2_threshold_2": 0.08125716524775614,
79
+ "scr_dir1_threshold_5": 0.14323898317939607,
80
+ "scr_metric_threshold_5": 0.11980903429955778,
81
+ "scr_dir2_threshold_5": 0.12575375153644316,
82
+ "scr_dir1_threshold_10": 0.1363385070457653,
83
+ "scr_metric_threshold_10": 0.1432143223046469,
84
+ "scr_dir2_threshold_10": 0.1479021298215021,
85
+ "scr_dir1_threshold_20": 0.09621199123652632,
86
+ "scr_metric_threshold_20": 0.16714578684669865,
87
+ "scr_dir2_threshold_20": 0.18160776541995755,
88
+ "scr_dir1_threshold_50": 0.009138466559556352,
89
+ "scr_metric_threshold_50": 0.22111083513489554,
90
+ "scr_dir2_threshold_50": 0.2409887142151447,
91
+ "scr_dir1_threshold_100": 0.0010560166167327316,
92
+ "scr_metric_threshold_100": 0.24574769061347318,
93
+ "scr_dir2_threshold_100": 0.2606209691420875,
94
+ "scr_dir1_threshold_500": -0.12225683433428539,
95
+ "scr_metric_threshold_500": 0.20851088490922143,
96
+ "scr_dir2_threshold_500": 0.22618408062658932
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_professor_nurse_results",
102
+ "scr_dir1_threshold_2": 0.18750034924590825,
103
+ "scr_metric_threshold_2": 0.017241303374415515,
104
+ "scr_dir2_threshold_2": 0.017241303374415515,
105
+ "scr_dir1_threshold_5": 0.20312514551912844,
106
+ "scr_metric_threshold_5": 0.049261034566288144,
107
+ "scr_dir2_threshold_5": 0.049261034566288144,
108
+ "scr_dir1_threshold_10": 0.2968748544808716,
109
+ "scr_metric_threshold_10": 0.06157629320786018,
110
+ "scr_dir2_threshold_10": 0.06157629320786018,
111
+ "scr_dir1_threshold_20": 0.20312514551912844,
112
+ "scr_metric_threshold_20": 0.05911327084143844,
113
+ "scr_dir2_threshold_20": 0.05911327084143844,
114
+ "scr_dir1_threshold_50": 0.0937506402841651,
115
+ "scr_metric_threshold_50": 0.08128076575816078,
116
+ "scr_dir2_threshold_50": 0.08128076575816078,
117
+ "scr_dir1_threshold_100": 0.14062502910382568,
118
+ "scr_metric_threshold_100": 0.08620681049100425,
119
+ "scr_dir2_threshold_100": 0.08620681049100425,
120
+ "scr_dir1_threshold_500": -0.28125005820765137,
121
+ "scr_metric_threshold_500": 0.06403931557428191,
122
+ "scr_dir2_threshold_500": 0.06403931557428191
123
+ },
124
+ {
125
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_architect_journalist_results",
126
+ "scr_dir1_threshold_2": 0.2336446515690506,
127
+ "scr_metric_threshold_2": 0.11239202884933491,
128
+ "scr_dir2_threshold_2": 0.11239202884933491,
129
+ "scr_dir1_threshold_5": 0.2710279072306566,
130
+ "scr_metric_threshold_5": 0.1325648513989227,
131
+ "scr_dir2_threshold_5": 0.1325648513989227,
132
+ "scr_dir1_threshold_10": 0.14953246559363445,
133
+ "scr_metric_threshold_10": 0.16426524650728808,
134
+ "scr_dir2_threshold_10": 0.16426524650728808,
135
+ "scr_dir1_threshold_20": 0.16822437195083229,
136
+ "scr_metric_threshold_20": 0.1757924755234473,
137
+ "scr_dir2_threshold_20": 0.1757924755234473,
138
+ "scr_dir1_threshold_50": -0.2056076276124383,
139
+ "scr_metric_threshold_50": 0.3025937124142904,
140
+ "scr_dir2_threshold_50": 0.3025937124142904,
141
+ "scr_dir1_threshold_100": -0.24299088327404436,
142
+ "scr_metric_threshold_100": 0.328530321243267,
143
+ "scr_dir2_threshold_100": 0.328530321243267,
144
+ "scr_dir1_threshold_500": -0.46728986019089086,
145
+ "scr_metric_threshold_500": 0.21902027141928104,
146
+ "scr_dir2_threshold_500": 0.21902027141928104
147
+ },
148
+ {
149
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_surgeon_psychologist_results",
150
+ "scr_dir1_threshold_2": 0.250000465661211,
151
+ "scr_metric_threshold_2": 0.025316579852636207,
152
+ "scr_dir2_threshold_2": 0.025316579852636207,
153
+ "scr_dir1_threshold_5": 0.3125005820765137,
154
+ "scr_metric_threshold_5": 0.04303801976188022,
155
+ "scr_dir2_threshold_5": 0.04303801976188022,
156
+ "scr_dir1_threshold_10": 0.28125005820765137,
157
+ "scr_metric_threshold_10": 0.055696234239288635,
158
+ "scr_dir2_threshold_10": 0.055696234239288635,
159
+ "scr_dir1_threshold_20": 0.1250002328306055,
160
+ "scr_metric_threshold_20": 0.08860765223967824,
161
+ "scr_dir2_threshold_20": 0.08860765223967824,
162
+ "scr_dir1_threshold_50": 0.03125052386886235,
163
+ "scr_metric_threshold_50": 0.1265822956719035,
164
+ "scr_dir2_threshold_50": 0.1265822956719035,
165
+ "scr_dir1_threshold_100": -0.07812491268852294,
166
+ "scr_metric_threshold_100": 0.15696210095637528,
167
+ "scr_dir2_threshold_100": 0.15696210095637528,
168
+ "scr_dir1_threshold_500": -0.3593749708961743,
169
+ "scr_metric_threshold_500": 0.10126586671708666,
170
+ "scr_dir2_threshold_500": 0.10126586671708666
171
+ },
172
+ {
173
+ "dataset_name": "LabHC/bias_in_bios_class_set1_scr_attorney_teacher_results",
174
+ "scr_dir1_threshold_2": 0.14960621052073325,
175
+ "scr_metric_threshold_2": 0.07715140039073762,
176
+ "scr_dir2_threshold_2": 0.07715140039073762,
177
+ "scr_dir1_threshold_5": 0.16535450439689736,
178
+ "scr_metric_threshold_5": 0.13946602022454502,
179
+ "scr_dir2_threshold_5": 0.13946602022454502,
180
+ "scr_dir1_threshold_10": 0.07874006139746104,
181
+ "scr_metric_threshold_10": 0.15727012151918615,
182
+ "scr_dir2_threshold_10": 0.15727012151918615,
183
+ "scr_dir1_threshold_20": 0.07874006139746104,
184
+ "scr_metric_threshold_20": 0.1988131424522659,
185
+ "scr_dir2_threshold_20": 0.1988131424522659,
186
+ "scr_dir1_threshold_50": 0.007874381601975306,
187
+ "scr_metric_threshold_50": 0.26112776228607326,
188
+ "scr_dir2_threshold_50": 0.26112776228607326,
189
+ "scr_dir1_threshold_100": -0.07874006139746104,
190
+ "scr_metric_threshold_100": 0.2759645428430035,
191
+ "scr_dir2_threshold_100": 0.2759645428430035,
192
+ "scr_dir1_threshold_500": -0.18110232894527498,
193
+ "scr_metric_threshold_500": 0.1364985226184585,
194
+ "scr_dir2_threshold_500": 0.1364985226184585
195
+ },
196
+ {
197
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Books_CDs_and_Vinyl_results",
198
+ "scr_dir1_threshold_2": 0.027173884874933916,
199
+ "scr_metric_threshold_2": 0.10588216044635698,
200
+ "scr_dir2_threshold_2": 0.10588216044635698,
201
+ "scr_dir1_threshold_5": 0.05434776974986783,
202
+ "scr_metric_threshold_5": 0.21568630659227384,
203
+ "scr_dir2_threshold_5": 0.21568630659227384,
204
+ "scr_dir1_threshold_10": 0.059782481937202626,
205
+ "scr_metric_threshold_10": 0.262744992625386,
206
+ "scr_dir2_threshold_10": 0.262744992625386,
207
+ "scr_dir1_threshold_20": 0.06521719412453741,
208
+ "scr_metric_threshold_20": 0.3647058686033112,
209
+ "scr_dir2_threshold_20": 0.3647058686033112,
210
+ "scr_dir1_threshold_50": -0.027173884874933916,
211
+ "scr_metric_threshold_50": 0.45490195620110374,
212
+ "scr_dir2_threshold_50": 0.45490195620110374,
213
+ "scr_dir1_threshold_100": 0.0,
214
+ "scr_metric_threshold_100": 0.4823528174298009,
215
+ "scr_dir2_threshold_100": 0.4823528174298009,
216
+ "scr_dir1_threshold_500": 0.05434776974986783,
217
+ "scr_metric_threshold_500": 0.5137254306143486,
218
+ "scr_dir2_threshold_500": 0.5137254306143486
219
+ },
220
+ {
221
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Software_Electronics_results",
222
+ "scr_dir1_threshold_2": 0.04123688851473832,
223
+ "scr_metric_threshold_2": 0.08064531634921589,
224
+ "scr_dir2_threshold_2": 0.08064531634921589,
225
+ "scr_dir1_threshold_5": 0.08762865692919337,
226
+ "scr_metric_threshold_5": 0.10887099681547797,
227
+ "scr_dir2_threshold_5": 0.10887099681547797,
228
+ "scr_dir1_threshold_10": 0.0979381094881989,
229
+ "scr_metric_threshold_10": 0.10483879108333834,
230
+ "scr_dir2_threshold_10": 0.10483879108333834,
231
+ "scr_dir1_threshold_20": 0.030927743196160724,
232
+ "scr_metric_threshold_20": 0.1572581866250075,
233
+ "scr_dir2_threshold_20": 0.1572581866250075,
234
+ "scr_dir1_threshold_50": 0.09278322958848217,
235
+ "scr_metric_threshold_50": 0.20967734182539205,
236
+ "scr_dir2_threshold_50": 0.20967734182539205,
237
+ "scr_dir1_threshold_100": 0.13917499800293723,
238
+ "scr_metric_threshold_100": 0.2419354683650784,
239
+ "scr_dir2_threshold_100": 0.2419354683650784,
240
+ "scr_dir1_threshold_500": 0.04639146117402712,
241
+ "scr_metric_threshold_500": 0.2701613891726251,
242
+ "scr_dir2_threshold_500": 0.2701613891726251
243
+ },
244
+ {
245
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Pet_Supplies_Office_Products_results",
246
+ "scr_dir1_threshold_2": 0.0405405550534804,
247
+ "scr_metric_threshold_2": 0.16000004238552068,
248
+ "scr_dir2_threshold_2": 0.16000004238552068,
249
+ "scr_dir1_threshold_5": 0.009008892905490125,
250
+ "scr_metric_threshold_5": 0.22666663840965287,
251
+ "scr_dir2_threshold_5": 0.22666663840965287,
252
+ "scr_dir1_threshold_10": 0.045045001506225466,
253
+ "scr_metric_threshold_10": 0.25777787549995046,
254
+ "scr_dir2_threshold_10": 0.25777787549995046,
255
+ "scr_dir1_threshold_20": 0.08558555655970586,
256
+ "scr_metric_threshold_20": 0.2800000741746612,
257
+ "scr_dir2_threshold_20": 0.2800000741746612,
258
+ "scr_dir1_threshold_50": 0.06306305580659313,
259
+ "scr_metric_threshold_50": 0.3155554331084958,
260
+ "scr_dir2_threshold_50": 0.3155554331084958,
261
+ "scr_dir1_threshold_100": 0.08558555655970586,
262
+ "scr_metric_threshold_100": 0.3511110569518347,
263
+ "scr_dir2_threshold_100": 0.3511110569518347,
264
+ "scr_dir1_threshold_500": 0.18018027451428928,
265
+ "scr_metric_threshold_500": 0.33333324503016526,
266
+ "scr_dir2_threshold_500": 0.33333324503016526
267
+ },
268
+ {
269
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_scr_Industrial_and_Scientific_Toys_and_Games_results",
270
+ "scr_dir1_threshold_2": 0.025750992813676425,
271
+ "scr_metric_threshold_2": 0.025750992813676425,
272
+ "scr_dir2_threshold_2": 0.07142849033383136,
273
+ "scr_dir1_threshold_5": 0.042918406627421406,
274
+ "scr_metric_threshold_5": 0.042918406627421406,
275
+ "scr_dir2_threshold_5": 0.09047614452250444,
276
+ "scr_dir1_threshold_10": 0.08154502375487709,
277
+ "scr_metric_threshold_10": 0.08154502375487709,
278
+ "scr_dir2_threshold_10": 0.11904748388971893,
279
+ "scr_dir1_threshold_20": 0.012875624313779262,
280
+ "scr_metric_threshold_20": 0.012875624313779262,
281
+ "scr_dir2_threshold_20": 0.12857145289985059,
282
+ "scr_dir1_threshold_50": 0.01716741381374498,
283
+ "scr_metric_threshold_50": 0.01716741381374498,
284
+ "scr_dir2_threshold_50": 0.17619044645573817,
285
+ "scr_dir1_threshold_100": 0.042918406627421406,
286
+ "scr_metric_threshold_100": 0.042918406627421406,
287
+ "scr_dir2_threshold_100": 0.1619046348563358,
288
+ "scr_dir1_threshold_500": 0.030043038127524242,
289
+ "scr_metric_threshold_500": 0.030043038127524242,
290
+ "scr_dir2_threshold_500": 0.17142860386646747
291
+ }
292
+ ],
293
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
294
+ "sae_lens_id": "custom_sae",
295
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_5",
296
+ "sae_lens_version": "5.4.1",
297
+ "sae_cfg_dict": {
298
+ "model_name": "gemma-2-2b",
299
+ "d_in": 2304,
300
+ "d_sae": 16384,
301
+ "hook_layer": 12,
302
+ "hook_name": "blocks.12.hook_resid_post",
303
+ "context_size": null,
304
+ "hook_head_index": null,
305
+ "architecture": "standard",
306
+ "apply_b_dec_to_input": null,
307
+ "finetuning_scaling_factor": null,
308
+ "activation_fn_str": "",
309
+ "prepend_bos": true,
310
+ "normalize_activations": "none",
311
+ "dtype": "bfloat16",
312
+ "device": "",
313
+ "dataset_path": "",
314
+ "dataset_trust_remote_code": true,
315
+ "seqpos_slice": [
316
+ null
317
+ ],
318
+ "training_tokens": -100000,
319
+ "sae_lens_training_version": null,
320
+ "neuronpedia_id": null
321
+ },
322
+ "eval_result_unstructured": null
323
+ }
old_relu_eval_results/sparse_probing/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_0_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "1d2f727d-ba91-4f28-ae1e-d5e769b99804",
30
+ "datetime_epoch_millis": 1738809581267,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9598687920719384,
34
+ "llm_top_1_test_accuracy": 0.6589812500000001,
35
+ "llm_top_2_test_accuracy": 0.7174812500000001,
36
+ "llm_top_5_test_accuracy": 0.7826062500000001,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.959237540513277,
44
+ "sae_top_1_test_accuracy": 0.7985375,
45
+ "sae_top_2_test_accuracy": 0.83240625,
46
+ "sae_top_5_test_accuracy": 0.8882062500000001,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.9694000363349915,
57
+ "llm_top_1_test_accuracy": 0.6436000000000001,
58
+ "llm_top_2_test_accuracy": 0.6874,
59
+ "llm_top_5_test_accuracy": 0.7908,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9688000321388245,
65
+ "sae_top_1_test_accuracy": 0.8117999999999999,
66
+ "sae_top_2_test_accuracy": 0.8220000000000001,
67
+ "sae_top_5_test_accuracy": 0.8633999999999998,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9544000387191772,
76
+ "llm_top_1_test_accuracy": 0.67,
77
+ "llm_top_2_test_accuracy": 0.7148,
78
+ "llm_top_5_test_accuracy": 0.7716,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9506000518798828,
84
+ "sae_top_1_test_accuracy": 0.7866,
85
+ "sae_top_2_test_accuracy": 0.8413999999999999,
86
+ "sae_top_5_test_accuracy": 0.8710000000000001,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9320000410079956,
95
+ "llm_top_1_test_accuracy": 0.6918,
96
+ "llm_top_2_test_accuracy": 0.7338,
97
+ "llm_top_5_test_accuracy": 0.765,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9304000496864319,
103
+ "sae_top_1_test_accuracy": 0.8116,
104
+ "sae_top_2_test_accuracy": 0.8392000000000002,
105
+ "sae_top_5_test_accuracy": 0.8754,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9196000456809997,
114
+ "llm_top_1_test_accuracy": 0.6048,
115
+ "llm_top_2_test_accuracy": 0.6406000000000001,
116
+ "llm_top_5_test_accuracy": 0.6696,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9196000337600708,
122
+ "sae_top_1_test_accuracy": 0.7326,
123
+ "sae_top_2_test_accuracy": 0.776,
124
+ "sae_top_5_test_accuracy": 0.8576,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9820000529289246,
133
+ "llm_top_1_test_accuracy": 0.672,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9805000424385071,
141
+ "sae_top_1_test_accuracy": 0.733,
142
+ "sae_top_2_test_accuracy": 0.773,
143
+ "sae_top_5_test_accuracy": 0.909,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.969200050830841,
152
+ "llm_top_1_test_accuracy": 0.6428,
153
+ "llm_top_2_test_accuracy": 0.6920000000000001,
154
+ "llm_top_5_test_accuracy": 0.7656000000000001,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9714000463485718,
160
+ "sae_top_1_test_accuracy": 0.7604,
161
+ "sae_top_2_test_accuracy": 0.8074,
162
+ "sae_top_5_test_accuracy": 0.8694000000000001,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9527500420808792,
171
+ "llm_top_1_test_accuracy": 0.69725,
172
+ "llm_top_2_test_accuracy": 0.76625,
173
+ "llm_top_5_test_accuracy": 0.8192499999999999,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9530000388622284,
179
+ "sae_top_1_test_accuracy": 0.8065,
180
+ "sae_top_2_test_accuracy": 0.8452500000000001,
181
+ "sae_top_5_test_accuracy": 0.86225,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9996000289916992,
190
+ "llm_top_1_test_accuracy": 0.6496000000000001,
191
+ "llm_top_2_test_accuracy": 0.7809999999999999,
192
+ "llm_top_5_test_accuracy": 0.913,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9996000289916992,
198
+ "sae_top_1_test_accuracy": 0.9458,
199
+ "sae_top_2_test_accuracy": 0.9549999999999998,
200
+ "sae_top_5_test_accuracy": 0.9975999999999999,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_0",
210
+ "sae_lens_version": "5.4.1",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 16384,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "standard",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.956000030040741,
240
+ "1": 0.968000054359436,
241
+ "2": 0.9570000171661377,
242
+ "6": 0.9880000352859497,
243
+ "9": 0.9750000238418579
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9520000219345093,
249
+ "6": 0.9930000305175781,
250
+ "9": 0.984000027179718
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.568,
254
+ "1": 0.629,
255
+ "2": 0.679,
256
+ "6": 0.791,
257
+ "9": 0.551
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.585,
261
+ "1": 0.666,
262
+ "2": 0.673,
263
+ "6": 0.801,
264
+ "9": 0.712
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.72,
268
+ "1": 0.707,
269
+ "2": 0.764,
270
+ "6": 0.899,
271
+ "9": 0.864
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.625,
275
+ "1": 0.663,
276
+ "2": 0.863,
277
+ "6": 0.973,
278
+ "9": 0.935
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.66,
282
+ "1": 0.665,
283
+ "2": 0.863,
284
+ "6": 0.983,
285
+ "9": 0.939
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.741,
289
+ "1": 0.771,
290
+ "2": 0.876,
291
+ "6": 0.986,
292
+ "9": 0.943
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9650000333786011,
298
+ "13": 0.9490000605583191,
299
+ "14": 0.9540000557899475,
300
+ "18": 0.9300000667572021,
301
+ "19": 0.9550000429153442
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.971000075340271,
305
+ "13": 0.9520000219345093,
306
+ "14": 0.956000030040741,
307
+ "18": 0.9360000491142273,
308
+ "19": 0.9570000171661377
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.561,
312
+ "13": 0.672,
313
+ "14": 0.631,
314
+ "18": 0.7,
315
+ "19": 0.786
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.69,
319
+ "13": 0.72,
320
+ "14": 0.677,
321
+ "18": 0.721,
322
+ "19": 0.766
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.776,
326
+ "13": 0.742,
327
+ "14": 0.768,
328
+ "18": 0.731,
329
+ "19": 0.841
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.729,
333
+ "13": 0.751,
334
+ "14": 0.878,
335
+ "18": 0.732,
336
+ "19": 0.843
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.857,
340
+ "13": 0.773,
341
+ "14": 0.88,
342
+ "18": 0.855,
343
+ "19": 0.842
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.933,
347
+ "13": 0.831,
348
+ "14": 0.888,
349
+ "18": 0.856,
350
+ "19": 0.847
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.9530000686645508,
356
+ "21": 0.9290000200271606,
357
+ "22": 0.9140000343322754,
358
+ "25": 0.968000054359436,
359
+ "26": 0.8880000710487366
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.956000030040741,
363
+ "21": 0.9260000586509705,
364
+ "22": 0.9170000553131104,
365
+ "25": 0.9640000462532043,
366
+ "26": 0.8970000147819519
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.699,
370
+ "21": 0.772,
371
+ "22": 0.641,
372
+ "25": 0.703,
373
+ "26": 0.644
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.811,
377
+ "21": 0.757,
378
+ "22": 0.655,
379
+ "25": 0.762,
380
+ "26": 0.684
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.861,
384
+ "21": 0.788,
385
+ "22": 0.712,
386
+ "25": 0.796,
387
+ "26": 0.668
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.856,
391
+ "21": 0.77,
392
+ "22": 0.844,
393
+ "25": 0.881,
394
+ "26": 0.707
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.899,
398
+ "21": 0.805,
399
+ "22": 0.846,
400
+ "25": 0.886,
401
+ "26": 0.76
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.92,
405
+ "21": 0.848,
406
+ "22": 0.891,
407
+ "25": 0.891,
408
+ "26": 0.827
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9570000171661377,
414
+ "2": 0.9410000443458557,
415
+ "3": 0.9240000247955322,
416
+ "5": 0.909000039100647,
417
+ "6": 0.8670000433921814
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.956000030040741,
421
+ "2": 0.937000036239624,
422
+ "3": 0.9160000681877136,
423
+ "5": 0.9170000553131104,
424
+ "6": 0.8720000386238098
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.665,
428
+ "2": 0.596,
429
+ "3": 0.599,
430
+ "5": 0.576,
431
+ "6": 0.588
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.73,
435
+ "2": 0.632,
436
+ "3": 0.617,
437
+ "5": 0.615,
438
+ "6": 0.609
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.758,
442
+ "2": 0.646,
443
+ "3": 0.627,
444
+ "5": 0.646,
445
+ "6": 0.671
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.845,
449
+ "2": 0.669,
450
+ "3": 0.668,
451
+ "5": 0.717,
452
+ "6": 0.764
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.876,
456
+ "2": 0.698,
457
+ "3": 0.672,
458
+ "5": 0.856,
459
+ "6": 0.778
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.91,
463
+ "2": 0.907,
464
+ "3": 0.802,
465
+ "5": 0.879,
466
+ "6": 0.79
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9810000658035278,
472
+ "5.0": 0.9800000190734863
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9820000529289246,
476
+ "5.0": 0.9820000529289246
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.672,
480
+ "5.0": 0.672
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.733,
492
+ "5.0": 0.733
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.773,
496
+ "5.0": 0.773
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.909,
500
+ "5.0": 0.909
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9550000429153442,
506
+ "Python": 0.9880000352859497,
507
+ "HTML": 0.9890000224113464,
508
+ "Java": 0.9670000672340393,
509
+ "PHP": 0.9580000638961792
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9530000686645508,
513
+ "Python": 0.9860000610351562,
514
+ "HTML": 0.9880000352859497,
515
+ "Java": 0.9640000462532043,
516
+ "PHP": 0.9550000429153442
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.657,
520
+ "Python": 0.637,
521
+ "HTML": 0.714,
522
+ "Java": 0.624,
523
+ "PHP": 0.582
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.658,
527
+ "Python": 0.664,
528
+ "HTML": 0.801,
529
+ "Java": 0.697,
530
+ "PHP": 0.64
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.752,
534
+ "Python": 0.726,
535
+ "HTML": 0.928,
536
+ "Java": 0.728,
537
+ "PHP": 0.694
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.611,
541
+ "Python": 0.934,
542
+ "HTML": 0.68,
543
+ "Java": 0.655,
544
+ "PHP": 0.922
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.641,
548
+ "Python": 0.931,
549
+ "HTML": 0.887,
550
+ "Java": 0.655,
551
+ "PHP": 0.923
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.756,
555
+ "Python": 0.937,
556
+ "HTML": 0.93,
557
+ "Java": 0.805,
558
+ "PHP": 0.919
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9380000233650208,
564
+ "1": 0.984000027179718,
565
+ "2": 0.9360000491142273,
566
+ "3": 0.9540000557899475
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.9380000233650208,
570
+ "1": 0.9880000352859497,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9550000429153442
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.805,
576
+ "1": 0.67,
577
+ "2": 0.648,
578
+ "3": 0.666
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.794,
582
+ "1": 0.795,
583
+ "2": 0.686,
584
+ "3": 0.79
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.818,
588
+ "1": 0.867,
589
+ "2": 0.756,
590
+ "3": 0.836
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.86,
594
+ "1": 0.958,
595
+ "2": 0.801,
596
+ "3": 0.607
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.859,
600
+ "1": 0.965,
601
+ "2": 0.833,
602
+ "3": 0.724
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.871,
606
+ "1": 0.964,
607
+ "2": 0.835,
608
+ "3": 0.779
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 0.999000072479248,
614
+ "fr": 1.0,
615
+ "de": 1.0,
616
+ "es": 1.0,
617
+ "nl": 0.999000072479248
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 1.0,
621
+ "fr": 1.0,
622
+ "de": 0.999000072479248,
623
+ "es": 1.0,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.748,
628
+ "fr": 0.59,
629
+ "de": 0.754,
630
+ "es": 0.494,
631
+ "nl": 0.662
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.822,
635
+ "fr": 0.596,
636
+ "de": 0.831,
637
+ "es": 0.91,
638
+ "nl": 0.746
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.887,
642
+ "fr": 0.922,
643
+ "de": 0.909,
644
+ "es": 0.982,
645
+ "nl": 0.865
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.995,
649
+ "fr": 0.993,
650
+ "de": 0.903,
651
+ "es": 0.92,
652
+ "nl": 0.918
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.997,
656
+ "fr": 0.992,
657
+ "de": 0.908,
658
+ "es": 0.944,
659
+ "nl": 0.934
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 0.999,
663
+ "fr": 0.998,
664
+ "de": 0.995,
665
+ "es": 0.997,
666
+ "nl": 0.999
667
+ }
668
+ }
669
+ }
670
+ }
old_relu_eval_results/sparse_probing/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_1_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "12d3597f-e01f-4acd-b4e4-3916a8253103",
30
+ "datetime_epoch_millis": 1738809822946,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9598687920719384,
34
+ "llm_top_1_test_accuracy": 0.6589812500000001,
35
+ "llm_top_2_test_accuracy": 0.7174812500000001,
36
+ "llm_top_5_test_accuracy": 0.7826062500000001,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9591187916696072,
44
+ "sae_top_1_test_accuracy": 0.7973625000000001,
45
+ "sae_top_2_test_accuracy": 0.8503124999999999,
46
+ "sae_top_5_test_accuracy": 0.8916437500000002,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.9694000363349915,
57
+ "llm_top_1_test_accuracy": 0.6436000000000001,
58
+ "llm_top_2_test_accuracy": 0.6874,
59
+ "llm_top_5_test_accuracy": 0.7908,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9698000431060791,
65
+ "sae_top_1_test_accuracy": 0.8183999999999999,
66
+ "sae_top_2_test_accuracy": 0.8507999999999999,
67
+ "sae_top_5_test_accuracy": 0.8998000000000002,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9544000387191772,
76
+ "llm_top_1_test_accuracy": 0.67,
77
+ "llm_top_2_test_accuracy": 0.7148,
78
+ "llm_top_5_test_accuracy": 0.7716,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9524000406265258,
84
+ "sae_top_1_test_accuracy": 0.7678,
85
+ "sae_top_2_test_accuracy": 0.7921999999999999,
86
+ "sae_top_5_test_accuracy": 0.885,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9320000410079956,
95
+ "llm_top_1_test_accuracy": 0.6918,
96
+ "llm_top_2_test_accuracy": 0.7338,
97
+ "llm_top_5_test_accuracy": 0.765,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9310000538825989,
103
+ "sae_top_1_test_accuracy": 0.8183999999999999,
104
+ "sae_top_2_test_accuracy": 0.8488,
105
+ "sae_top_5_test_accuracy": 0.8701999999999999,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9196000456809997,
114
+ "llm_top_1_test_accuracy": 0.6048,
115
+ "llm_top_2_test_accuracy": 0.6406000000000001,
116
+ "llm_top_5_test_accuracy": 0.6696,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9168000459671021,
122
+ "sae_top_1_test_accuracy": 0.737,
123
+ "sae_top_2_test_accuracy": 0.7807999999999999,
124
+ "sae_top_5_test_accuracy": 0.8092,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9820000529289246,
133
+ "llm_top_1_test_accuracy": 0.672,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9775000512599945,
141
+ "sae_top_1_test_accuracy": 0.773,
142
+ "sae_top_2_test_accuracy": 0.903,
143
+ "sae_top_5_test_accuracy": 0.93,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.969200050830841,
152
+ "llm_top_1_test_accuracy": 0.6428,
153
+ "llm_top_2_test_accuracy": 0.6920000000000001,
154
+ "llm_top_5_test_accuracy": 0.7656000000000001,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9718000411987304,
160
+ "sae_top_1_test_accuracy": 0.7976000000000001,
161
+ "sae_top_2_test_accuracy": 0.8288,
162
+ "sae_top_5_test_accuracy": 0.8718,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9527500420808792,
171
+ "llm_top_1_test_accuracy": 0.69725,
172
+ "llm_top_2_test_accuracy": 0.76625,
173
+ "llm_top_5_test_accuracy": 0.8192499999999999,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9542500376701355,
179
+ "sae_top_1_test_accuracy": 0.7125,
180
+ "sae_top_2_test_accuracy": 0.8314999999999999,
181
+ "sae_top_5_test_accuracy": 0.87075,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9996000289916992,
190
+ "llm_top_1_test_accuracy": 0.6496000000000001,
191
+ "llm_top_2_test_accuracy": 0.7809999999999999,
192
+ "llm_top_5_test_accuracy": 0.913,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9994000196456909,
198
+ "sae_top_1_test_accuracy": 0.9542000000000002,
199
+ "sae_top_2_test_accuracy": 0.9666,
200
+ "sae_top_5_test_accuracy": 0.9964000000000001,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_1",
210
+ "sae_lens_version": "5.4.1",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 16384,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "standard",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9540000557899475,
240
+ "1": 0.9700000286102295,
241
+ "2": 0.9550000429153442,
242
+ "6": 0.9910000562667847,
243
+ "9": 0.9790000319480896
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9520000219345093,
249
+ "6": 0.9930000305175781,
250
+ "9": 0.984000027179718
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.568,
254
+ "1": 0.629,
255
+ "2": 0.679,
256
+ "6": 0.791,
257
+ "9": 0.551
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.585,
261
+ "1": 0.666,
262
+ "2": 0.673,
263
+ "6": 0.801,
264
+ "9": 0.712
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.72,
268
+ "1": 0.707,
269
+ "2": 0.764,
270
+ "6": 0.899,
271
+ "9": 0.864
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.624,
275
+ "1": 0.693,
276
+ "2": 0.866,
277
+ "6": 0.98,
278
+ "9": 0.929
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.633,
282
+ "1": 0.819,
283
+ "2": 0.884,
284
+ "6": 0.981,
285
+ "9": 0.937
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.799,
289
+ "1": 0.878,
290
+ "2": 0.888,
291
+ "6": 0.986,
292
+ "9": 0.948
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9700000286102295,
298
+ "13": 0.9390000700950623,
299
+ "14": 0.9500000476837158,
300
+ "18": 0.9380000233650208,
301
+ "19": 0.9650000333786011
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.971000075340271,
305
+ "13": 0.9520000219345093,
306
+ "14": 0.956000030040741,
307
+ "18": 0.9360000491142273,
308
+ "19": 0.9570000171661377
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.561,
312
+ "13": 0.672,
313
+ "14": 0.631,
314
+ "18": 0.7,
315
+ "19": 0.786
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.69,
319
+ "13": 0.72,
320
+ "14": 0.677,
321
+ "18": 0.721,
322
+ "19": 0.766
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.776,
326
+ "13": 0.742,
327
+ "14": 0.768,
328
+ "18": 0.731,
329
+ "19": 0.841
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.731,
333
+ "13": 0.692,
334
+ "14": 0.858,
335
+ "18": 0.729,
336
+ "19": 0.829
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.844,
340
+ "13": 0.68,
341
+ "14": 0.861,
342
+ "18": 0.731,
343
+ "19": 0.845
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.951,
347
+ "13": 0.863,
348
+ "14": 0.88,
349
+ "18": 0.884,
350
+ "19": 0.847
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.9620000720024109,
356
+ "21": 0.9230000376701355,
357
+ "22": 0.9220000505447388,
358
+ "25": 0.9590000510215759,
359
+ "26": 0.8890000581741333
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.956000030040741,
363
+ "21": 0.9260000586509705,
364
+ "22": 0.9170000553131104,
365
+ "25": 0.9640000462532043,
366
+ "26": 0.8970000147819519
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.699,
370
+ "21": 0.772,
371
+ "22": 0.641,
372
+ "25": 0.703,
373
+ "26": 0.644
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.811,
377
+ "21": 0.757,
378
+ "22": 0.655,
379
+ "25": 0.762,
380
+ "26": 0.684
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.861,
384
+ "21": 0.788,
385
+ "22": 0.712,
386
+ "25": 0.796,
387
+ "26": 0.668
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.881,
391
+ "21": 0.76,
392
+ "22": 0.862,
393
+ "25": 0.88,
394
+ "26": 0.709
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.898,
398
+ "21": 0.792,
399
+ "22": 0.897,
400
+ "25": 0.891,
401
+ "26": 0.766
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.938,
405
+ "21": 0.842,
406
+ "22": 0.889,
407
+ "25": 0.877,
408
+ "26": 0.805
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9570000171661377,
414
+ "2": 0.9300000667572021,
415
+ "3": 0.9110000729560852,
416
+ "5": 0.9120000600814819,
417
+ "6": 0.8740000128746033
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.956000030040741,
421
+ "2": 0.937000036239624,
422
+ "3": 0.9160000681877136,
423
+ "5": 0.9170000553131104,
424
+ "6": 0.8720000386238098
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.665,
428
+ "2": 0.596,
429
+ "3": 0.599,
430
+ "5": 0.576,
431
+ "6": 0.588
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.73,
435
+ "2": 0.632,
436
+ "3": 0.617,
437
+ "5": 0.615,
438
+ "6": 0.609
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.758,
442
+ "2": 0.646,
443
+ "3": 0.627,
444
+ "5": 0.646,
445
+ "6": 0.671
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.85,
449
+ "2": 0.751,
450
+ "3": 0.697,
451
+ "5": 0.662,
452
+ "6": 0.725
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.858,
456
+ "2": 0.806,
457
+ "3": 0.696,
458
+ "5": 0.776,
459
+ "6": 0.768
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.901,
463
+ "2": 0.852,
464
+ "3": 0.73,
465
+ "5": 0.807,
466
+ "6": 0.756
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9770000576972961,
472
+ "5.0": 0.9780000448226929
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9820000529289246,
476
+ "5.0": 0.9820000529289246
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.672,
480
+ "5.0": 0.672
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.773,
492
+ "5.0": 0.773
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.903,
496
+ "5.0": 0.903
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.93,
500
+ "5.0": 0.93
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9600000381469727,
506
+ "Python": 0.9900000691413879,
507
+ "HTML": 0.9880000352859497,
508
+ "Java": 0.9640000462532043,
509
+ "PHP": 0.9570000171661377
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9530000686645508,
513
+ "Python": 0.9860000610351562,
514
+ "HTML": 0.9880000352859497,
515
+ "Java": 0.9640000462532043,
516
+ "PHP": 0.9550000429153442
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.657,
520
+ "Python": 0.637,
521
+ "HTML": 0.714,
522
+ "Java": 0.624,
523
+ "PHP": 0.582
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.658,
527
+ "Python": 0.664,
528
+ "HTML": 0.801,
529
+ "Java": 0.697,
530
+ "PHP": 0.64
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.752,
534
+ "Python": 0.726,
535
+ "HTML": 0.928,
536
+ "Java": 0.728,
537
+ "PHP": 0.694
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.615,
541
+ "Python": 0.93,
542
+ "HTML": 0.881,
543
+ "Java": 0.647,
544
+ "PHP": 0.915
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.734,
548
+ "Python": 0.935,
549
+ "HTML": 0.906,
550
+ "Java": 0.656,
551
+ "PHP": 0.913
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.738,
555
+ "Python": 0.957,
556
+ "HTML": 0.958,
557
+ "Java": 0.791,
558
+ "PHP": 0.915
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9360000491142273,
564
+ "1": 0.987000048160553,
565
+ "2": 0.9340000152587891,
566
+ "3": 0.9600000381469727
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.9380000233650208,
570
+ "1": 0.9880000352859497,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9550000429153442
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.805,
576
+ "1": 0.67,
577
+ "2": 0.648,
578
+ "3": 0.666
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.794,
582
+ "1": 0.795,
583
+ "2": 0.686,
584
+ "3": 0.79
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.818,
588
+ "1": 0.867,
589
+ "2": 0.756,
590
+ "3": 0.836
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.849,
594
+ "1": 0.637,
595
+ "2": 0.739,
596
+ "3": 0.625
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.853,
600
+ "1": 0.858,
601
+ "2": 0.824,
602
+ "3": 0.791
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.866,
606
+ "1": 0.971,
607
+ "2": 0.837,
608
+ "3": 0.809
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 1.0,
614
+ "fr": 1.0,
615
+ "de": 1.0,
616
+ "es": 0.9980000257492065,
617
+ "nl": 0.999000072479248
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 1.0,
621
+ "fr": 1.0,
622
+ "de": 0.999000072479248,
623
+ "es": 1.0,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.748,
628
+ "fr": 0.59,
629
+ "de": 0.754,
630
+ "es": 0.494,
631
+ "nl": 0.662
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.822,
635
+ "fr": 0.596,
636
+ "de": 0.831,
637
+ "es": 0.91,
638
+ "nl": 0.746
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.887,
642
+ "fr": 0.922,
643
+ "de": 0.909,
644
+ "es": 0.982,
645
+ "nl": 0.865
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 1.0,
649
+ "fr": 0.991,
650
+ "de": 0.919,
651
+ "es": 0.937,
652
+ "nl": 0.924
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.996,
656
+ "fr": 0.988,
657
+ "de": 0.913,
658
+ "es": 0.94,
659
+ "nl": 0.996
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 0.998,
663
+ "fr": 0.99,
664
+ "de": 0.999,
665
+ "es": 0.995,
666
+ "nl": 1.0
667
+ }
668
+ }
669
+ }
670
+ }
old_relu_eval_results/sparse_probing/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_2_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "b46e4809-b52d-4af3-a3b9-c16c6abcee8d",
30
+ "datetime_epoch_millis": 1738810625768,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9598687920719384,
34
+ "llm_top_1_test_accuracy": 0.6589812500000001,
35
+ "llm_top_2_test_accuracy": 0.7174812500000001,
36
+ "llm_top_5_test_accuracy": 0.7826062500000001,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9566000409424305,
44
+ "sae_top_1_test_accuracy": 0.8076125000000001,
45
+ "sae_top_2_test_accuracy": 0.85893125,
46
+ "sae_top_5_test_accuracy": 0.8915875,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.9694000363349915,
57
+ "llm_top_1_test_accuracy": 0.6436000000000001,
58
+ "llm_top_2_test_accuracy": 0.6874,
59
+ "llm_top_5_test_accuracy": 0.7908,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9660000443458557,
65
+ "sae_top_1_test_accuracy": 0.7590000000000001,
66
+ "sae_top_2_test_accuracy": 0.8640000000000001,
67
+ "sae_top_5_test_accuracy": 0.8916000000000001,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9544000387191772,
76
+ "llm_top_1_test_accuracy": 0.67,
77
+ "llm_top_2_test_accuracy": 0.7148,
78
+ "llm_top_5_test_accuracy": 0.7716,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9500000596046447,
84
+ "sae_top_1_test_accuracy": 0.7979999999999999,
85
+ "sae_top_2_test_accuracy": 0.8022,
86
+ "sae_top_5_test_accuracy": 0.8744,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9320000410079956,
95
+ "llm_top_1_test_accuracy": 0.6918,
96
+ "llm_top_2_test_accuracy": 0.7338,
97
+ "llm_top_5_test_accuracy": 0.765,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9332000494003296,
103
+ "sae_top_1_test_accuracy": 0.8299999999999998,
104
+ "sae_top_2_test_accuracy": 0.8457999999999999,
105
+ "sae_top_5_test_accuracy": 0.8716000000000002,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9196000456809997,
114
+ "llm_top_1_test_accuracy": 0.6048,
115
+ "llm_top_2_test_accuracy": 0.6406000000000001,
116
+ "llm_top_5_test_accuracy": 0.6696,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9124000430107116,
122
+ "sae_top_1_test_accuracy": 0.7344000000000002,
123
+ "sae_top_2_test_accuracy": 0.7668,
124
+ "sae_top_5_test_accuracy": 0.8311999999999999,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9820000529289246,
133
+ "llm_top_1_test_accuracy": 0.672,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9745000302791595,
141
+ "sae_top_1_test_accuracy": 0.858,
142
+ "sae_top_2_test_accuracy": 0.932,
143
+ "sae_top_5_test_accuracy": 0.932,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.969200050830841,
152
+ "llm_top_1_test_accuracy": 0.6428,
153
+ "llm_top_2_test_accuracy": 0.6920000000000001,
154
+ "llm_top_5_test_accuracy": 0.7656000000000001,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.963800048828125,
160
+ "sae_top_1_test_accuracy": 0.8106000000000002,
161
+ "sae_top_2_test_accuracy": 0.8326,
162
+ "sae_top_5_test_accuracy": 0.8598000000000001,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9527500420808792,
171
+ "llm_top_1_test_accuracy": 0.69725,
172
+ "llm_top_2_test_accuracy": 0.76625,
173
+ "llm_top_5_test_accuracy": 0.8192499999999999,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9535000324249268,
179
+ "sae_top_1_test_accuracy": 0.7625000000000001,
180
+ "sae_top_2_test_accuracy": 0.8552500000000001,
181
+ "sae_top_5_test_accuracy": 0.8755000000000001,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9996000289916992,
190
+ "llm_top_1_test_accuracy": 0.6496000000000001,
191
+ "llm_top_2_test_accuracy": 0.7809999999999999,
192
+ "llm_top_5_test_accuracy": 0.913,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9994000196456909,
198
+ "sae_top_1_test_accuracy": 0.9084,
199
+ "sae_top_2_test_accuracy": 0.9728,
200
+ "sae_top_5_test_accuracy": 0.9966000000000002,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_2",
210
+ "sae_lens_version": "5.4.1",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 16384,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "standard",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9510000348091125,
240
+ "1": 0.9630000591278076,
241
+ "2": 0.9520000219345093,
242
+ "6": 0.9910000562667847,
243
+ "9": 0.9730000495910645
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9520000219345093,
249
+ "6": 0.9930000305175781,
250
+ "9": 0.984000027179718
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.568,
254
+ "1": 0.629,
255
+ "2": 0.679,
256
+ "6": 0.791,
257
+ "9": 0.551
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.585,
261
+ "1": 0.666,
262
+ "2": 0.673,
263
+ "6": 0.801,
264
+ "9": 0.712
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.72,
268
+ "1": 0.707,
269
+ "2": 0.764,
270
+ "6": 0.899,
271
+ "9": 0.864
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.617,
275
+ "1": 0.643,
276
+ "2": 0.852,
277
+ "6": 0.757,
278
+ "9": 0.926
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.855,
282
+ "1": 0.666,
283
+ "2": 0.888,
284
+ "6": 0.98,
285
+ "9": 0.931
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.886,
289
+ "1": 0.751,
290
+ "2": 0.908,
291
+ "6": 0.986,
292
+ "9": 0.927
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9590000510215759,
298
+ "13": 0.9530000686645508,
299
+ "14": 0.9580000638961792,
300
+ "18": 0.9250000715255737,
301
+ "19": 0.9550000429153442
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.971000075340271,
305
+ "13": 0.9520000219345093,
306
+ "14": 0.956000030040741,
307
+ "18": 0.9360000491142273,
308
+ "19": 0.9570000171661377
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.561,
312
+ "13": 0.672,
313
+ "14": 0.631,
314
+ "18": 0.7,
315
+ "19": 0.786
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.69,
319
+ "13": 0.72,
320
+ "14": 0.677,
321
+ "18": 0.721,
322
+ "19": 0.766
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.776,
326
+ "13": 0.742,
327
+ "14": 0.768,
328
+ "18": 0.731,
329
+ "19": 0.841
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.854,
333
+ "13": 0.707,
334
+ "14": 0.864,
335
+ "18": 0.722,
336
+ "19": 0.843
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.855,
340
+ "13": 0.686,
341
+ "14": 0.882,
342
+ "18": 0.725,
343
+ "19": 0.863
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.954,
347
+ "13": 0.792,
348
+ "14": 0.882,
349
+ "18": 0.898,
350
+ "19": 0.846
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.9650000333786011,
356
+ "21": 0.9260000586509705,
357
+ "22": 0.909000039100647,
358
+ "25": 0.971000075340271,
359
+ "26": 0.8950000405311584
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.956000030040741,
363
+ "21": 0.9260000586509705,
364
+ "22": 0.9170000553131104,
365
+ "25": 0.9640000462532043,
366
+ "26": 0.8970000147819519
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.699,
370
+ "21": 0.772,
371
+ "22": 0.641,
372
+ "25": 0.703,
373
+ "26": 0.644
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.811,
377
+ "21": 0.757,
378
+ "22": 0.655,
379
+ "25": 0.762,
380
+ "26": 0.684
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.861,
384
+ "21": 0.788,
385
+ "22": 0.712,
386
+ "25": 0.796,
387
+ "26": 0.668
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.876,
391
+ "21": 0.789,
392
+ "22": 0.885,
393
+ "25": 0.896,
394
+ "26": 0.704
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.907,
398
+ "21": 0.799,
399
+ "22": 0.888,
400
+ "25": 0.889,
401
+ "26": 0.746
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.939,
405
+ "21": 0.847,
406
+ "22": 0.901,
407
+ "25": 0.904,
408
+ "26": 0.767
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9490000605583191,
414
+ "2": 0.9220000505447388,
415
+ "3": 0.9120000600814819,
416
+ "5": 0.9200000166893005,
417
+ "6": 0.859000027179718
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.956000030040741,
421
+ "2": 0.937000036239624,
422
+ "3": 0.9160000681877136,
423
+ "5": 0.9170000553131104,
424
+ "6": 0.8720000386238098
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.665,
428
+ "2": 0.596,
429
+ "3": 0.599,
430
+ "5": 0.576,
431
+ "6": 0.588
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.73,
435
+ "2": 0.632,
436
+ "3": 0.617,
437
+ "5": 0.615,
438
+ "6": 0.609
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.758,
442
+ "2": 0.646,
443
+ "3": 0.627,
444
+ "5": 0.646,
445
+ "6": 0.671
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.854,
449
+ "2": 0.779,
450
+ "3": 0.676,
451
+ "5": 0.676,
452
+ "6": 0.687
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.859,
456
+ "2": 0.793,
457
+ "3": 0.695,
458
+ "5": 0.736,
459
+ "6": 0.751
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.885,
463
+ "2": 0.854,
464
+ "3": 0.805,
465
+ "5": 0.859,
466
+ "6": 0.753
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9740000367164612,
472
+ "5.0": 0.9750000238418579
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9820000529289246,
476
+ "5.0": 0.9820000529289246
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.672,
480
+ "5.0": 0.672
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.858,
492
+ "5.0": 0.858
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.932,
496
+ "5.0": 0.932
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.932,
500
+ "5.0": 0.932
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9390000700950623,
506
+ "Python": 0.9830000400543213,
507
+ "HTML": 0.9860000610351562,
508
+ "Java": 0.9570000171661377,
509
+ "PHP": 0.9540000557899475
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9530000686645508,
513
+ "Python": 0.9860000610351562,
514
+ "HTML": 0.9880000352859497,
515
+ "Java": 0.9640000462532043,
516
+ "PHP": 0.9550000429153442
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.657,
520
+ "Python": 0.637,
521
+ "HTML": 0.714,
522
+ "Java": 0.624,
523
+ "PHP": 0.582
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.658,
527
+ "Python": 0.664,
528
+ "HTML": 0.801,
529
+ "Java": 0.697,
530
+ "PHP": 0.64
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.752,
534
+ "Python": 0.726,
535
+ "HTML": 0.928,
536
+ "Java": 0.728,
537
+ "PHP": 0.694
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.632,
541
+ "Python": 0.914,
542
+ "HTML": 0.88,
543
+ "Java": 0.708,
544
+ "PHP": 0.919
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.639,
548
+ "Python": 0.923,
549
+ "HTML": 0.893,
550
+ "Java": 0.785,
551
+ "PHP": 0.923
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.714,
555
+ "Python": 0.942,
556
+ "HTML": 0.927,
557
+ "Java": 0.793,
558
+ "PHP": 0.923
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9420000314712524,
564
+ "1": 0.9890000224113464,
565
+ "2": 0.9290000200271606,
566
+ "3": 0.9540000557899475
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.9380000233650208,
570
+ "1": 0.9880000352859497,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9550000429153442
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.805,
576
+ "1": 0.67,
577
+ "2": 0.648,
578
+ "3": 0.666
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.794,
582
+ "1": 0.795,
583
+ "2": 0.686,
584
+ "3": 0.79
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.818,
588
+ "1": 0.867,
589
+ "2": 0.756,
590
+ "3": 0.836
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.683,
594
+ "1": 0.936,
595
+ "2": 0.774,
596
+ "3": 0.657
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.848,
600
+ "1": 0.932,
601
+ "2": 0.828,
602
+ "3": 0.813
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.852,
606
+ "1": 0.946,
607
+ "2": 0.841,
608
+ "3": 0.863
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 0.9980000257492065,
614
+ "fr": 1.0,
615
+ "de": 1.0,
616
+ "es": 1.0,
617
+ "nl": 0.999000072479248
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 1.0,
621
+ "fr": 1.0,
622
+ "de": 0.999000072479248,
623
+ "es": 1.0,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.748,
628
+ "fr": 0.59,
629
+ "de": 0.754,
630
+ "es": 0.494,
631
+ "nl": 0.662
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.822,
635
+ "fr": 0.596,
636
+ "de": 0.831,
637
+ "es": 0.91,
638
+ "nl": 0.746
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.887,
642
+ "fr": 0.922,
643
+ "de": 0.909,
644
+ "es": 0.982,
645
+ "nl": 0.865
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.69,
649
+ "fr": 0.993,
650
+ "de": 0.917,
651
+ "es": 0.944,
652
+ "nl": 0.998
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.999,
656
+ "fr": 0.994,
657
+ "de": 0.922,
658
+ "es": 0.951,
659
+ "nl": 0.998
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 0.998,
663
+ "fr": 0.993,
664
+ "de": 0.999,
665
+ "es": 0.995,
666
+ "nl": 0.998
667
+ }
668
+ }
669
+ }
670
+ }
old_relu_eval_results/sparse_probing/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_3_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "6425df5a-b293-4597-bb77-df77861d6457",
30
+ "datetime_epoch_millis": 1738810319161,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9598687920719384,
34
+ "llm_top_1_test_accuracy": 0.6589812500000001,
35
+ "llm_top_2_test_accuracy": 0.7174812500000001,
36
+ "llm_top_5_test_accuracy": 0.7826062500000001,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9553375504910946,
44
+ "sae_top_1_test_accuracy": 0.77961875,
45
+ "sae_top_2_test_accuracy": 0.853675,
46
+ "sae_top_5_test_accuracy": 0.89161875,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.9694000363349915,
57
+ "llm_top_1_test_accuracy": 0.6436000000000001,
58
+ "llm_top_2_test_accuracy": 0.6874,
59
+ "llm_top_5_test_accuracy": 0.7908,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9662000417709351,
65
+ "sae_top_1_test_accuracy": 0.8164,
66
+ "sae_top_2_test_accuracy": 0.9014,
67
+ "sae_top_5_test_accuracy": 0.9178,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9544000387191772,
76
+ "llm_top_1_test_accuracy": 0.67,
77
+ "llm_top_2_test_accuracy": 0.7148,
78
+ "llm_top_5_test_accuracy": 0.7716,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9474000453948974,
84
+ "sae_top_1_test_accuracy": 0.766,
85
+ "sae_top_2_test_accuracy": 0.7964,
86
+ "sae_top_5_test_accuracy": 0.8795999999999999,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9320000410079956,
95
+ "llm_top_1_test_accuracy": 0.6918,
96
+ "llm_top_2_test_accuracy": 0.7338,
97
+ "llm_top_5_test_accuracy": 0.765,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9244000434875488,
103
+ "sae_top_1_test_accuracy": 0.8320000000000001,
104
+ "sae_top_2_test_accuracy": 0.8568000000000001,
105
+ "sae_top_5_test_accuracy": 0.8774000000000001,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9196000456809997,
114
+ "llm_top_1_test_accuracy": 0.6048,
115
+ "llm_top_2_test_accuracy": 0.6406000000000001,
116
+ "llm_top_5_test_accuracy": 0.6696,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9170000433921814,
122
+ "sae_top_1_test_accuracy": 0.6744,
123
+ "sae_top_2_test_accuracy": 0.7302000000000001,
124
+ "sae_top_5_test_accuracy": 0.8102,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9820000529289246,
133
+ "llm_top_1_test_accuracy": 0.672,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.971000075340271,
141
+ "sae_top_1_test_accuracy": 0.805,
142
+ "sae_top_2_test_accuracy": 0.934,
143
+ "sae_top_5_test_accuracy": 0.943,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.969200050830841,
152
+ "llm_top_1_test_accuracy": 0.6428,
153
+ "llm_top_2_test_accuracy": 0.6920000000000001,
154
+ "llm_top_5_test_accuracy": 0.7656000000000001,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9680000424385071,
160
+ "sae_top_1_test_accuracy": 0.7978000000000001,
161
+ "sae_top_2_test_accuracy": 0.8178000000000001,
162
+ "sae_top_5_test_accuracy": 0.8513999999999999,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9527500420808792,
171
+ "llm_top_1_test_accuracy": 0.69725,
172
+ "llm_top_2_test_accuracy": 0.76625,
173
+ "llm_top_5_test_accuracy": 0.8192499999999999,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9495000541210175,
179
+ "sae_top_1_test_accuracy": 0.6577500000000001,
180
+ "sae_top_2_test_accuracy": 0.813,
181
+ "sae_top_5_test_accuracy": 0.86075,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9996000289916992,
190
+ "llm_top_1_test_accuracy": 0.6496000000000001,
191
+ "llm_top_2_test_accuracy": 0.7809999999999999,
192
+ "llm_top_5_test_accuracy": 0.913,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9992000579833984,
198
+ "sae_top_1_test_accuracy": 0.8876,
199
+ "sae_top_2_test_accuracy": 0.9798,
200
+ "sae_top_5_test_accuracy": 0.9928000000000001,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_3",
210
+ "sae_lens_version": "5.4.1",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 16384,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "standard",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9500000476837158,
240
+ "1": 0.9640000462532043,
241
+ "2": 0.9520000219345093,
242
+ "6": 0.9880000352859497,
243
+ "9": 0.9770000576972961
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9520000219345093,
249
+ "6": 0.9930000305175781,
250
+ "9": 0.984000027179718
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.568,
254
+ "1": 0.629,
255
+ "2": 0.679,
256
+ "6": 0.791,
257
+ "9": 0.551
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.585,
261
+ "1": 0.666,
262
+ "2": 0.673,
263
+ "6": 0.801,
264
+ "9": 0.712
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.72,
268
+ "1": 0.707,
269
+ "2": 0.764,
270
+ "6": 0.899,
271
+ "9": 0.864
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.617,
275
+ "1": 0.823,
276
+ "2": 0.843,
277
+ "6": 0.977,
278
+ "9": 0.822
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.881,
282
+ "1": 0.848,
283
+ "2": 0.884,
284
+ "6": 0.976,
285
+ "9": 0.918
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.896,
289
+ "1": 0.883,
290
+ "2": 0.904,
291
+ "6": 0.986,
292
+ "9": 0.92
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9580000638961792,
298
+ "13": 0.956000030040741,
299
+ "14": 0.9550000429153442,
300
+ "18": 0.9110000729560852,
301
+ "19": 0.9570000171661377
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.971000075340271,
305
+ "13": 0.9520000219345093,
306
+ "14": 0.956000030040741,
307
+ "18": 0.9360000491142273,
308
+ "19": 0.9570000171661377
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.561,
312
+ "13": 0.672,
313
+ "14": 0.631,
314
+ "18": 0.7,
315
+ "19": 0.786
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.69,
319
+ "13": 0.72,
320
+ "14": 0.677,
321
+ "18": 0.721,
322
+ "19": 0.766
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.776,
326
+ "13": 0.742,
327
+ "14": 0.768,
328
+ "18": 0.731,
329
+ "19": 0.841
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.733,
333
+ "13": 0.69,
334
+ "14": 0.839,
335
+ "18": 0.735,
336
+ "19": 0.833
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.859,
340
+ "13": 0.705,
341
+ "14": 0.85,
342
+ "18": 0.728,
343
+ "19": 0.84
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.947,
347
+ "13": 0.79,
348
+ "14": 0.885,
349
+ "18": 0.917,
350
+ "19": 0.859
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.9550000429153442,
356
+ "21": 0.9220000505447388,
357
+ "22": 0.9100000262260437,
358
+ "25": 0.9540000557899475,
359
+ "26": 0.8810000419616699
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.956000030040741,
363
+ "21": 0.9260000586509705,
364
+ "22": 0.9170000553131104,
365
+ "25": 0.9640000462532043,
366
+ "26": 0.8970000147819519
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.699,
370
+ "21": 0.772,
371
+ "22": 0.641,
372
+ "25": 0.703,
373
+ "26": 0.644
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.811,
377
+ "21": 0.757,
378
+ "22": 0.655,
379
+ "25": 0.762,
380
+ "26": 0.684
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.861,
384
+ "21": 0.788,
385
+ "22": 0.712,
386
+ "25": 0.796,
387
+ "26": 0.668
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.882,
391
+ "21": 0.811,
392
+ "22": 0.888,
393
+ "25": 0.879,
394
+ "26": 0.7
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.899,
398
+ "21": 0.834,
399
+ "22": 0.899,
400
+ "25": 0.882,
401
+ "26": 0.77
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.937,
405
+ "21": 0.855,
406
+ "22": 0.885,
407
+ "25": 0.902,
408
+ "26": 0.808
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9420000314712524,
414
+ "2": 0.9360000491142273,
415
+ "3": 0.9260000586509705,
416
+ "5": 0.9170000553131104,
417
+ "6": 0.8640000224113464
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.956000030040741,
421
+ "2": 0.937000036239624,
422
+ "3": 0.9160000681877136,
423
+ "5": 0.9170000553131104,
424
+ "6": 0.8720000386238098
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.665,
428
+ "2": 0.596,
429
+ "3": 0.599,
430
+ "5": 0.576,
431
+ "6": 0.588
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.73,
435
+ "2": 0.632,
436
+ "3": 0.617,
437
+ "5": 0.615,
438
+ "6": 0.609
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.758,
442
+ "2": 0.646,
443
+ "3": 0.627,
444
+ "5": 0.646,
445
+ "6": 0.671
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.861,
449
+ "2": 0.629,
450
+ "3": 0.597,
451
+ "5": 0.605,
452
+ "6": 0.68
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.903,
456
+ "2": 0.752,
457
+ "3": 0.648,
458
+ "5": 0.631,
459
+ "6": 0.717
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.918,
463
+ "2": 0.809,
464
+ "3": 0.698,
465
+ "5": 0.873,
466
+ "6": 0.753
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.971000075340271,
472
+ "5.0": 0.971000075340271
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9820000529289246,
476
+ "5.0": 0.9820000529289246
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.672,
480
+ "5.0": 0.672
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.805,
492
+ "5.0": 0.805
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.934,
496
+ "5.0": 0.934
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.943,
500
+ "5.0": 0.943
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9620000720024109,
506
+ "Python": 0.984000027179718,
507
+ "HTML": 0.984000027179718,
508
+ "Java": 0.9550000429153442,
509
+ "PHP": 0.9550000429153442
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9530000686645508,
513
+ "Python": 0.9860000610351562,
514
+ "HTML": 0.9880000352859497,
515
+ "Java": 0.9640000462532043,
516
+ "PHP": 0.9550000429153442
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.657,
520
+ "Python": 0.637,
521
+ "HTML": 0.714,
522
+ "Java": 0.624,
523
+ "PHP": 0.582
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.658,
527
+ "Python": 0.664,
528
+ "HTML": 0.801,
529
+ "Java": 0.697,
530
+ "PHP": 0.64
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.752,
534
+ "Python": 0.726,
535
+ "HTML": 0.928,
536
+ "Java": 0.728,
537
+ "PHP": 0.694
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.604,
541
+ "Python": 0.906,
542
+ "HTML": 0.876,
543
+ "Java": 0.713,
544
+ "PHP": 0.89
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.69,
548
+ "Python": 0.914,
549
+ "HTML": 0.889,
550
+ "Java": 0.699,
551
+ "PHP": 0.897
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.717,
555
+ "Python": 0.92,
556
+ "HTML": 0.915,
557
+ "Java": 0.79,
558
+ "PHP": 0.915
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9350000619888306,
564
+ "1": 0.9860000610351562,
565
+ "2": 0.9310000538825989,
566
+ "3": 0.9460000395774841
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.9380000233650208,
570
+ "1": 0.9880000352859497,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9550000429153442
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.805,
576
+ "1": 0.67,
577
+ "2": 0.648,
578
+ "3": 0.666
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.794,
582
+ "1": 0.795,
583
+ "2": 0.686,
584
+ "3": 0.79
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.818,
588
+ "1": 0.867,
589
+ "2": 0.756,
590
+ "3": 0.836
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.715,
594
+ "1": 0.606,
595
+ "2": 0.695,
596
+ "3": 0.615
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.839,
600
+ "1": 0.837,
601
+ "2": 0.815,
602
+ "3": 0.761
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.871,
606
+ "1": 0.935,
607
+ "2": 0.818,
608
+ "3": 0.819
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 0.999000072479248,
614
+ "fr": 0.999000072479248,
615
+ "de": 1.0,
616
+ "es": 0.999000072479248,
617
+ "nl": 0.999000072479248
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 1.0,
621
+ "fr": 1.0,
622
+ "de": 0.999000072479248,
623
+ "es": 1.0,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.748,
628
+ "fr": 0.59,
629
+ "de": 0.754,
630
+ "es": 0.494,
631
+ "nl": 0.662
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.822,
635
+ "fr": 0.596,
636
+ "de": 0.831,
637
+ "es": 0.91,
638
+ "nl": 0.746
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.887,
642
+ "fr": 0.922,
643
+ "de": 0.909,
644
+ "es": 0.982,
645
+ "nl": 0.865
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.685,
649
+ "fr": 0.984,
650
+ "de": 0.918,
651
+ "es": 0.942,
652
+ "nl": 0.909
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.996,
656
+ "fr": 0.99,
657
+ "de": 0.926,
658
+ "es": 0.991,
659
+ "nl": 0.996
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 0.999,
663
+ "fr": 0.991,
664
+ "de": 0.986,
665
+ "es": 0.99,
666
+ "nl": 0.998
667
+ }
668
+ }
669
+ }
670
+ }
old_relu_eval_results/sparse_probing/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_4_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "79d387eb-180f-4d7c-8356-24c5e2fb2eca",
30
+ "datetime_epoch_millis": 1738810868662,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9598687920719384,
34
+ "llm_top_1_test_accuracy": 0.6589812500000001,
35
+ "llm_top_2_test_accuracy": 0.7174812500000001,
36
+ "llm_top_5_test_accuracy": 0.7826062500000001,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9545250445604324,
44
+ "sae_top_1_test_accuracy": 0.7856687499999999,
45
+ "sae_top_2_test_accuracy": 0.8240375,
46
+ "sae_top_5_test_accuracy": 0.8696375000000001,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.9694000363349915,
57
+ "llm_top_1_test_accuracy": 0.6436000000000001,
58
+ "llm_top_2_test_accuracy": 0.6874,
59
+ "llm_top_5_test_accuracy": 0.7908,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9622000455856323,
65
+ "sae_top_1_test_accuracy": 0.7978000000000001,
66
+ "sae_top_2_test_accuracy": 0.8472,
67
+ "sae_top_5_test_accuracy": 0.8636000000000001,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9544000387191772,
76
+ "llm_top_1_test_accuracy": 0.67,
77
+ "llm_top_2_test_accuracy": 0.7148,
78
+ "llm_top_5_test_accuracy": 0.7716,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9474000453948974,
84
+ "sae_top_1_test_accuracy": 0.7654,
85
+ "sae_top_2_test_accuracy": 0.7933999999999999,
86
+ "sae_top_5_test_accuracy": 0.8804000000000001,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9320000410079956,
95
+ "llm_top_1_test_accuracy": 0.6918,
96
+ "llm_top_2_test_accuracy": 0.7338,
97
+ "llm_top_5_test_accuracy": 0.765,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9278000354766845,
103
+ "sae_top_1_test_accuracy": 0.8138,
104
+ "sae_top_2_test_accuracy": 0.8508000000000001,
105
+ "sae_top_5_test_accuracy": 0.8704000000000001,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9196000456809997,
114
+ "llm_top_1_test_accuracy": 0.6048,
115
+ "llm_top_2_test_accuracy": 0.6406000000000001,
116
+ "llm_top_5_test_accuracy": 0.6696,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9180000424385071,
122
+ "sae_top_1_test_accuracy": 0.6996,
123
+ "sae_top_2_test_accuracy": 0.7652,
124
+ "sae_top_5_test_accuracy": 0.8160000000000001,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9820000529289246,
133
+ "llm_top_1_test_accuracy": 0.672,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9690000414848328,
141
+ "sae_top_1_test_accuracy": 0.8,
142
+ "sae_top_2_test_accuracy": 0.813,
143
+ "sae_top_5_test_accuracy": 0.837,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.969200050830841,
152
+ "llm_top_1_test_accuracy": 0.6428,
153
+ "llm_top_2_test_accuracy": 0.6920000000000001,
154
+ "llm_top_5_test_accuracy": 0.7656000000000001,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9656000375747681,
160
+ "sae_top_1_test_accuracy": 0.7926,
161
+ "sae_top_2_test_accuracy": 0.8016,
162
+ "sae_top_5_test_accuracy": 0.8466000000000001,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9527500420808792,
171
+ "llm_top_1_test_accuracy": 0.69725,
172
+ "llm_top_2_test_accuracy": 0.76625,
173
+ "llm_top_5_test_accuracy": 0.8192499999999999,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9490000605583191,
179
+ "sae_top_1_test_accuracy": 0.74175,
180
+ "sae_top_2_test_accuracy": 0.7815,
181
+ "sae_top_5_test_accuracy": 0.8514999999999999,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9996000289916992,
190
+ "llm_top_1_test_accuracy": 0.6496000000000001,
191
+ "llm_top_2_test_accuracy": 0.7809999999999999,
192
+ "llm_top_5_test_accuracy": 0.913,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9972000479698181,
198
+ "sae_top_1_test_accuracy": 0.8744,
199
+ "sae_top_2_test_accuracy": 0.9395999999999999,
200
+ "sae_top_5_test_accuracy": 0.9916,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_4",
210
+ "sae_lens_version": "5.4.1",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 16384,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "standard",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.9390000700950623,
240
+ "1": 0.9650000333786011,
241
+ "2": 0.9470000267028809,
242
+ "6": 0.9820000529289246,
243
+ "9": 0.9780000448226929
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9520000219345093,
249
+ "6": 0.9930000305175781,
250
+ "9": 0.984000027179718
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.568,
254
+ "1": 0.629,
255
+ "2": 0.679,
256
+ "6": 0.791,
257
+ "9": 0.551
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.585,
261
+ "1": 0.666,
262
+ "2": 0.673,
263
+ "6": 0.801,
264
+ "9": 0.712
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.72,
268
+ "1": 0.707,
269
+ "2": 0.764,
270
+ "6": 0.899,
271
+ "9": 0.864
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.621,
275
+ "1": 0.644,
276
+ "2": 0.856,
277
+ "6": 0.978,
278
+ "9": 0.89
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.653,
282
+ "1": 0.817,
283
+ "2": 0.878,
284
+ "6": 0.976,
285
+ "9": 0.912
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.676,
289
+ "1": 0.864,
290
+ "2": 0.889,
291
+ "6": 0.987,
292
+ "9": 0.902
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.9570000171661377,
298
+ "13": 0.9490000605583191,
299
+ "14": 0.9550000429153442,
300
+ "18": 0.9180000424385071,
301
+ "19": 0.9580000638961792
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.971000075340271,
305
+ "13": 0.9520000219345093,
306
+ "14": 0.956000030040741,
307
+ "18": 0.9360000491142273,
308
+ "19": 0.9570000171661377
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.561,
312
+ "13": 0.672,
313
+ "14": 0.631,
314
+ "18": 0.7,
315
+ "19": 0.786
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.69,
319
+ "13": 0.72,
320
+ "14": 0.677,
321
+ "18": 0.721,
322
+ "19": 0.766
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.776,
326
+ "13": 0.742,
327
+ "14": 0.768,
328
+ "18": 0.731,
329
+ "19": 0.841
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.721,
333
+ "13": 0.695,
334
+ "14": 0.859,
335
+ "18": 0.725,
336
+ "19": 0.827
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.859,
340
+ "13": 0.685,
341
+ "14": 0.868,
342
+ "18": 0.731,
343
+ "19": 0.824
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.927,
347
+ "13": 0.845,
348
+ "14": 0.885,
349
+ "18": 0.903,
350
+ "19": 0.842
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.9540000557899475,
356
+ "21": 0.9200000166893005,
357
+ "22": 0.9140000343322754,
358
+ "25": 0.9610000252723694,
359
+ "26": 0.89000004529953
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.956000030040741,
363
+ "21": 0.9260000586509705,
364
+ "22": 0.9170000553131104,
365
+ "25": 0.9640000462532043,
366
+ "26": 0.8970000147819519
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.699,
370
+ "21": 0.772,
371
+ "22": 0.641,
372
+ "25": 0.703,
373
+ "26": 0.644
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.811,
377
+ "21": 0.757,
378
+ "22": 0.655,
379
+ "25": 0.762,
380
+ "26": 0.684
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.861,
384
+ "21": 0.788,
385
+ "22": 0.712,
386
+ "25": 0.796,
387
+ "26": 0.668
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.866,
391
+ "21": 0.729,
392
+ "22": 0.883,
393
+ "25": 0.887,
394
+ "26": 0.704
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.895,
398
+ "21": 0.821,
399
+ "22": 0.876,
400
+ "25": 0.893,
401
+ "26": 0.769
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.932,
405
+ "21": 0.851,
406
+ "22": 0.885,
407
+ "25": 0.905,
408
+ "26": 0.779
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9430000185966492,
414
+ "2": 0.9290000200271606,
415
+ "3": 0.921000063419342,
416
+ "5": 0.9260000586509705,
417
+ "6": 0.8710000514984131
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.956000030040741,
421
+ "2": 0.937000036239624,
422
+ "3": 0.9160000681877136,
423
+ "5": 0.9170000553131104,
424
+ "6": 0.8720000386238098
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.665,
428
+ "2": 0.596,
429
+ "3": 0.599,
430
+ "5": 0.576,
431
+ "6": 0.588
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.73,
435
+ "2": 0.632,
436
+ "3": 0.617,
437
+ "5": 0.615,
438
+ "6": 0.609
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.758,
442
+ "2": 0.646,
443
+ "3": 0.627,
444
+ "5": 0.646,
445
+ "6": 0.671
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.893,
449
+ "2": 0.596,
450
+ "3": 0.569,
451
+ "5": 0.771,
452
+ "6": 0.669
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.892,
456
+ "2": 0.772,
457
+ "3": 0.669,
458
+ "5": 0.774,
459
+ "6": 0.719
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.888,
463
+ "2": 0.864,
464
+ "3": 0.711,
465
+ "5": 0.857,
466
+ "6": 0.76
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.968000054359436,
472
+ "5.0": 0.9700000286102295
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9820000529289246,
476
+ "5.0": 0.9820000529289246
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.672,
480
+ "5.0": 0.672
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.8,
492
+ "5.0": 0.8
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.813,
496
+ "5.0": 0.813
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.837,
500
+ "5.0": 0.837
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9550000429153442,
506
+ "Python": 0.9750000238418579,
507
+ "HTML": 0.9820000529289246,
508
+ "Java": 0.9590000510215759,
509
+ "PHP": 0.9570000171661377
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9530000686645508,
513
+ "Python": 0.9860000610351562,
514
+ "HTML": 0.9880000352859497,
515
+ "Java": 0.9640000462532043,
516
+ "PHP": 0.9550000429153442
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.657,
520
+ "Python": 0.637,
521
+ "HTML": 0.714,
522
+ "Java": 0.624,
523
+ "PHP": 0.582
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.658,
527
+ "Python": 0.664,
528
+ "HTML": 0.801,
529
+ "Java": 0.697,
530
+ "PHP": 0.64
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.752,
534
+ "Python": 0.726,
535
+ "HTML": 0.928,
536
+ "Java": 0.728,
537
+ "PHP": 0.694
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.606,
541
+ "Python": 0.904,
542
+ "HTML": 0.856,
543
+ "Java": 0.717,
544
+ "PHP": 0.88
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.626,
548
+ "Python": 0.903,
549
+ "HTML": 0.884,
550
+ "Java": 0.709,
551
+ "PHP": 0.886
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.742,
555
+ "Python": 0.925,
556
+ "HTML": 0.92,
557
+ "Java": 0.729,
558
+ "PHP": 0.917
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9360000491142273,
564
+ "1": 0.9860000610351562,
565
+ "2": 0.9260000586509705,
566
+ "3": 0.9480000734329224
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.9380000233650208,
570
+ "1": 0.9880000352859497,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9550000429153442
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.805,
576
+ "1": 0.67,
577
+ "2": 0.648,
578
+ "3": 0.666
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.794,
582
+ "1": 0.795,
583
+ "2": 0.686,
584
+ "3": 0.79
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.818,
588
+ "1": 0.867,
589
+ "2": 0.756,
590
+ "3": 0.836
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.832,
594
+ "1": 0.817,
595
+ "2": 0.674,
596
+ "3": 0.644
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.842,
600
+ "1": 0.821,
601
+ "2": 0.813,
602
+ "3": 0.65
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.854,
606
+ "1": 0.925,
607
+ "2": 0.814,
608
+ "3": 0.813
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 0.999000072479248,
614
+ "fr": 0.9980000257492065,
615
+ "de": 0.9960000514984131,
616
+ "es": 0.9960000514984131,
617
+ "nl": 0.9970000386238098
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 1.0,
621
+ "fr": 1.0,
622
+ "de": 0.999000072479248,
623
+ "es": 1.0,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.748,
628
+ "fr": 0.59,
629
+ "de": 0.754,
630
+ "es": 0.494,
631
+ "nl": 0.662
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.822,
635
+ "fr": 0.596,
636
+ "de": 0.831,
637
+ "es": 0.91,
638
+ "nl": 0.746
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.887,
642
+ "fr": 0.922,
643
+ "de": 0.909,
644
+ "es": 0.982,
645
+ "nl": 0.865
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.696,
649
+ "fr": 0.986,
650
+ "de": 0.908,
651
+ "es": 0.895,
652
+ "nl": 0.887
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.802,
656
+ "fr": 0.988,
657
+ "de": 0.925,
658
+ "es": 0.986,
659
+ "nl": 0.997
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 1.0,
663
+ "fr": 0.988,
664
+ "de": 0.982,
665
+ "es": 0.991,
666
+ "nl": 0.997
667
+ }
668
+ }
669
+ }
670
+ }
old_relu_eval_results/sparse_probing/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_5_custom_sae_eval_results.json ADDED
@@ -0,0 +1,670 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "a24bedef-7962-4166-b149-7a1b6ad73ccc",
30
+ "datetime_epoch_millis": 1738810076954,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9598687920719384,
34
+ "llm_top_1_test_accuracy": 0.6589812500000001,
35
+ "llm_top_2_test_accuracy": 0.7174812500000001,
36
+ "llm_top_5_test_accuracy": 0.7826062500000001,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9497312925755977,
44
+ "sae_top_1_test_accuracy": 0.75526875,
45
+ "sae_top_2_test_accuracy": 0.8121375,
46
+ "sae_top_5_test_accuracy": 0.86029375,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.9694000363349915,
57
+ "llm_top_1_test_accuracy": 0.6436000000000001,
58
+ "llm_top_2_test_accuracy": 0.6874,
59
+ "llm_top_5_test_accuracy": 0.7908,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9564000368118286,
65
+ "sae_top_1_test_accuracy": 0.7794,
66
+ "sae_top_2_test_accuracy": 0.8156000000000001,
67
+ "sae_top_5_test_accuracy": 0.8370000000000001,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9544000387191772,
76
+ "llm_top_1_test_accuracy": 0.67,
77
+ "llm_top_2_test_accuracy": 0.7148,
78
+ "llm_top_5_test_accuracy": 0.7716,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9434000492095947,
84
+ "sae_top_1_test_accuracy": 0.7693999999999999,
85
+ "sae_top_2_test_accuracy": 0.78,
86
+ "sae_top_5_test_accuracy": 0.8836,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9320000410079956,
95
+ "llm_top_1_test_accuracy": 0.6918,
96
+ "llm_top_2_test_accuracy": 0.7338,
97
+ "llm_top_5_test_accuracy": 0.765,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9236000418663025,
103
+ "sae_top_1_test_accuracy": 0.7532,
104
+ "sae_top_2_test_accuracy": 0.8549999999999999,
105
+ "sae_top_5_test_accuracy": 0.8664000000000002,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9196000456809997,
114
+ "llm_top_1_test_accuracy": 0.6048,
115
+ "llm_top_2_test_accuracy": 0.6406000000000001,
116
+ "llm_top_5_test_accuracy": 0.6696,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9022000432014465,
122
+ "sae_top_1_test_accuracy": 0.6662000000000001,
123
+ "sae_top_2_test_accuracy": 0.6942,
124
+ "sae_top_5_test_accuracy": 0.819,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9820000529289246,
133
+ "llm_top_1_test_accuracy": 0.672,
134
+ "llm_top_2_test_accuracy": 0.724,
135
+ "llm_top_5_test_accuracy": 0.766,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9645000398159027,
141
+ "sae_top_1_test_accuracy": 0.786,
142
+ "sae_top_2_test_accuracy": 0.813,
143
+ "sae_top_5_test_accuracy": 0.81,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.969200050830841,
152
+ "llm_top_1_test_accuracy": 0.6428,
153
+ "llm_top_2_test_accuracy": 0.6920000000000001,
154
+ "llm_top_5_test_accuracy": 0.7656000000000001,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9658000349998475,
160
+ "sae_top_1_test_accuracy": 0.7729999999999999,
161
+ "sae_top_2_test_accuracy": 0.7797999999999999,
162
+ "sae_top_5_test_accuracy": 0.8286,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9527500420808792,
171
+ "llm_top_1_test_accuracy": 0.69725,
172
+ "llm_top_2_test_accuracy": 0.76625,
173
+ "llm_top_5_test_accuracy": 0.8192499999999999,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9457500576972961,
179
+ "sae_top_1_test_accuracy": 0.74775,
180
+ "sae_top_2_test_accuracy": 0.8095,
181
+ "sae_top_5_test_accuracy": 0.8547499999999999,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9996000289916992,
190
+ "llm_top_1_test_accuracy": 0.6496000000000001,
191
+ "llm_top_2_test_accuracy": 0.7809999999999999,
192
+ "llm_top_5_test_accuracy": 0.913,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9962000370025634,
198
+ "sae_top_1_test_accuracy": 0.7672000000000001,
199
+ "sae_top_2_test_accuracy": 0.95,
200
+ "sae_top_5_test_accuracy": 0.983,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
208
+ "sae_lens_id": "custom_sae",
209
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_5",
210
+ "sae_lens_version": "5.4.1",
211
+ "sae_cfg_dict": {
212
+ "model_name": "gemma-2-2b",
213
+ "d_in": 2304,
214
+ "d_sae": 16384,
215
+ "hook_layer": 12,
216
+ "hook_name": "blocks.12.hook_resid_post",
217
+ "context_size": null,
218
+ "hook_head_index": null,
219
+ "architecture": "standard",
220
+ "apply_b_dec_to_input": null,
221
+ "finetuning_scaling_factor": null,
222
+ "activation_fn_str": "",
223
+ "prepend_bos": true,
224
+ "normalize_activations": "none",
225
+ "dtype": "bfloat16",
226
+ "device": "",
227
+ "dataset_path": "",
228
+ "dataset_trust_remote_code": true,
229
+ "seqpos_slice": [
230
+ null
231
+ ],
232
+ "training_tokens": -100000,
233
+ "sae_lens_training_version": null,
234
+ "neuronpedia_id": null
235
+ },
236
+ "eval_result_unstructured": {
237
+ "LabHC/bias_in_bios_class_set1_results": {
238
+ "sae_test_accuracy": {
239
+ "0": 0.940000057220459,
240
+ "1": 0.9520000219345093,
241
+ "2": 0.9430000185966492,
242
+ "6": 0.9790000319480896,
243
+ "9": 0.968000054359436
244
+ },
245
+ "llm_test_accuracy": {
246
+ "0": 0.9510000348091125,
247
+ "1": 0.9670000672340393,
248
+ "2": 0.9520000219345093,
249
+ "6": 0.9930000305175781,
250
+ "9": 0.984000027179718
251
+ },
252
+ "llm_top_1_test_accuracy": {
253
+ "0": 0.568,
254
+ "1": 0.629,
255
+ "2": 0.679,
256
+ "6": 0.791,
257
+ "9": 0.551
258
+ },
259
+ "llm_top_2_test_accuracy": {
260
+ "0": 0.585,
261
+ "1": 0.666,
262
+ "2": 0.673,
263
+ "6": 0.801,
264
+ "9": 0.712
265
+ },
266
+ "llm_top_5_test_accuracy": {
267
+ "0": 0.72,
268
+ "1": 0.707,
269
+ "2": 0.764,
270
+ "6": 0.899,
271
+ "9": 0.864
272
+ },
273
+ "sae_top_1_test_accuracy": {
274
+ "0": 0.629,
275
+ "1": 0.687,
276
+ "2": 0.83,
277
+ "6": 0.978,
278
+ "9": 0.773
279
+ },
280
+ "sae_top_2_test_accuracy": {
281
+ "0": 0.647,
282
+ "1": 0.754,
283
+ "2": 0.895,
284
+ "6": 0.98,
285
+ "9": 0.802
286
+ },
287
+ "sae_top_5_test_accuracy": {
288
+ "0": 0.656,
289
+ "1": 0.757,
290
+ "2": 0.894,
291
+ "6": 0.983,
292
+ "9": 0.895
293
+ }
294
+ },
295
+ "LabHC/bias_in_bios_class_set2_results": {
296
+ "sae_test_accuracy": {
297
+ "11": 0.956000030040741,
298
+ "13": 0.9450000524520874,
299
+ "14": 0.9540000557899475,
300
+ "18": 0.9030000567436218,
301
+ "19": 0.9590000510215759
302
+ },
303
+ "llm_test_accuracy": {
304
+ "11": 0.971000075340271,
305
+ "13": 0.9520000219345093,
306
+ "14": 0.956000030040741,
307
+ "18": 0.9360000491142273,
308
+ "19": 0.9570000171661377
309
+ },
310
+ "llm_top_1_test_accuracy": {
311
+ "11": 0.561,
312
+ "13": 0.672,
313
+ "14": 0.631,
314
+ "18": 0.7,
315
+ "19": 0.786
316
+ },
317
+ "llm_top_2_test_accuracy": {
318
+ "11": 0.69,
319
+ "13": 0.72,
320
+ "14": 0.677,
321
+ "18": 0.721,
322
+ "19": 0.766
323
+ },
324
+ "llm_top_5_test_accuracy": {
325
+ "11": 0.776,
326
+ "13": 0.742,
327
+ "14": 0.768,
328
+ "18": 0.731,
329
+ "19": 0.841
330
+ },
331
+ "sae_top_1_test_accuracy": {
332
+ "11": 0.728,
333
+ "13": 0.705,
334
+ "14": 0.855,
335
+ "18": 0.726,
336
+ "19": 0.833
337
+ },
338
+ "sae_top_2_test_accuracy": {
339
+ "11": 0.733,
340
+ "13": 0.706,
341
+ "14": 0.866,
342
+ "18": 0.729,
343
+ "19": 0.866
344
+ },
345
+ "sae_top_5_test_accuracy": {
346
+ "11": 0.952,
347
+ "13": 0.818,
348
+ "14": 0.859,
349
+ "18": 0.909,
350
+ "19": 0.88
351
+ }
352
+ },
353
+ "LabHC/bias_in_bios_class_set3_results": {
354
+ "sae_test_accuracy": {
355
+ "20": 0.9540000557899475,
356
+ "21": 0.9230000376701355,
357
+ "22": 0.906000018119812,
358
+ "25": 0.956000030040741,
359
+ "26": 0.8790000677108765
360
+ },
361
+ "llm_test_accuracy": {
362
+ "20": 0.956000030040741,
363
+ "21": 0.9260000586509705,
364
+ "22": 0.9170000553131104,
365
+ "25": 0.9640000462532043,
366
+ "26": 0.8970000147819519
367
+ },
368
+ "llm_top_1_test_accuracy": {
369
+ "20": 0.699,
370
+ "21": 0.772,
371
+ "22": 0.641,
372
+ "25": 0.703,
373
+ "26": 0.644
374
+ },
375
+ "llm_top_2_test_accuracy": {
376
+ "20": 0.811,
377
+ "21": 0.757,
378
+ "22": 0.655,
379
+ "25": 0.762,
380
+ "26": 0.684
381
+ },
382
+ "llm_top_5_test_accuracy": {
383
+ "20": 0.861,
384
+ "21": 0.788,
385
+ "22": 0.712,
386
+ "25": 0.796,
387
+ "26": 0.668
388
+ },
389
+ "sae_top_1_test_accuracy": {
390
+ "20": 0.903,
391
+ "21": 0.751,
392
+ "22": 0.522,
393
+ "25": 0.882,
394
+ "26": 0.708
395
+ },
396
+ "sae_top_2_test_accuracy": {
397
+ "20": 0.913,
398
+ "21": 0.826,
399
+ "22": 0.869,
400
+ "25": 0.89,
401
+ "26": 0.777
402
+ },
403
+ "sae_top_5_test_accuracy": {
404
+ "20": 0.937,
405
+ "21": 0.838,
406
+ "22": 0.878,
407
+ "25": 0.909,
408
+ "26": 0.77
409
+ }
410
+ },
411
+ "canrager/amazon_reviews_mcauley_1and5_results": {
412
+ "sae_test_accuracy": {
413
+ "1": 0.9430000185966492,
414
+ "2": 0.9270000457763672,
415
+ "3": 0.9040000438690186,
416
+ "5": 0.893000066280365,
417
+ "6": 0.8440000414848328
418
+ },
419
+ "llm_test_accuracy": {
420
+ "1": 0.956000030040741,
421
+ "2": 0.937000036239624,
422
+ "3": 0.9160000681877136,
423
+ "5": 0.9170000553131104,
424
+ "6": 0.8720000386238098
425
+ },
426
+ "llm_top_1_test_accuracy": {
427
+ "1": 0.665,
428
+ "2": 0.596,
429
+ "3": 0.599,
430
+ "5": 0.576,
431
+ "6": 0.588
432
+ },
433
+ "llm_top_2_test_accuracy": {
434
+ "1": 0.73,
435
+ "2": 0.632,
436
+ "3": 0.617,
437
+ "5": 0.615,
438
+ "6": 0.609
439
+ },
440
+ "llm_top_5_test_accuracy": {
441
+ "1": 0.758,
442
+ "2": 0.646,
443
+ "3": 0.627,
444
+ "5": 0.646,
445
+ "6": 0.671
446
+ },
447
+ "sae_top_1_test_accuracy": {
448
+ "1": 0.911,
449
+ "2": 0.59,
450
+ "3": 0.543,
451
+ "5": 0.651,
452
+ "6": 0.636
453
+ },
454
+ "sae_top_2_test_accuracy": {
455
+ "1": 0.914,
456
+ "2": 0.65,
457
+ "3": 0.585,
458
+ "5": 0.645,
459
+ "6": 0.677
460
+ },
461
+ "sae_top_5_test_accuracy": {
462
+ "1": 0.936,
463
+ "2": 0.841,
464
+ "3": 0.767,
465
+ "5": 0.81,
466
+ "6": 0.741
467
+ }
468
+ },
469
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
470
+ "sae_test_accuracy": {
471
+ "1.0": 0.9660000205039978,
472
+ "5.0": 0.9630000591278076
473
+ },
474
+ "llm_test_accuracy": {
475
+ "1.0": 0.9820000529289246,
476
+ "5.0": 0.9820000529289246
477
+ },
478
+ "llm_top_1_test_accuracy": {
479
+ "1.0": 0.672,
480
+ "5.0": 0.672
481
+ },
482
+ "llm_top_2_test_accuracy": {
483
+ "1.0": 0.724,
484
+ "5.0": 0.724
485
+ },
486
+ "llm_top_5_test_accuracy": {
487
+ "1.0": 0.766,
488
+ "5.0": 0.766
489
+ },
490
+ "sae_top_1_test_accuracy": {
491
+ "1.0": 0.786,
492
+ "5.0": 0.786
493
+ },
494
+ "sae_top_2_test_accuracy": {
495
+ "1.0": 0.813,
496
+ "5.0": 0.813
497
+ },
498
+ "sae_top_5_test_accuracy": {
499
+ "1.0": 0.81,
500
+ "5.0": 0.81
501
+ }
502
+ },
503
+ "codeparrot/github-code_results": {
504
+ "sae_test_accuracy": {
505
+ "C": 0.9510000348091125,
506
+ "Python": 0.9790000319480896,
507
+ "HTML": 0.9890000224113464,
508
+ "Java": 0.9600000381469727,
509
+ "PHP": 0.9500000476837158
510
+ },
511
+ "llm_test_accuracy": {
512
+ "C": 0.9530000686645508,
513
+ "Python": 0.9860000610351562,
514
+ "HTML": 0.9880000352859497,
515
+ "Java": 0.9640000462532043,
516
+ "PHP": 0.9550000429153442
517
+ },
518
+ "llm_top_1_test_accuracy": {
519
+ "C": 0.657,
520
+ "Python": 0.637,
521
+ "HTML": 0.714,
522
+ "Java": 0.624,
523
+ "PHP": 0.582
524
+ },
525
+ "llm_top_2_test_accuracy": {
526
+ "C": 0.658,
527
+ "Python": 0.664,
528
+ "HTML": 0.801,
529
+ "Java": 0.697,
530
+ "PHP": 0.64
531
+ },
532
+ "llm_top_5_test_accuracy": {
533
+ "C": 0.752,
534
+ "Python": 0.726,
535
+ "HTML": 0.928,
536
+ "Java": 0.728,
537
+ "PHP": 0.694
538
+ },
539
+ "sae_top_1_test_accuracy": {
540
+ "C": 0.609,
541
+ "Python": 0.736,
542
+ "HTML": 0.892,
543
+ "Java": 0.712,
544
+ "PHP": 0.916
545
+ },
546
+ "sae_top_2_test_accuracy": {
547
+ "C": 0.644,
548
+ "Python": 0.743,
549
+ "HTML": 0.889,
550
+ "Java": 0.712,
551
+ "PHP": 0.911
552
+ },
553
+ "sae_top_5_test_accuracy": {
554
+ "C": 0.725,
555
+ "Python": 0.796,
556
+ "HTML": 0.914,
557
+ "Java": 0.794,
558
+ "PHP": 0.914
559
+ }
560
+ },
561
+ "fancyzhx/ag_news_results": {
562
+ "sae_test_accuracy": {
563
+ "0": 0.9320000410079956,
564
+ "1": 0.9810000658035278,
565
+ "2": 0.9310000538825989,
566
+ "3": 0.9390000700950623
567
+ },
568
+ "llm_test_accuracy": {
569
+ "0": 0.9380000233650208,
570
+ "1": 0.9880000352859497,
571
+ "2": 0.9300000667572021,
572
+ "3": 0.9550000429153442
573
+ },
574
+ "llm_top_1_test_accuracy": {
575
+ "0": 0.805,
576
+ "1": 0.67,
577
+ "2": 0.648,
578
+ "3": 0.666
579
+ },
580
+ "llm_top_2_test_accuracy": {
581
+ "0": 0.794,
582
+ "1": 0.795,
583
+ "2": 0.686,
584
+ "3": 0.79
585
+ },
586
+ "llm_top_5_test_accuracy": {
587
+ "0": 0.818,
588
+ "1": 0.867,
589
+ "2": 0.756,
590
+ "3": 0.836
591
+ },
592
+ "sae_top_1_test_accuracy": {
593
+ "0": 0.688,
594
+ "1": 0.934,
595
+ "2": 0.74,
596
+ "3": 0.629
597
+ },
598
+ "sae_top_2_test_accuracy": {
599
+ "0": 0.833,
600
+ "1": 0.93,
601
+ "2": 0.818,
602
+ "3": 0.657
603
+ },
604
+ "sae_top_5_test_accuracy": {
605
+ "0": 0.858,
606
+ "1": 0.934,
607
+ "2": 0.827,
608
+ "3": 0.8
609
+ }
610
+ },
611
+ "Helsinki-NLP/europarl_results": {
612
+ "sae_test_accuracy": {
613
+ "en": 1.0,
614
+ "fr": 0.9950000643730164,
615
+ "de": 0.9970000386238098,
616
+ "es": 0.9930000305175781,
617
+ "nl": 0.9960000514984131
618
+ },
619
+ "llm_test_accuracy": {
620
+ "en": 1.0,
621
+ "fr": 1.0,
622
+ "de": 0.999000072479248,
623
+ "es": 1.0,
624
+ "nl": 0.999000072479248
625
+ },
626
+ "llm_top_1_test_accuracy": {
627
+ "en": 0.748,
628
+ "fr": 0.59,
629
+ "de": 0.754,
630
+ "es": 0.494,
631
+ "nl": 0.662
632
+ },
633
+ "llm_top_2_test_accuracy": {
634
+ "en": 0.822,
635
+ "fr": 0.596,
636
+ "de": 0.831,
637
+ "es": 0.91,
638
+ "nl": 0.746
639
+ },
640
+ "llm_top_5_test_accuracy": {
641
+ "en": 0.887,
642
+ "fr": 0.922,
643
+ "de": 0.909,
644
+ "es": 0.982,
645
+ "nl": 0.865
646
+ },
647
+ "sae_top_1_test_accuracy": {
648
+ "en": 0.709,
649
+ "fr": 0.582,
650
+ "de": 0.906,
651
+ "es": 0.75,
652
+ "nl": 0.889
653
+ },
654
+ "sae_top_2_test_accuracy": {
655
+ "en": 0.97,
656
+ "fr": 0.97,
657
+ "de": 0.909,
658
+ "es": 0.903,
659
+ "nl": 0.998
660
+ },
661
+ "sae_top_5_test_accuracy": {
662
+ "en": 0.999,
663
+ "fr": 0.989,
664
+ "de": 0.943,
665
+ "es": 0.985,
666
+ "nl": 0.999
667
+ }
668
+ }
669
+ }
670
+ }
old_relu_eval_results/tpp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_0_custom_sae_eval_results.json ADDED
@@ -0,0 +1,414 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "tpp",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": false,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "1fcdc3f3-fc8d-4e93-bfd2-d7725eac7b72",
73
+ "datetime_epoch_millis": 1738807905009,
74
+ "eval_result_metrics": {
75
+ "tpp_metrics": {
76
+ "tpp_threshold_2_total_metric": 0.005549997091293335,
77
+ "tpp_threshold_2_intended_diff_only": 0.008399999141693114,
78
+ "tpp_threshold_2_unintended_diff_only": 0.00285000205039978,
79
+ "tpp_threshold_5_total_metric": 0.00882500559091568,
80
+ "tpp_threshold_5_intended_diff_only": 0.01300000548362732,
81
+ "tpp_threshold_5_unintended_diff_only": 0.004174999892711639,
82
+ "tpp_threshold_10_total_metric": 0.020424994826316833,
83
+ "tpp_threshold_10_intended_diff_only": 0.02619999647140503,
84
+ "tpp_threshold_10_unintended_diff_only": 0.005775001645088196,
85
+ "tpp_threshold_20_total_metric": 0.030799996852874757,
86
+ "tpp_threshold_20_intended_diff_only": 0.036699998378753665,
87
+ "tpp_threshold_20_unintended_diff_only": 0.005900001525878907,
88
+ "tpp_threshold_50_total_metric": 0.07465000301599503,
89
+ "tpp_threshold_50_intended_diff_only": 0.08340000510215759,
90
+ "tpp_threshold_50_unintended_diff_only": 0.008750002086162566,
91
+ "tpp_threshold_100_total_metric": 0.14097500890493392,
92
+ "tpp_threshold_100_intended_diff_only": 0.15220001339912415,
93
+ "tpp_threshold_100_unintended_diff_only": 0.011225004494190217,
94
+ "tpp_threshold_500_total_metric": 0.41430001705884933,
95
+ "tpp_threshold_500_intended_diff_only": 0.4369000196456909,
96
+ "tpp_threshold_500_unintended_diff_only": 0.022600002586841583
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results",
102
+ "tpp_threshold_2_total_metric": 0.003849995136260986,
103
+ "tpp_threshold_2_intended_diff_only": 0.00559999942779541,
104
+ "tpp_threshold_2_unintended_diff_only": 0.0017500042915344239,
105
+ "tpp_threshold_5_total_metric": 0.006900015473365784,
106
+ "tpp_threshold_5_intended_diff_only": 0.009200012683868409,
107
+ "tpp_threshold_5_unintended_diff_only": 0.0022999972105026247,
108
+ "tpp_threshold_10_total_metric": 0.01449999213218689,
109
+ "tpp_threshold_10_intended_diff_only": 0.017199993133544922,
110
+ "tpp_threshold_10_unintended_diff_only": 0.0027000010013580322,
111
+ "tpp_threshold_20_total_metric": 0.02719999849796295,
112
+ "tpp_threshold_20_intended_diff_only": 0.029799997806549072,
113
+ "tpp_threshold_20_unintended_diff_only": 0.0025999993085861206,
114
+ "tpp_threshold_50_total_metric": 0.05915001034736633,
115
+ "tpp_threshold_50_intended_diff_only": 0.0634000062942505,
116
+ "tpp_threshold_50_unintended_diff_only": 0.004249995946884156,
117
+ "tpp_threshold_100_total_metric": 0.12605001628398896,
118
+ "tpp_threshold_100_intended_diff_only": 0.13100001811981202,
119
+ "tpp_threshold_100_unintended_diff_only": 0.004950001835823059,
120
+ "tpp_threshold_500_total_metric": 0.4442000240087509,
121
+ "tpp_threshold_500_intended_diff_only": 0.45640002489089965,
122
+ "tpp_threshold_500_unintended_diff_only": 0.012200000882148742
123
+ },
124
+ {
125
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results",
126
+ "tpp_threshold_2_total_metric": 0.007249999046325684,
127
+ "tpp_threshold_2_intended_diff_only": 0.01119999885559082,
128
+ "tpp_threshold_2_unintended_diff_only": 0.003949999809265137,
129
+ "tpp_threshold_5_total_metric": 0.010749995708465576,
130
+ "tpp_threshold_5_intended_diff_only": 0.01679999828338623,
131
+ "tpp_threshold_5_unintended_diff_only": 0.006050002574920654,
132
+ "tpp_threshold_10_total_metric": 0.026349997520446776,
133
+ "tpp_threshold_10_intended_diff_only": 0.03519999980926514,
134
+ "tpp_threshold_10_unintended_diff_only": 0.008850002288818359,
135
+ "tpp_threshold_20_total_metric": 0.03439999520778656,
136
+ "tpp_threshold_20_intended_diff_only": 0.04359999895095825,
137
+ "tpp_threshold_20_unintended_diff_only": 0.009200003743171693,
138
+ "tpp_threshold_50_total_metric": 0.09014999568462372,
139
+ "tpp_threshold_50_intended_diff_only": 0.1034000039100647,
140
+ "tpp_threshold_50_unintended_diff_only": 0.013250008225440979,
141
+ "tpp_threshold_100_total_metric": 0.1559000015258789,
142
+ "tpp_threshold_100_intended_diff_only": 0.17340000867843627,
143
+ "tpp_threshold_100_unintended_diff_only": 0.017500007152557374,
144
+ "tpp_threshold_500_total_metric": 0.38440001010894775,
145
+ "tpp_threshold_500_intended_diff_only": 0.4174000144004822,
146
+ "tpp_threshold_500_unintended_diff_only": 0.03300000429153442
147
+ }
148
+ ],
149
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
150
+ "sae_lens_id": "custom_sae",
151
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_0",
152
+ "sae_lens_version": "5.4.1",
153
+ "sae_cfg_dict": {
154
+ "model_name": "gemma-2-2b",
155
+ "d_in": 2304,
156
+ "d_sae": 16384,
157
+ "hook_layer": 12,
158
+ "hook_name": "blocks.12.hook_resid_post",
159
+ "context_size": null,
160
+ "hook_head_index": null,
161
+ "architecture": "standard",
162
+ "apply_b_dec_to_input": null,
163
+ "finetuning_scaling_factor": null,
164
+ "activation_fn_str": "",
165
+ "prepend_bos": true,
166
+ "normalize_activations": "none",
167
+ "dtype": "bfloat16",
168
+ "device": "",
169
+ "dataset_path": "",
170
+ "dataset_trust_remote_code": true,
171
+ "seqpos_slice": [
172
+ null
173
+ ],
174
+ "training_tokens": -100000,
175
+ "sae_lens_training_version": null,
176
+ "neuronpedia_id": null
177
+ },
178
+ "eval_result_unstructured": {
179
+ "LabHC/bias_in_bios_class_set1": {
180
+ "0": {
181
+ "tpp_threshold_2_total_metric": 0.005750015377998352,
182
+ "tpp_threshold_2_intended_diff_only": 0.008000016212463379,
183
+ "tpp_threshold_2_unintended_diff_only": 0.002250000834465027,
184
+ "tpp_threshold_5_total_metric": 0.013500049710273743,
185
+ "tpp_threshold_5_intended_diff_only": 0.016000032424926758,
186
+ "tpp_threshold_5_unintended_diff_only": 0.002499982714653015,
187
+ "tpp_threshold_10_total_metric": 0.013500019907951355,
188
+ "tpp_threshold_10_intended_diff_only": 0.018000006675720215,
189
+ "tpp_threshold_10_unintended_diff_only": 0.00449998676776886,
190
+ "tpp_threshold_20_total_metric": 0.03125002980232239,
191
+ "tpp_threshold_20_intended_diff_only": 0.0350000262260437,
192
+ "tpp_threshold_20_unintended_diff_only": 0.0037499964237213135,
193
+ "tpp_threshold_50_total_metric": 0.0712500512599945,
194
+ "tpp_threshold_50_intended_diff_only": 0.07500004768371582,
195
+ "tpp_threshold_50_unintended_diff_only": 0.0037499964237213135,
196
+ "tpp_threshold_100_total_metric": 0.1625000536441803,
197
+ "tpp_threshold_100_intended_diff_only": 0.16700005531311035,
198
+ "tpp_threshold_100_unintended_diff_only": 0.004500001668930054,
199
+ "tpp_threshold_500_total_metric": 0.4487500488758087,
200
+ "tpp_threshold_500_intended_diff_only": 0.45500004291534424,
201
+ "tpp_threshold_500_unintended_diff_only": 0.0062499940395355225
202
+ },
203
+ "1": {
204
+ "tpp_threshold_2_total_metric": 0.0029999762773513794,
205
+ "tpp_threshold_2_intended_diff_only": 0.001999974250793457,
206
+ "tpp_threshold_2_unintended_diff_only": -0.0010000020265579224,
207
+ "tpp_threshold_5_total_metric": -0.0007500052452087402,
208
+ "tpp_threshold_5_intended_diff_only": 0.0,
209
+ "tpp_threshold_5_unintended_diff_only": 0.0007500052452087402,
210
+ "tpp_threshold_10_total_metric": 0.0034999698400497437,
211
+ "tpp_threshold_10_intended_diff_only": 0.0029999613761901855,
212
+ "tpp_threshold_10_unintended_diff_only": -0.0005000084638595581,
213
+ "tpp_threshold_20_total_metric": 0.0104999840259552,
214
+ "tpp_threshold_20_intended_diff_only": 0.014999985694885254,
215
+ "tpp_threshold_20_unintended_diff_only": 0.004500001668930054,
216
+ "tpp_threshold_50_total_metric": 0.03200000524520874,
217
+ "tpp_threshold_50_intended_diff_only": 0.03700000047683716,
218
+ "tpp_threshold_50_unintended_diff_only": 0.004999995231628418,
219
+ "tpp_threshold_100_total_metric": 0.07999999821186066,
220
+ "tpp_threshold_100_intended_diff_only": 0.08700001239776611,
221
+ "tpp_threshold_100_unintended_diff_only": 0.0070000141859054565,
222
+ "tpp_threshold_500_total_metric": 0.43025001883506775,
223
+ "tpp_threshold_500_intended_diff_only": 0.4390000104904175,
224
+ "tpp_threshold_500_unintended_diff_only": 0.008749991655349731
225
+ },
226
+ "2": {
227
+ "tpp_threshold_2_total_metric": 0.0012499988079071045,
228
+ "tpp_threshold_2_intended_diff_only": 0.0040000081062316895,
229
+ "tpp_threshold_2_unintended_diff_only": 0.002750009298324585,
230
+ "tpp_threshold_5_total_metric": 0.008750006556510925,
231
+ "tpp_threshold_5_intended_diff_only": 0.013000011444091797,
232
+ "tpp_threshold_5_unintended_diff_only": 0.004250004887580872,
233
+ "tpp_threshold_10_total_metric": 0.01950003206729889,
234
+ "tpp_threshold_10_intended_diff_only": 0.021000027656555176,
235
+ "tpp_threshold_10_unintended_diff_only": 0.0014999955892562866,
236
+ "tpp_threshold_20_total_metric": 0.028249993920326233,
237
+ "tpp_threshold_20_intended_diff_only": 0.02799999713897705,
238
+ "tpp_threshold_20_unintended_diff_only": -0.00024999678134918213,
239
+ "tpp_threshold_50_total_metric": 0.03624999523162842,
240
+ "tpp_threshold_50_intended_diff_only": 0.03799998760223389,
241
+ "tpp_threshold_50_unintended_diff_only": 0.0017499923706054688,
242
+ "tpp_threshold_100_total_metric": 0.08100000023841858,
243
+ "tpp_threshold_100_intended_diff_only": 0.08300000429153442,
244
+ "tpp_threshold_100_unintended_diff_only": 0.0020000040531158447,
245
+ "tpp_threshold_500_total_metric": 0.4345000237226486,
246
+ "tpp_threshold_500_intended_diff_only": 0.44700002670288086,
247
+ "tpp_threshold_500_unintended_diff_only": 0.012500002980232239
248
+ },
249
+ "6": {
250
+ "tpp_threshold_2_total_metric": 0.0015000104904174805,
251
+ "tpp_threshold_2_intended_diff_only": 0.003000020980834961,
252
+ "tpp_threshold_2_unintended_diff_only": 0.0015000104904174805,
253
+ "tpp_threshold_5_total_metric": 0.0025000423192977905,
254
+ "tpp_threshold_5_intended_diff_only": 0.0020000338554382324,
255
+ "tpp_threshold_5_unintended_diff_only": -0.0005000084638595581,
256
+ "tpp_threshold_10_total_metric": 0.002499982714653015,
257
+ "tpp_threshold_10_intended_diff_only": 0.004999995231628418,
258
+ "tpp_threshold_10_unintended_diff_only": 0.002500012516975403,
259
+ "tpp_threshold_20_total_metric": 0.00475001335144043,
260
+ "tpp_threshold_20_intended_diff_only": 0.0040000081062316895,
261
+ "tpp_threshold_20_unintended_diff_only": -0.0007500052452087402,
262
+ "tpp_threshold_50_total_metric": 0.00950002670288086,
263
+ "tpp_threshold_50_intended_diff_only": 0.012000024318695068,
264
+ "tpp_threshold_50_unintended_diff_only": 0.002499997615814209,
265
+ "tpp_threshold_100_total_metric": 0.015250012278556824,
266
+ "tpp_threshold_100_intended_diff_only": 0.018000006675720215,
267
+ "tpp_threshold_100_unintended_diff_only": 0.002749994397163391,
268
+ "tpp_threshold_500_total_metric": 0.4517500102519989,
269
+ "tpp_threshold_500_intended_diff_only": 0.4620000123977661,
270
+ "tpp_threshold_500_unintended_diff_only": 0.010250002145767212
271
+ },
272
+ "9": {
273
+ "tpp_threshold_2_total_metric": 0.007749974727630615,
274
+ "tpp_threshold_2_intended_diff_only": 0.010999977588653564,
275
+ "tpp_threshold_2_unintended_diff_only": 0.0032500028610229492,
276
+ "tpp_threshold_5_total_metric": 0.0104999840259552,
277
+ "tpp_threshold_5_intended_diff_only": 0.014999985694885254,
278
+ "tpp_threshold_5_unintended_diff_only": 0.004500001668930054,
279
+ "tpp_threshold_10_total_metric": 0.033499956130981445,
280
+ "tpp_threshold_10_intended_diff_only": 0.038999974727630615,
281
+ "tpp_threshold_10_unintended_diff_only": 0.00550001859664917,
282
+ "tpp_threshold_20_total_metric": 0.06124997138977051,
283
+ "tpp_threshold_20_intended_diff_only": 0.06699997186660767,
284
+ "tpp_threshold_20_unintended_diff_only": 0.005750000476837158,
285
+ "tpp_threshold_50_total_metric": 0.14674997329711914,
286
+ "tpp_threshold_50_intended_diff_only": 0.1549999713897705,
287
+ "tpp_threshold_50_unintended_diff_only": 0.008249998092651367,
288
+ "tpp_threshold_100_total_metric": 0.2915000170469284,
289
+ "tpp_threshold_100_intended_diff_only": 0.30000001192092896,
290
+ "tpp_threshold_100_unintended_diff_only": 0.00849999487400055,
291
+ "tpp_threshold_500_total_metric": 0.4557500183582306,
292
+ "tpp_threshold_500_intended_diff_only": 0.4790000319480896,
293
+ "tpp_threshold_500_unintended_diff_only": 0.02325001358985901
294
+ }
295
+ },
296
+ "canrager/amazon_reviews_mcauley_1and5": {
297
+ "1": {
298
+ "tpp_threshold_2_total_metric": 0.012250036001205444,
299
+ "tpp_threshold_2_intended_diff_only": 0.016000032424926758,
300
+ "tpp_threshold_2_unintended_diff_only": 0.0037499964237213135,
301
+ "tpp_threshold_5_total_metric": 0.011750057339668274,
302
+ "tpp_threshold_5_intended_diff_only": 0.01500004529953003,
303
+ "tpp_threshold_5_unintended_diff_only": 0.0032499879598617554,
304
+ "tpp_threshold_10_total_metric": 0.012500032782554626,
305
+ "tpp_threshold_10_intended_diff_only": 0.021000027656555176,
306
+ "tpp_threshold_10_unintended_diff_only": 0.00849999487400055,
307
+ "tpp_threshold_20_total_metric": 0.022000029683113098,
308
+ "tpp_threshold_20_intended_diff_only": 0.03100001811981201,
309
+ "tpp_threshold_20_unintended_diff_only": 0.008999988436698914,
310
+ "tpp_threshold_50_total_metric": 0.06149999797344208,
311
+ "tpp_threshold_50_intended_diff_only": 0.0690000057220459,
312
+ "tpp_threshold_50_unintended_diff_only": 0.007500007748603821,
313
+ "tpp_threshold_100_total_metric": 0.10575002431869507,
314
+ "tpp_threshold_100_intended_diff_only": 0.11800003051757812,
315
+ "tpp_threshold_100_unintended_diff_only": 0.012250006198883057,
316
+ "tpp_threshold_500_total_metric": 0.41975003480911255,
317
+ "tpp_threshold_500_intended_diff_only": 0.44200003147125244,
318
+ "tpp_threshold_500_unintended_diff_only": 0.022249996662139893
319
+ },
320
+ "2": {
321
+ "tpp_threshold_2_total_metric": 0.0007499605417251587,
322
+ "tpp_threshold_2_intended_diff_only": 0.001999974250793457,
323
+ "tpp_threshold_2_unintended_diff_only": 0.0012500137090682983,
324
+ "tpp_threshold_5_total_metric": -0.013000041246414185,
325
+ "tpp_threshold_5_intended_diff_only": 0.0029999613761901855,
326
+ "tpp_threshold_5_unintended_diff_only": 0.01600000262260437,
327
+ "tpp_threshold_10_total_metric": 0.0022499561309814453,
328
+ "tpp_threshold_10_intended_diff_only": 0.01699995994567871,
329
+ "tpp_threshold_10_unintended_diff_only": 0.014750003814697266,
330
+ "tpp_threshold_20_total_metric": 0.0157499760389328,
331
+ "tpp_threshold_20_intended_diff_only": 0.02399998903274536,
332
+ "tpp_threshold_20_unintended_diff_only": 0.008250012993812561,
333
+ "tpp_threshold_50_total_metric": 0.05649995803833008,
334
+ "tpp_threshold_50_intended_diff_only": 0.08099997043609619,
335
+ "tpp_threshold_50_unintended_diff_only": 0.024500012397766113,
336
+ "tpp_threshold_100_total_metric": 0.12999998033046722,
337
+ "tpp_threshold_100_intended_diff_only": 0.15799999237060547,
338
+ "tpp_threshold_100_unintended_diff_only": 0.028000012040138245,
339
+ "tpp_threshold_500_total_metric": 0.3737499713897705,
340
+ "tpp_threshold_500_intended_diff_only": 0.4269999861717224,
341
+ "tpp_threshold_500_unintended_diff_only": 0.053250014781951904
342
+ },
343
+ "3": {
344
+ "tpp_threshold_2_total_metric": -0.0070000141859054565,
345
+ "tpp_threshold_2_intended_diff_only": -0.0040000081062316895,
346
+ "tpp_threshold_2_unintended_diff_only": 0.003000006079673767,
347
+ "tpp_threshold_5_total_metric": 0.004249989986419678,
348
+ "tpp_threshold_5_intended_diff_only": 0.0040000081062316895,
349
+ "tpp_threshold_5_unintended_diff_only": -0.0002499818801879883,
350
+ "tpp_threshold_10_total_metric": 0.024749979376792908,
351
+ "tpp_threshold_10_intended_diff_only": 0.02899998426437378,
352
+ "tpp_threshold_10_unintended_diff_only": 0.004250004887580872,
353
+ "tpp_threshold_20_total_metric": 0.014499977231025696,
354
+ "tpp_threshold_20_intended_diff_only": 0.02399998903274536,
355
+ "tpp_threshold_20_unintended_diff_only": 0.009500011801719666,
356
+ "tpp_threshold_50_total_metric": 0.07250000536441803,
357
+ "tpp_threshold_50_intended_diff_only": 0.0820000171661377,
358
+ "tpp_threshold_50_unintended_diff_only": 0.009500011801719666,
359
+ "tpp_threshold_100_total_metric": 0.12725001573562622,
360
+ "tpp_threshold_100_intended_diff_only": 0.1420000195503235,
361
+ "tpp_threshold_100_unintended_diff_only": 0.014750003814697266,
362
+ "tpp_threshold_500_total_metric": 0.3969999998807907,
363
+ "tpp_threshold_500_intended_diff_only": 0.4259999990463257,
364
+ "tpp_threshold_500_unintended_diff_only": 0.028999999165534973
365
+ },
366
+ "5": {
367
+ "tpp_threshold_2_total_metric": -0.006999999284744263,
368
+ "tpp_threshold_2_intended_diff_only": -0.0040000081062316895,
369
+ "tpp_threshold_2_unintended_diff_only": 0.0029999911785125732,
370
+ "tpp_threshold_5_total_metric": -0.0015000104904174805,
371
+ "tpp_threshold_5_intended_diff_only": 0.004999995231628418,
372
+ "tpp_threshold_5_unintended_diff_only": 0.0065000057220458984,
373
+ "tpp_threshold_10_total_metric": 0.007250010967254639,
374
+ "tpp_threshold_10_intended_diff_only": 0.017000019550323486,
375
+ "tpp_threshold_10_unintended_diff_only": 0.009750008583068848,
376
+ "tpp_threshold_20_total_metric": 0.02724999189376831,
377
+ "tpp_threshold_20_intended_diff_only": 0.03799998760223389,
378
+ "tpp_threshold_20_unintended_diff_only": 0.010749995708465576,
379
+ "tpp_threshold_50_total_metric": 0.07350002229213715,
380
+ "tpp_threshold_50_intended_diff_only": 0.08600002527236938,
381
+ "tpp_threshold_50_unintended_diff_only": 0.012500002980232239,
382
+ "tpp_threshold_100_total_metric": 0.13650000095367432,
383
+ "tpp_threshold_100_intended_diff_only": 0.15700000524520874,
384
+ "tpp_threshold_100_unintended_diff_only": 0.020500004291534424,
385
+ "tpp_threshold_500_total_metric": 0.3790000081062317,
386
+ "tpp_threshold_500_intended_diff_only": 0.41600000858306885,
387
+ "tpp_threshold_500_unintended_diff_only": 0.03700000047683716
388
+ },
389
+ "6": {
390
+ "tpp_threshold_2_total_metric": 0.037250012159347534,
391
+ "tpp_threshold_2_intended_diff_only": 0.046000003814697266,
392
+ "tpp_threshold_2_unintended_diff_only": 0.008749991655349731,
393
+ "tpp_threshold_5_total_metric": 0.052249982953071594,
394
+ "tpp_threshold_5_intended_diff_only": 0.05699998140335083,
395
+ "tpp_threshold_5_unintended_diff_only": 0.004749998450279236,
396
+ "tpp_threshold_10_total_metric": 0.08500000834465027,
397
+ "tpp_threshold_10_intended_diff_only": 0.09200000762939453,
398
+ "tpp_threshold_10_unintended_diff_only": 0.006999999284744263,
399
+ "tpp_threshold_20_total_metric": 0.0925000011920929,
400
+ "tpp_threshold_20_intended_diff_only": 0.10100001096725464,
401
+ "tpp_threshold_20_unintended_diff_only": 0.008500009775161743,
402
+ "tpp_threshold_50_total_metric": 0.18674999475479126,
403
+ "tpp_threshold_50_intended_diff_only": 0.19900000095367432,
404
+ "tpp_threshold_50_unintended_diff_only": 0.012250006198883057,
405
+ "tpp_threshold_100_total_metric": 0.2799999862909317,
406
+ "tpp_threshold_100_intended_diff_only": 0.2919999957084656,
407
+ "tpp_threshold_100_unintended_diff_only": 0.012000009417533875,
408
+ "tpp_threshold_500_total_metric": 0.3525000363588333,
409
+ "tpp_threshold_500_intended_diff_only": 0.3760000467300415,
410
+ "tpp_threshold_500_unintended_diff_only": 0.02350001037120819
411
+ }
412
+ }
413
+ }
414
+ }
old_relu_eval_results/tpp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_1_custom_sae_eval_results.json ADDED
@@ -0,0 +1,414 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "tpp",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": false,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "dca94803-7711-4df2-8ffe-a1d99bedfde5",
73
+ "datetime_epoch_millis": 1738808022044,
74
+ "eval_result_metrics": {
75
+ "tpp_metrics": {
76
+ "tpp_threshold_2_total_metric": 0.0034999996423721313,
77
+ "tpp_threshold_2_intended_diff_only": 0.00690000057220459,
78
+ "tpp_threshold_2_unintended_diff_only": 0.0034000009298324587,
79
+ "tpp_threshold_5_total_metric": 0.007625006139278412,
80
+ "tpp_threshold_5_intended_diff_only": 0.011600005626678466,
81
+ "tpp_threshold_5_unintended_diff_only": 0.003974999487400055,
82
+ "tpp_threshold_10_total_metric": 0.02060000002384186,
83
+ "tpp_threshold_10_intended_diff_only": 0.026100003719329835,
84
+ "tpp_threshold_10_unintended_diff_only": 0.005500003695487976,
85
+ "tpp_threshold_20_total_metric": 0.03362499922513962,
86
+ "tpp_threshold_20_intended_diff_only": 0.038699996471405027,
87
+ "tpp_threshold_20_unintended_diff_only": 0.005074997246265411,
88
+ "tpp_threshold_50_total_metric": 0.072200009226799,
89
+ "tpp_threshold_50_intended_diff_only": 0.08010000586509705,
90
+ "tpp_threshold_50_unintended_diff_only": 0.007899996638298035,
91
+ "tpp_threshold_100_total_metric": 0.1294749990105629,
92
+ "tpp_threshold_100_intended_diff_only": 0.14089999794960023,
93
+ "tpp_threshold_100_unintended_diff_only": 0.011424998939037322,
94
+ "tpp_threshold_500_total_metric": 0.40307500660419465,
95
+ "tpp_threshold_500_intended_diff_only": 0.4214000105857849,
96
+ "tpp_threshold_500_unintended_diff_only": 0.01832500398159027
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results",
102
+ "tpp_threshold_2_total_metric": 0.0038499802350997923,
103
+ "tpp_threshold_2_intended_diff_only": 0.005799984931945801,
104
+ "tpp_threshold_2_unintended_diff_only": 0.0019500046968460084,
105
+ "tpp_threshold_5_total_metric": 0.006150004267692566,
106
+ "tpp_threshold_5_intended_diff_only": 0.008399999141693116,
107
+ "tpp_threshold_5_unintended_diff_only": 0.0022499948740005494,
108
+ "tpp_threshold_10_total_metric": 0.017299994826316833,
109
+ "tpp_threshold_10_intended_diff_only": 0.019599997997283937,
110
+ "tpp_threshold_10_unintended_diff_only": 0.002300003170967102,
111
+ "tpp_threshold_20_total_metric": 0.03334999978542328,
112
+ "tpp_threshold_20_intended_diff_only": 0.035399997234344484,
113
+ "tpp_threshold_20_unintended_diff_only": 0.0020499974489212036,
114
+ "tpp_threshold_50_total_metric": 0.05845000743865967,
115
+ "tpp_threshold_50_intended_diff_only": 0.062400007247924806,
116
+ "tpp_threshold_50_unintended_diff_only": 0.003949999809265137,
117
+ "tpp_threshold_100_total_metric": 0.12464999556541442,
118
+ "tpp_threshold_100_intended_diff_only": 0.12999999523162842,
119
+ "tpp_threshold_100_unintended_diff_only": 0.00534999966621399,
120
+ "tpp_threshold_500_total_metric": 0.43650000989437104,
121
+ "tpp_threshold_500_intended_diff_only": 0.44520001411437987,
122
+ "tpp_threshold_500_unintended_diff_only": 0.00870000422000885
123
+ },
124
+ {
125
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results",
126
+ "tpp_threshold_2_total_metric": 0.0031500190496444704,
127
+ "tpp_threshold_2_intended_diff_only": 0.008000016212463379,
128
+ "tpp_threshold_2_unintended_diff_only": 0.0048499971628189085,
129
+ "tpp_threshold_5_total_metric": 0.009100008010864257,
130
+ "tpp_threshold_5_intended_diff_only": 0.014800012111663818,
131
+ "tpp_threshold_5_unintended_diff_only": 0.00570000410079956,
132
+ "tpp_threshold_10_total_metric": 0.02390000522136688,
133
+ "tpp_threshold_10_intended_diff_only": 0.032600009441375734,
134
+ "tpp_threshold_10_unintended_diff_only": 0.00870000422000885,
135
+ "tpp_threshold_20_total_metric": 0.033899998664855956,
136
+ "tpp_threshold_20_intended_diff_only": 0.041999995708465576,
137
+ "tpp_threshold_20_unintended_diff_only": 0.008099997043609619,
138
+ "tpp_threshold_50_total_metric": 0.08595001101493835,
139
+ "tpp_threshold_50_intended_diff_only": 0.09780000448226929,
140
+ "tpp_threshold_50_unintended_diff_only": 0.011849993467330932,
141
+ "tpp_threshold_100_total_metric": 0.13430000245571136,
142
+ "tpp_threshold_100_intended_diff_only": 0.15180000066757202,
143
+ "tpp_threshold_100_unintended_diff_only": 0.017499998211860657,
144
+ "tpp_threshold_500_total_metric": 0.36965000331401826,
145
+ "tpp_threshold_500_intended_diff_only": 0.39760000705718995,
146
+ "tpp_threshold_500_unintended_diff_only": 0.027950003743171692
147
+ }
148
+ ],
149
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
150
+ "sae_lens_id": "custom_sae",
151
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_1",
152
+ "sae_lens_version": "5.4.1",
153
+ "sae_cfg_dict": {
154
+ "model_name": "gemma-2-2b",
155
+ "d_in": 2304,
156
+ "d_sae": 16384,
157
+ "hook_layer": 12,
158
+ "hook_name": "blocks.12.hook_resid_post",
159
+ "context_size": null,
160
+ "hook_head_index": null,
161
+ "architecture": "standard",
162
+ "apply_b_dec_to_input": null,
163
+ "finetuning_scaling_factor": null,
164
+ "activation_fn_str": "",
165
+ "prepend_bos": true,
166
+ "normalize_activations": "none",
167
+ "dtype": "bfloat16",
168
+ "device": "",
169
+ "dataset_path": "",
170
+ "dataset_trust_remote_code": true,
171
+ "seqpos_slice": [
172
+ null
173
+ ],
174
+ "training_tokens": -100000,
175
+ "sae_lens_training_version": null,
176
+ "neuronpedia_id": null
177
+ },
178
+ "eval_result_unstructured": {
179
+ "LabHC/bias_in_bios_class_set1": {
180
+ "0": {
181
+ "tpp_threshold_2_total_metric": 0.005000025033950806,
182
+ "tpp_threshold_2_intended_diff_only": 0.00700002908706665,
183
+ "tpp_threshold_2_unintended_diff_only": 0.0020000040531158447,
184
+ "tpp_threshold_5_total_metric": 0.010250017046928406,
185
+ "tpp_threshold_5_intended_diff_only": 0.013000011444091797,
186
+ "tpp_threshold_5_unintended_diff_only": 0.002749994397163391,
187
+ "tpp_threshold_10_total_metric": 0.017250031232833862,
188
+ "tpp_threshold_10_intended_diff_only": 0.020000040531158447,
189
+ "tpp_threshold_10_unintended_diff_only": 0.002750009298324585,
190
+ "tpp_threshold_20_total_metric": 0.025500014424324036,
191
+ "tpp_threshold_20_intended_diff_only": 0.02799999713897705,
192
+ "tpp_threshold_20_unintended_diff_only": 0.002499982714653015,
193
+ "tpp_threshold_50_total_metric": 0.053750038146972656,
194
+ "tpp_threshold_50_intended_diff_only": 0.057000041007995605,
195
+ "tpp_threshold_50_unintended_diff_only": 0.0032500028610229492,
196
+ "tpp_threshold_100_total_metric": 0.1327500194311142,
197
+ "tpp_threshold_100_intended_diff_only": 0.1380000114440918,
198
+ "tpp_threshold_100_unintended_diff_only": 0.0052499920129776,
199
+ "tpp_threshold_500_total_metric": 0.43675003945827484,
200
+ "tpp_threshold_500_intended_diff_only": 0.44200003147125244,
201
+ "tpp_threshold_500_unintended_diff_only": 0.0052499920129776
202
+ },
203
+ "1": {
204
+ "tpp_threshold_2_total_metric": 0.0037499666213989258,
205
+ "tpp_threshold_2_intended_diff_only": 0.0029999613761901855,
206
+ "tpp_threshold_2_unintended_diff_only": -0.0007500052452087402,
207
+ "tpp_threshold_5_total_metric": -0.0012499988079071045,
208
+ "tpp_threshold_5_intended_diff_only": 0.0,
209
+ "tpp_threshold_5_unintended_diff_only": 0.0012499988079071045,
210
+ "tpp_threshold_10_total_metric": 0.005000010132789612,
211
+ "tpp_threshold_10_intended_diff_only": 0.0040000081062316895,
212
+ "tpp_threshold_10_unintended_diff_only": -0.0010000020265579224,
213
+ "tpp_threshold_20_total_metric": 0.017249956727027893,
214
+ "tpp_threshold_20_intended_diff_only": 0.02199995517730713,
215
+ "tpp_threshold_20_unintended_diff_only": 0.004749998450279236,
216
+ "tpp_threshold_50_total_metric": 0.03124995529651642,
217
+ "tpp_threshold_50_intended_diff_only": 0.035999953746795654,
218
+ "tpp_threshold_50_unintended_diff_only": 0.004749998450279236,
219
+ "tpp_threshold_100_total_metric": 0.07649995386600494,
220
+ "tpp_threshold_100_intended_diff_only": 0.08199995756149292,
221
+ "tpp_threshold_100_unintended_diff_only": 0.005500003695487976,
222
+ "tpp_threshold_500_total_metric": 0.40824997425079346,
223
+ "tpp_threshold_500_intended_diff_only": 0.4169999957084656,
224
+ "tpp_threshold_500_unintended_diff_only": 0.00875002145767212
225
+ },
226
+ "2": {
227
+ "tpp_threshold_2_total_metric": 0.003749951720237732,
228
+ "tpp_threshold_2_intended_diff_only": 0.006999969482421875,
229
+ "tpp_threshold_2_unintended_diff_only": 0.003250017762184143,
230
+ "tpp_threshold_5_total_metric": 0.010749995708465576,
231
+ "tpp_threshold_5_intended_diff_only": 0.014999985694885254,
232
+ "tpp_threshold_5_unintended_diff_only": 0.004249989986419678,
233
+ "tpp_threshold_10_total_metric": 0.022749975323677063,
234
+ "tpp_threshold_10_intended_diff_only": 0.02499997615814209,
235
+ "tpp_threshold_10_unintended_diff_only": 0.002250000834465027,
236
+ "tpp_threshold_20_total_metric": 0.0339999794960022,
237
+ "tpp_threshold_20_intended_diff_only": 0.03299999237060547,
238
+ "tpp_threshold_20_unintended_diff_only": -0.0009999871253967285,
239
+ "tpp_threshold_50_total_metric": 0.03925001621246338,
240
+ "tpp_threshold_50_intended_diff_only": 0.04000002145767212,
241
+ "tpp_threshold_50_unintended_diff_only": 0.0007500052452087402,
242
+ "tpp_threshold_100_total_metric": 0.12300001084804535,
243
+ "tpp_threshold_100_intended_diff_only": 0.12400001287460327,
244
+ "tpp_threshold_100_unintended_diff_only": 0.0010000020265579224,
245
+ "tpp_threshold_500_total_metric": 0.43324999511241913,
246
+ "tpp_threshold_500_intended_diff_only": 0.4390000104904175,
247
+ "tpp_threshold_500_unintended_diff_only": 0.005750015377998352
248
+ },
249
+ "6": {
250
+ "tpp_threshold_2_total_metric": 0.002499997615814209,
251
+ "tpp_threshold_2_intended_diff_only": 0.0040000081062316895,
252
+ "tpp_threshold_2_unintended_diff_only": 0.0015000104904174805,
253
+ "tpp_threshold_5_total_metric": 0.0030000507831573486,
254
+ "tpp_threshold_5_intended_diff_only": 0.0020000338554382324,
255
+ "tpp_threshold_5_unintended_diff_only": -0.0010000169277191162,
256
+ "tpp_threshold_10_total_metric": 0.0037499815225601196,
257
+ "tpp_threshold_10_intended_diff_only": 0.0059999823570251465,
258
+ "tpp_threshold_10_unintended_diff_only": 0.002250000834465027,
259
+ "tpp_threshold_20_total_metric": 0.008750036358833313,
260
+ "tpp_threshold_20_intended_diff_only": 0.00700002908706665,
261
+ "tpp_threshold_20_unintended_diff_only": -0.0017500072717666626,
262
+ "tpp_threshold_50_total_metric": 0.01150001585483551,
263
+ "tpp_threshold_50_intended_diff_only": 0.013000011444091797,
264
+ "tpp_threshold_50_unintended_diff_only": 0.0014999955892562866,
265
+ "tpp_threshold_100_total_metric": 0.013750031590461731,
266
+ "tpp_threshold_100_intended_diff_only": 0.017000019550323486,
267
+ "tpp_threshold_100_unintended_diff_only": 0.0032499879598617554,
268
+ "tpp_threshold_500_total_metric": 0.4442500174045563,
269
+ "tpp_threshold_500_intended_diff_only": 0.4490000009536743,
270
+ "tpp_threshold_500_unintended_diff_only": 0.004749983549118042
271
+ },
272
+ "9": {
273
+ "tpp_threshold_2_total_metric": 0.00424996018409729,
274
+ "tpp_threshold_2_intended_diff_only": 0.007999956607818604,
275
+ "tpp_threshold_2_unintended_diff_only": 0.0037499964237213135,
276
+ "tpp_threshold_5_total_metric": 0.007999956607818604,
277
+ "tpp_threshold_5_intended_diff_only": 0.011999964714050293,
278
+ "tpp_threshold_5_unintended_diff_only": 0.0040000081062316895,
279
+ "tpp_threshold_10_total_metric": 0.03774997591972351,
280
+ "tpp_threshold_10_intended_diff_only": 0.042999982833862305,
281
+ "tpp_threshold_10_unintended_diff_only": 0.005250006914138794,
282
+ "tpp_threshold_20_total_metric": 0.08125001192092896,
283
+ "tpp_threshold_20_intended_diff_only": 0.08700001239776611,
284
+ "tpp_threshold_20_unintended_diff_only": 0.005750000476837158,
285
+ "tpp_threshold_50_total_metric": 0.15650001168251038,
286
+ "tpp_threshold_50_intended_diff_only": 0.16600000858306885,
287
+ "tpp_threshold_50_unintended_diff_only": 0.009499996900558472,
288
+ "tpp_threshold_100_total_metric": 0.2772499620914459,
289
+ "tpp_threshold_100_intended_diff_only": 0.2889999747276306,
290
+ "tpp_threshold_100_unintended_diff_only": 0.011750012636184692,
291
+ "tpp_threshold_500_total_metric": 0.46000002324581146,
292
+ "tpp_threshold_500_intended_diff_only": 0.4790000319480896,
293
+ "tpp_threshold_500_unintended_diff_only": 0.019000008702278137
294
+ }
295
+ },
296
+ "canrager/amazon_reviews_mcauley_1and5": {
297
+ "1": {
298
+ "tpp_threshold_2_total_metric": 0.009750023484230042,
299
+ "tpp_threshold_2_intended_diff_only": 0.013000011444091797,
300
+ "tpp_threshold_2_unintended_diff_only": 0.0032499879598617554,
301
+ "tpp_threshold_5_total_metric": 0.007750034332275391,
302
+ "tpp_threshold_5_intended_diff_only": 0.010000050067901611,
303
+ "tpp_threshold_5_unintended_diff_only": 0.0022500157356262207,
304
+ "tpp_threshold_10_total_metric": 0.012500032782554626,
305
+ "tpp_threshold_10_intended_diff_only": 0.021000027656555176,
306
+ "tpp_threshold_10_unintended_diff_only": 0.00849999487400055,
307
+ "tpp_threshold_20_total_metric": 0.014500007033348083,
308
+ "tpp_threshold_20_intended_diff_only": 0.023000001907348633,
309
+ "tpp_threshold_20_unintended_diff_only": 0.00849999487400055,
310
+ "tpp_threshold_50_total_metric": 0.05975005030632019,
311
+ "tpp_threshold_50_intended_diff_only": 0.06600004434585571,
312
+ "tpp_threshold_50_unintended_diff_only": 0.0062499940395355225,
313
+ "tpp_threshold_100_total_metric": 0.09250001609325409,
314
+ "tpp_threshold_100_intended_diff_only": 0.10600000619888306,
315
+ "tpp_threshold_100_unintended_diff_only": 0.013499990105628967,
316
+ "tpp_threshold_500_total_metric": 0.3750000149011612,
317
+ "tpp_threshold_500_intended_diff_only": 0.3970000147819519,
318
+ "tpp_threshold_500_unintended_diff_only": 0.02199999988079071
319
+ },
320
+ "2": {
321
+ "tpp_threshold_2_total_metric": 0.0045000165700912476,
322
+ "tpp_threshold_2_intended_diff_only": 0.009000003337860107,
323
+ "tpp_threshold_2_unintended_diff_only": 0.00449998676776886,
324
+ "tpp_threshold_5_total_metric": -0.002500012516975403,
325
+ "tpp_threshold_5_intended_diff_only": 0.013000011444091797,
326
+ "tpp_threshold_5_unintended_diff_only": 0.0155000239610672,
327
+ "tpp_threshold_10_total_metric": 0.010499954223632812,
328
+ "tpp_threshold_10_intended_diff_only": 0.02199995517730713,
329
+ "tpp_threshold_10_unintended_diff_only": 0.011500000953674316,
330
+ "tpp_threshold_20_total_metric": 0.031749993562698364,
331
+ "tpp_threshold_20_intended_diff_only": 0.03700000047683716,
332
+ "tpp_threshold_20_unintended_diff_only": 0.005250006914138794,
333
+ "tpp_threshold_50_total_metric": 0.06824998557567596,
334
+ "tpp_threshold_50_intended_diff_only": 0.08899998664855957,
335
+ "tpp_threshold_50_unintended_diff_only": 0.020750001072883606,
336
+ "tpp_threshold_100_total_metric": 0.11849997937679291,
337
+ "tpp_threshold_100_intended_diff_only": 0.14499998092651367,
338
+ "tpp_threshold_100_unintended_diff_only": 0.026500001549720764,
339
+ "tpp_threshold_500_total_metric": 0.37724997103214264,
340
+ "tpp_threshold_500_intended_diff_only": 0.4179999828338623,
341
+ "tpp_threshold_500_unintended_diff_only": 0.040750011801719666
342
+ },
343
+ "3": {
344
+ "tpp_threshold_2_total_metric": -0.008250012993812561,
345
+ "tpp_threshold_2_intended_diff_only": -0.004999995231628418,
346
+ "tpp_threshold_2_unintended_diff_only": 0.003250017762184143,
347
+ "tpp_threshold_5_total_metric": -0.002750024199485779,
348
+ "tpp_threshold_5_intended_diff_only": -0.0020000338554382324,
349
+ "tpp_threshold_5_unintended_diff_only": 0.0007499903440475464,
350
+ "tpp_threshold_10_total_metric": 0.018250003457069397,
351
+ "tpp_threshold_10_intended_diff_only": 0.022000014781951904,
352
+ "tpp_threshold_10_unintended_diff_only": 0.0037500113248825073,
353
+ "tpp_threshold_20_total_metric": 0.010499969124794006,
354
+ "tpp_threshold_20_intended_diff_only": 0.01699995994567871,
355
+ "tpp_threshold_20_unintended_diff_only": 0.006499990820884705,
356
+ "tpp_threshold_50_total_metric": 0.06024998426437378,
357
+ "tpp_threshold_50_intended_diff_only": 0.06699997186660767,
358
+ "tpp_threshold_50_unintended_diff_only": 0.006749987602233887,
359
+ "tpp_threshold_100_total_metric": 0.11424997448921204,
360
+ "tpp_threshold_100_intended_diff_only": 0.12699997425079346,
361
+ "tpp_threshold_100_unintended_diff_only": 0.012749999761581421,
362
+ "tpp_threshold_500_total_metric": 0.3764999955892563,
363
+ "tpp_threshold_500_intended_diff_only": 0.39800000190734863,
364
+ "tpp_threshold_500_unintended_diff_only": 0.021500006318092346
365
+ },
366
+ "5": {
367
+ "tpp_threshold_2_total_metric": -0.0057499706745147705,
368
+ "tpp_threshold_2_intended_diff_only": -0.001999974250793457,
369
+ "tpp_threshold_2_unintended_diff_only": 0.0037499964237213135,
370
+ "tpp_threshold_5_total_metric": 0.009500041604042053,
371
+ "tpp_threshold_5_intended_diff_only": 0.016000032424926758,
372
+ "tpp_threshold_5_unintended_diff_only": 0.006499990820884705,
373
+ "tpp_threshold_10_total_metric": 0.012000009417533875,
374
+ "tpp_threshold_10_intended_diff_only": 0.022000014781951904,
375
+ "tpp_threshold_10_unintended_diff_only": 0.01000000536441803,
376
+ "tpp_threshold_20_total_metric": 0.027250006794929504,
377
+ "tpp_threshold_20_intended_diff_only": 0.03700000047683716,
378
+ "tpp_threshold_20_unintended_diff_only": 0.009749993681907654,
379
+ "tpp_threshold_50_total_metric": 0.07799999415874481,
380
+ "tpp_threshold_50_intended_diff_only": 0.08899998664855957,
381
+ "tpp_threshold_50_unintended_diff_only": 0.010999992489814758,
382
+ "tpp_threshold_100_total_metric": 0.11225004494190216,
383
+ "tpp_threshold_100_intended_diff_only": 0.13100004196166992,
384
+ "tpp_threshold_100_unintended_diff_only": 0.01874999701976776,
385
+ "tpp_threshold_500_total_metric": 0.36875003576278687,
386
+ "tpp_threshold_500_intended_diff_only": 0.40000003576278687,
387
+ "tpp_threshold_500_unintended_diff_only": 0.03125
388
+ },
389
+ "6": {
390
+ "tpp_threshold_2_total_metric": 0.015500038862228394,
391
+ "tpp_threshold_2_intended_diff_only": 0.025000035762786865,
392
+ "tpp_threshold_2_unintended_diff_only": 0.009499996900558472,
393
+ "tpp_threshold_5_total_metric": 0.03350000083446503,
394
+ "tpp_threshold_5_intended_diff_only": 0.03700000047683716,
395
+ "tpp_threshold_5_unintended_diff_only": 0.0034999996423721313,
396
+ "tpp_threshold_10_total_metric": 0.0662500262260437,
397
+ "tpp_threshold_10_intended_diff_only": 0.07600003480911255,
398
+ "tpp_threshold_10_unintended_diff_only": 0.009750008583068848,
399
+ "tpp_threshold_20_total_metric": 0.08550001680850983,
400
+ "tpp_threshold_20_intended_diff_only": 0.09600001573562622,
401
+ "tpp_threshold_20_unintended_diff_only": 0.010499998927116394,
402
+ "tpp_threshold_50_total_metric": 0.16350004076957703,
403
+ "tpp_threshold_50_intended_diff_only": 0.17800003290176392,
404
+ "tpp_threshold_50_unintended_diff_only": 0.01449999213218689,
405
+ "tpp_threshold_100_total_metric": 0.23399999737739563,
406
+ "tpp_threshold_100_intended_diff_only": 0.25,
407
+ "tpp_threshold_100_unintended_diff_only": 0.01600000262260437,
408
+ "tpp_threshold_500_total_metric": 0.35074999928474426,
409
+ "tpp_threshold_500_intended_diff_only": 0.375,
410
+ "tpp_threshold_500_unintended_diff_only": 0.024250000715255737
411
+ }
412
+ }
413
+ }
414
+ }
old_relu_eval_results/tpp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_2_custom_sae_eval_results.json ADDED
@@ -0,0 +1,414 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "tpp",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": false,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "bafd917a-113b-4ade-93b9-fc280e1e64e8",
73
+ "datetime_epoch_millis": 1738808371674,
74
+ "eval_result_metrics": {
75
+ "tpp_metrics": {
76
+ "tpp_threshold_2_total_metric": 0.004999996721744537,
77
+ "tpp_threshold_2_intended_diff_only": 0.008099997043609619,
78
+ "tpp_threshold_2_unintended_diff_only": 0.003100000321865082,
79
+ "tpp_threshold_5_total_metric": 0.004899995028972625,
80
+ "tpp_threshold_5_intended_diff_only": 0.008599996566772461,
81
+ "tpp_threshold_5_unintended_diff_only": 0.0037000015377998356,
82
+ "tpp_threshold_10_total_metric": 0.017099998891353607,
83
+ "tpp_threshold_10_intended_diff_only": 0.02160000205039978,
84
+ "tpp_threshold_10_unintended_diff_only": 0.004500003159046173,
85
+ "tpp_threshold_20_total_metric": 0.028300000727176665,
86
+ "tpp_threshold_20_intended_diff_only": 0.03340000510215759,
87
+ "tpp_threshold_20_unintended_diff_only": 0.0051000043749809265,
88
+ "tpp_threshold_50_total_metric": 0.059400005638599394,
89
+ "tpp_threshold_50_intended_diff_only": 0.0662000060081482,
90
+ "tpp_threshold_50_unintended_diff_only": 0.0068000003695487974,
91
+ "tpp_threshold_100_total_metric": 0.10572500675916671,
92
+ "tpp_threshold_100_intended_diff_only": 0.11560000777244568,
93
+ "tpp_threshold_100_unintended_diff_only": 0.00987500101327896,
94
+ "tpp_threshold_500_total_metric": 0.3575750187039375,
95
+ "tpp_threshold_500_intended_diff_only": 0.3726000189781189,
96
+ "tpp_threshold_500_unintended_diff_only": 0.015025000274181365
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results",
102
+ "tpp_threshold_2_total_metric": 0.005649995803833008,
103
+ "tpp_threshold_2_intended_diff_only": 0.0075999975204467775,
104
+ "tpp_threshold_2_unintended_diff_only": 0.0019500017166137694,
105
+ "tpp_threshold_5_total_metric": 0.005199992656707763,
106
+ "tpp_threshold_5_intended_diff_only": 0.0075999975204467775,
107
+ "tpp_threshold_5_unintended_diff_only": 0.0024000048637390138,
108
+ "tpp_threshold_10_total_metric": 0.014100000262260437,
109
+ "tpp_threshold_10_intended_diff_only": 0.01640000343322754,
110
+ "tpp_threshold_10_unintended_diff_only": 0.002300003170967102,
111
+ "tpp_threshold_20_total_metric": 0.028600004315376282,
112
+ "tpp_threshold_20_intended_diff_only": 0.031000006198883056,
113
+ "tpp_threshold_20_unintended_diff_only": 0.002400001883506775,
114
+ "tpp_threshold_50_total_metric": 0.053450003266334534,
115
+ "tpp_threshold_50_intended_diff_only": 0.05700000524520874,
116
+ "tpp_threshold_50_unintended_diff_only": 0.0035500019788742066,
117
+ "tpp_threshold_100_total_metric": 0.10355001091957092,
118
+ "tpp_threshold_100_intended_diff_only": 0.10860000848770142,
119
+ "tpp_threshold_100_unintended_diff_only": 0.005049997568130493,
120
+ "tpp_threshold_500_total_metric": 0.39755001962184905,
121
+ "tpp_threshold_500_intended_diff_only": 0.4052000164985657,
122
+ "tpp_threshold_500_unintended_diff_only": 0.007649996876716613
123
+ },
124
+ {
125
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results",
126
+ "tpp_threshold_2_total_metric": 0.004349997639656067,
127
+ "tpp_threshold_2_intended_diff_only": 0.008599996566772461,
128
+ "tpp_threshold_2_unintended_diff_only": 0.004249998927116394,
129
+ "tpp_threshold_5_total_metric": 0.0045999974012374874,
130
+ "tpp_threshold_5_intended_diff_only": 0.009599995613098145,
131
+ "tpp_threshold_5_unintended_diff_only": 0.004999998211860657,
132
+ "tpp_threshold_10_total_metric": 0.020099997520446777,
133
+ "tpp_threshold_10_intended_diff_only": 0.026800000667572023,
134
+ "tpp_threshold_10_unintended_diff_only": 0.006700003147125244,
135
+ "tpp_threshold_20_total_metric": 0.02799999713897705,
136
+ "tpp_threshold_20_intended_diff_only": 0.03580000400543213,
137
+ "tpp_threshold_20_unintended_diff_only": 0.007800006866455078,
138
+ "tpp_threshold_50_total_metric": 0.06535000801086426,
139
+ "tpp_threshold_50_intended_diff_only": 0.07540000677108764,
140
+ "tpp_threshold_50_unintended_diff_only": 0.010049998760223389,
141
+ "tpp_threshold_100_total_metric": 0.1079000025987625,
142
+ "tpp_threshold_100_intended_diff_only": 0.12260000705718994,
143
+ "tpp_threshold_100_unintended_diff_only": 0.014700004458427429,
144
+ "tpp_threshold_500_total_metric": 0.317600017786026,
145
+ "tpp_threshold_500_intended_diff_only": 0.3400000214576721,
146
+ "tpp_threshold_500_unintended_diff_only": 0.02240000367164612
147
+ }
148
+ ],
149
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
150
+ "sae_lens_id": "custom_sae",
151
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_2",
152
+ "sae_lens_version": "5.4.1",
153
+ "sae_cfg_dict": {
154
+ "model_name": "gemma-2-2b",
155
+ "d_in": 2304,
156
+ "d_sae": 16384,
157
+ "hook_layer": 12,
158
+ "hook_name": "blocks.12.hook_resid_post",
159
+ "context_size": null,
160
+ "hook_head_index": null,
161
+ "architecture": "standard",
162
+ "apply_b_dec_to_input": null,
163
+ "finetuning_scaling_factor": null,
164
+ "activation_fn_str": "",
165
+ "prepend_bos": true,
166
+ "normalize_activations": "none",
167
+ "dtype": "bfloat16",
168
+ "device": "",
169
+ "dataset_path": "",
170
+ "dataset_trust_remote_code": true,
171
+ "seqpos_slice": [
172
+ null
173
+ ],
174
+ "training_tokens": -100000,
175
+ "sae_lens_training_version": null,
176
+ "neuronpedia_id": null
177
+ },
178
+ "eval_result_unstructured": {
179
+ "LabHC/bias_in_bios_class_set1": {
180
+ "0": {
181
+ "tpp_threshold_2_total_metric": 0.01075001060962677,
182
+ "tpp_threshold_2_intended_diff_only": 0.013000011444091797,
183
+ "tpp_threshold_2_unintended_diff_only": 0.002250000834465027,
184
+ "tpp_threshold_5_total_metric": 0.01600000262260437,
185
+ "tpp_threshold_5_intended_diff_only": 0.018999993801116943,
186
+ "tpp_threshold_5_unintended_diff_only": 0.0029999911785125732,
187
+ "tpp_threshold_10_total_metric": 0.013500034809112549,
188
+ "tpp_threshold_10_intended_diff_only": 0.016000032424926758,
189
+ "tpp_threshold_10_unintended_diff_only": 0.002499997615814209,
190
+ "tpp_threshold_20_total_metric": 0.023750022053718567,
191
+ "tpp_threshold_20_intended_diff_only": 0.027000010013580322,
192
+ "tpp_threshold_20_unintended_diff_only": 0.0032499879598617554,
193
+ "tpp_threshold_50_total_metric": 0.057750046253204346,
194
+ "tpp_threshold_50_intended_diff_only": 0.061000049114227295,
195
+ "tpp_threshold_50_unintended_diff_only": 0.0032500028610229492,
196
+ "tpp_threshold_100_total_metric": 0.11575004458427429,
197
+ "tpp_threshold_100_intended_diff_only": 0.12200003862380981,
198
+ "tpp_threshold_100_unintended_diff_only": 0.0062499940395355225,
199
+ "tpp_threshold_500_total_metric": 0.4100000709295273,
200
+ "tpp_threshold_500_intended_diff_only": 0.41200006008148193,
201
+ "tpp_threshold_500_unintended_diff_only": 0.001999989151954651
202
+ },
203
+ "1": {
204
+ "tpp_threshold_2_total_metric": 0.0014999955892562866,
205
+ "tpp_threshold_2_intended_diff_only": 0.0009999871253967285,
206
+ "tpp_threshold_2_unintended_diff_only": -0.0005000084638595581,
207
+ "tpp_threshold_5_total_metric": -0.005250021815299988,
208
+ "tpp_threshold_5_intended_diff_only": -0.0040000081062316895,
209
+ "tpp_threshold_5_unintended_diff_only": 0.0012500137090682983,
210
+ "tpp_threshold_10_total_metric": -0.0005000382661819458,
211
+ "tpp_threshold_10_intended_diff_only": -0.001000046730041504,
212
+ "tpp_threshold_10_unintended_diff_only": -0.0005000084638595581,
213
+ "tpp_threshold_20_total_metric": 0.006749957799911499,
214
+ "tpp_threshold_20_intended_diff_only": 0.010999977588653564,
215
+ "tpp_threshold_20_unintended_diff_only": 0.004250019788742065,
216
+ "tpp_threshold_50_total_metric": 0.02424997091293335,
217
+ "tpp_threshold_50_intended_diff_only": 0.02899998426437378,
218
+ "tpp_threshold_50_unintended_diff_only": 0.00475001335144043,
219
+ "tpp_threshold_100_total_metric": 0.047499969601631165,
220
+ "tpp_threshold_100_intended_diff_only": 0.05199998617172241,
221
+ "tpp_threshold_100_unintended_diff_only": 0.0045000165700912476,
222
+ "tpp_threshold_500_total_metric": 0.2900000065565109,
223
+ "tpp_threshold_500_intended_diff_only": 0.29500001668930054,
224
+ "tpp_threshold_500_unintended_diff_only": 0.005000010132789612
225
+ },
226
+ "2": {
227
+ "tpp_threshold_2_total_metric": 0.00875002145767212,
228
+ "tpp_threshold_2_intended_diff_only": 0.012000024318695068,
229
+ "tpp_threshold_2_unintended_diff_only": 0.0032500028610229492,
230
+ "tpp_threshold_5_total_metric": 0.007250010967254639,
231
+ "tpp_threshold_5_intended_diff_only": 0.012000024318695068,
232
+ "tpp_threshold_5_unintended_diff_only": 0.00475001335144043,
233
+ "tpp_threshold_10_total_metric": 0.024500012397766113,
234
+ "tpp_threshold_10_intended_diff_only": 0.026000022888183594,
235
+ "tpp_threshold_10_unintended_diff_only": 0.0015000104904174805,
236
+ "tpp_threshold_20_total_metric": 0.03749999403953552,
237
+ "tpp_threshold_20_intended_diff_only": 0.03700000047683716,
238
+ "tpp_threshold_20_unintended_diff_only": -0.0004999935626983643,
239
+ "tpp_threshold_50_total_metric": 0.04449997842311859,
240
+ "tpp_threshold_50_intended_diff_only": 0.042999982833862305,
241
+ "tpp_threshold_50_unintended_diff_only": -0.0014999955892562866,
242
+ "tpp_threshold_100_total_metric": 0.08000004291534424,
243
+ "tpp_threshold_100_intended_diff_only": 0.08100003004074097,
244
+ "tpp_threshold_100_unintended_diff_only": 0.0009999871253967285,
245
+ "tpp_threshold_500_total_metric": 0.4047499895095825,
246
+ "tpp_threshold_500_intended_diff_only": 0.4129999876022339,
247
+ "tpp_threshold_500_unintended_diff_only": 0.008249998092651367
248
+ },
249
+ "6": {
250
+ "tpp_threshold_2_total_metric": 0.0029999911785125732,
251
+ "tpp_threshold_2_intended_diff_only": 0.0040000081062316895,
252
+ "tpp_threshold_2_unintended_diff_only": 0.0010000169277191162,
253
+ "tpp_threshold_5_total_metric": 0.003750026226043701,
254
+ "tpp_threshold_5_intended_diff_only": 0.003000020980834961,
255
+ "tpp_threshold_5_unintended_diff_only": -0.0007500052452087402,
256
+ "tpp_threshold_10_total_metric": 0.002249985933303833,
257
+ "tpp_threshold_10_intended_diff_only": 0.004999995231628418,
258
+ "tpp_threshold_10_unintended_diff_only": 0.002750009298324585,
259
+ "tpp_threshold_20_total_metric": 0.008000046014785767,
260
+ "tpp_threshold_20_intended_diff_only": 0.00700002908706665,
261
+ "tpp_threshold_20_unintended_diff_only": -0.0010000169277191162,
262
+ "tpp_threshold_50_total_metric": 0.012750029563903809,
263
+ "tpp_threshold_50_intended_diff_only": 0.017000019550323486,
264
+ "tpp_threshold_50_unintended_diff_only": 0.004249989986419678,
265
+ "tpp_threshold_100_total_metric": 0.04250001907348633,
266
+ "tpp_threshold_100_intended_diff_only": 0.046000003814697266,
267
+ "tpp_threshold_100_unintended_diff_only": 0.0034999847412109375,
268
+ "tpp_threshold_500_total_metric": 0.42250001430511475,
269
+ "tpp_threshold_500_intended_diff_only": 0.4300000071525574,
270
+ "tpp_threshold_500_unintended_diff_only": 0.007499992847442627
271
+ },
272
+ "9": {
273
+ "tpp_threshold_2_total_metric": 0.00424996018409729,
274
+ "tpp_threshold_2_intended_diff_only": 0.007999956607818604,
275
+ "tpp_threshold_2_unintended_diff_only": 0.0037499964237213135,
276
+ "tpp_threshold_5_total_metric": 0.004249945282936096,
277
+ "tpp_threshold_5_intended_diff_only": 0.007999956607818604,
278
+ "tpp_threshold_5_unintended_diff_only": 0.0037500113248825073,
279
+ "tpp_threshold_10_total_metric": 0.030750006437301636,
280
+ "tpp_threshold_10_intended_diff_only": 0.03600001335144043,
281
+ "tpp_threshold_10_unintended_diff_only": 0.005250006914138794,
282
+ "tpp_threshold_20_total_metric": 0.06700000166893005,
283
+ "tpp_threshold_20_intended_diff_only": 0.07300001382827759,
284
+ "tpp_threshold_20_unintended_diff_only": 0.006000012159347534,
285
+ "tpp_threshold_50_total_metric": 0.12799999117851257,
286
+ "tpp_threshold_50_intended_diff_only": 0.13499999046325684,
287
+ "tpp_threshold_50_unintended_diff_only": 0.006999999284744263,
288
+ "tpp_threshold_100_total_metric": 0.2319999784231186,
289
+ "tpp_threshold_100_intended_diff_only": 0.24199998378753662,
290
+ "tpp_threshold_100_unintended_diff_only": 0.01000000536441803,
291
+ "tpp_threshold_500_total_metric": 0.4605000168085098,
292
+ "tpp_threshold_500_intended_diff_only": 0.47600001096725464,
293
+ "tpp_threshold_500_unintended_diff_only": 0.015499994158744812
294
+ }
295
+ },
296
+ "canrager/amazon_reviews_mcauley_1and5": {
297
+ "1": {
298
+ "tpp_threshold_2_total_metric": 0.013250023126602173,
299
+ "tpp_threshold_2_intended_diff_only": 0.017000019550323486,
300
+ "tpp_threshold_2_unintended_diff_only": 0.0037499964237213135,
301
+ "tpp_threshold_5_total_metric": 0.011250004172325134,
302
+ "tpp_threshold_5_intended_diff_only": 0.013999998569488525,
303
+ "tpp_threshold_5_unintended_diff_only": 0.002749994397163391,
304
+ "tpp_threshold_10_total_metric": 0.01075001060962677,
305
+ "tpp_threshold_10_intended_diff_only": 0.018000006675720215,
306
+ "tpp_threshold_10_unintended_diff_only": 0.007249996066093445,
307
+ "tpp_threshold_20_total_metric": 0.015000015497207642,
308
+ "tpp_threshold_20_intended_diff_only": 0.021000027656555176,
309
+ "tpp_threshold_20_unintended_diff_only": 0.006000012159347534,
310
+ "tpp_threshold_50_total_metric": 0.03725004196166992,
311
+ "tpp_threshold_50_intended_diff_only": 0.04300004243850708,
312
+ "tpp_threshold_50_unintended_diff_only": 0.005750000476837158,
313
+ "tpp_threshold_100_total_metric": 0.0612499862909317,
314
+ "tpp_threshold_100_intended_diff_only": 0.07400000095367432,
315
+ "tpp_threshold_100_unintended_diff_only": 0.012750014662742615,
316
+ "tpp_threshold_500_total_metric": 0.28825002908706665,
317
+ "tpp_threshold_500_intended_diff_only": 0.2990000247955322,
318
+ "tpp_threshold_500_unintended_diff_only": 0.010749995708465576
319
+ },
320
+ "2": {
321
+ "tpp_threshold_2_total_metric": 0.008249983191490173,
322
+ "tpp_threshold_2_intended_diff_only": 0.010999977588653564,
323
+ "tpp_threshold_2_unintended_diff_only": 0.002749994397163391,
324
+ "tpp_threshold_5_total_metric": -0.0034999996423721313,
325
+ "tpp_threshold_5_intended_diff_only": 0.009000003337860107,
326
+ "tpp_threshold_5_unintended_diff_only": 0.012500002980232239,
327
+ "tpp_threshold_10_total_metric": 0.015499994158744812,
328
+ "tpp_threshold_10_intended_diff_only": 0.02399998903274536,
329
+ "tpp_threshold_10_unintended_diff_only": 0.00849999487400055,
330
+ "tpp_threshold_20_total_metric": 0.02699996531009674,
331
+ "tpp_threshold_20_intended_diff_only": 0.03299999237060547,
332
+ "tpp_threshold_20_unintended_diff_only": 0.006000027060508728,
333
+ "tpp_threshold_50_total_metric": 0.0677499920129776,
334
+ "tpp_threshold_50_intended_diff_only": 0.07999998331069946,
335
+ "tpp_threshold_50_unintended_diff_only": 0.012249991297721863,
336
+ "tpp_threshold_100_total_metric": 0.10849995911121368,
337
+ "tpp_threshold_100_intended_diff_only": 0.12699997425079346,
338
+ "tpp_threshold_100_unintended_diff_only": 0.018500015139579773,
339
+ "tpp_threshold_500_total_metric": 0.3362499922513962,
340
+ "tpp_threshold_500_intended_diff_only": 0.3669999837875366,
341
+ "tpp_threshold_500_unintended_diff_only": 0.030749991536140442
342
+ },
343
+ "3": {
344
+ "tpp_threshold_2_total_metric": -0.005500048398971558,
345
+ "tpp_threshold_2_intended_diff_only": -0.0020000338554382324,
346
+ "tpp_threshold_2_unintended_diff_only": 0.003500014543533325,
347
+ "tpp_threshold_5_total_metric": 0.0037499964237213135,
348
+ "tpp_threshold_5_intended_diff_only": 0.004999995231628418,
349
+ "tpp_threshold_5_unintended_diff_only": 0.0012499988079071045,
350
+ "tpp_threshold_10_total_metric": 0.0209999680519104,
351
+ "tpp_threshold_10_intended_diff_only": 0.02399998903274536,
352
+ "tpp_threshold_10_unintended_diff_only": 0.003000020980834961,
353
+ "tpp_threshold_20_total_metric": 0.010250002145767212,
354
+ "tpp_threshold_20_intended_diff_only": 0.018000006675720215,
355
+ "tpp_threshold_20_unintended_diff_only": 0.007750004529953003,
356
+ "tpp_threshold_50_total_metric": 0.06074999272823334,
357
+ "tpp_threshold_50_intended_diff_only": 0.06999999284744263,
358
+ "tpp_threshold_50_unintended_diff_only": 0.00925000011920929,
359
+ "tpp_threshold_100_total_metric": 0.08075001835823059,
360
+ "tpp_threshold_100_intended_diff_only": 0.09600001573562622,
361
+ "tpp_threshold_100_unintended_diff_only": 0.01524999737739563,
362
+ "tpp_threshold_500_total_metric": 0.3087500035762787,
363
+ "tpp_threshold_500_intended_diff_only": 0.3320000171661377,
364
+ "tpp_threshold_500_unintended_diff_only": 0.02325001358985901
365
+ },
366
+ "5": {
367
+ "tpp_threshold_2_total_metric": -0.006499990820884705,
368
+ "tpp_threshold_2_intended_diff_only": -0.0040000081062316895,
369
+ "tpp_threshold_2_unintended_diff_only": 0.002499982714653015,
370
+ "tpp_threshold_5_total_metric": -0.0052499920129776,
371
+ "tpp_threshold_5_intended_diff_only": 0.0,
372
+ "tpp_threshold_5_unintended_diff_only": 0.0052499920129776,
373
+ "tpp_threshold_10_total_metric": -0.0022500157356262207,
374
+ "tpp_threshold_10_intended_diff_only": 0.0059999823570251465,
375
+ "tpp_threshold_10_unintended_diff_only": 0.008249998092651367,
376
+ "tpp_threshold_20_total_metric": 0.013750016689300537,
377
+ "tpp_threshold_20_intended_diff_only": 0.023000001907348633,
378
+ "tpp_threshold_20_unintended_diff_only": 0.009249985218048096,
379
+ "tpp_threshold_50_total_metric": 0.03574998676776886,
380
+ "tpp_threshold_50_intended_diff_only": 0.046999990940093994,
381
+ "tpp_threshold_50_unintended_diff_only": 0.011250004172325134,
382
+ "tpp_threshold_100_total_metric": 0.08550001680850983,
383
+ "tpp_threshold_100_intended_diff_only": 0.10100001096725464,
384
+ "tpp_threshold_100_unintended_diff_only": 0.015499994158744812,
385
+ "tpp_threshold_500_total_metric": 0.3072500377893448,
386
+ "tpp_threshold_500_intended_diff_only": 0.33500003814697266,
387
+ "tpp_threshold_500_unintended_diff_only": 0.02775000035762787
388
+ },
389
+ "6": {
390
+ "tpp_threshold_2_total_metric": 0.01225002110004425,
391
+ "tpp_threshold_2_intended_diff_only": 0.021000027656555176,
392
+ "tpp_threshold_2_unintended_diff_only": 0.008750006556510925,
393
+ "tpp_threshold_5_total_metric": 0.016749978065490723,
394
+ "tpp_threshold_5_intended_diff_only": 0.019999980926513672,
395
+ "tpp_threshold_5_unintended_diff_only": 0.0032500028610229492,
396
+ "tpp_threshold_10_total_metric": 0.055500030517578125,
397
+ "tpp_threshold_10_intended_diff_only": 0.06200003623962402,
398
+ "tpp_threshold_10_unintended_diff_only": 0.0065000057220458984,
399
+ "tpp_threshold_20_total_metric": 0.07399998605251312,
400
+ "tpp_threshold_20_intended_diff_only": 0.08399999141693115,
401
+ "tpp_threshold_20_unintended_diff_only": 0.01000000536441803,
402
+ "tpp_threshold_50_total_metric": 0.12525002658367157,
403
+ "tpp_threshold_50_intended_diff_only": 0.13700002431869507,
404
+ "tpp_threshold_50_unintended_diff_only": 0.011749997735023499,
405
+ "tpp_threshold_100_total_metric": 0.20350003242492676,
406
+ "tpp_threshold_100_intended_diff_only": 0.21500003337860107,
407
+ "tpp_threshold_100_unintended_diff_only": 0.011500000953674316,
408
+ "tpp_threshold_500_total_metric": 0.3475000262260437,
409
+ "tpp_threshold_500_intended_diff_only": 0.3670000433921814,
410
+ "tpp_threshold_500_unintended_diff_only": 0.019500017166137695
411
+ }
412
+ }
413
+ }
414
+ }
old_relu_eval_results/tpp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_3_custom_sae_eval_results.json ADDED
@@ -0,0 +1,414 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "tpp",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": false,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "a9df393e-0561-4737-9c16-8f03033409cb",
73
+ "datetime_epoch_millis": 1738808254874,
74
+ "eval_result_metrics": {
75
+ "tpp_metrics": {
76
+ "tpp_threshold_2_total_metric": 0.0035749942064285277,
77
+ "tpp_threshold_2_intended_diff_only": 0.0061999976634979255,
78
+ "tpp_threshold_2_unintended_diff_only": 0.002625003457069397,
79
+ "tpp_threshold_5_total_metric": 0.005500002205371857,
80
+ "tpp_threshold_5_intended_diff_only": 0.009100002050399781,
81
+ "tpp_threshold_5_unintended_diff_only": 0.003599999845027924,
82
+ "tpp_threshold_10_total_metric": 0.01294999271631241,
83
+ "tpp_threshold_10_intended_diff_only": 0.017399996519088745,
84
+ "tpp_threshold_10_unintended_diff_only": 0.004450003802776336,
85
+ "tpp_threshold_20_total_metric": 0.021499992907047273,
86
+ "tpp_threshold_20_intended_diff_only": 0.0262999951839447,
87
+ "tpp_threshold_20_unintended_diff_only": 0.00480000227689743,
88
+ "tpp_threshold_50_total_metric": 0.043899996578693395,
89
+ "tpp_threshold_50_intended_diff_only": 0.04889999628067017,
90
+ "tpp_threshold_50_unintended_diff_only": 0.004999999701976777,
91
+ "tpp_threshold_100_total_metric": 0.07275000959634781,
92
+ "tpp_threshold_100_intended_diff_only": 0.08150001168251038,
93
+ "tpp_threshold_100_unintended_diff_only": 0.008750002086162566,
94
+ "tpp_threshold_500_total_metric": 0.2600750118494034,
95
+ "tpp_threshold_500_intended_diff_only": 0.27110000848770144,
96
+ "tpp_threshold_500_unintended_diff_only": 0.011024996638298035
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results",
102
+ "tpp_threshold_2_total_metric": 0.003349986672401428,
103
+ "tpp_threshold_2_intended_diff_only": 0.0053999900817871095,
104
+ "tpp_threshold_2_unintended_diff_only": 0.002050003409385681,
105
+ "tpp_threshold_5_total_metric": 0.006400004029273987,
106
+ "tpp_threshold_5_intended_diff_only": 0.008800005912780762,
107
+ "tpp_threshold_5_unintended_diff_only": 0.002400001883506775,
108
+ "tpp_threshold_10_total_metric": 0.011899998784065247,
109
+ "tpp_threshold_10_intended_diff_only": 0.014800000190734863,
110
+ "tpp_threshold_10_unintended_diff_only": 0.0029000014066696165,
111
+ "tpp_threshold_20_total_metric": 0.023699989914894103,
112
+ "tpp_threshold_20_intended_diff_only": 0.02659999132156372,
113
+ "tpp_threshold_20_unintended_diff_only": 0.0029000014066696165,
114
+ "tpp_threshold_50_total_metric": 0.04629998803138733,
115
+ "tpp_threshold_50_intended_diff_only": 0.049799990653991696,
116
+ "tpp_threshold_50_unintended_diff_only": 0.0035000026226043703,
117
+ "tpp_threshold_100_total_metric": 0.07975000441074372,
118
+ "tpp_threshold_100_intended_diff_only": 0.08480000495910645,
119
+ "tpp_threshold_100_unintended_diff_only": 0.005050000548362732,
120
+ "tpp_threshold_500_total_metric": 0.31270001232624056,
121
+ "tpp_threshold_500_intended_diff_only": 0.3202000021934509,
122
+ "tpp_threshold_500_unintended_diff_only": 0.007499989867210388
123
+ },
124
+ {
125
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results",
126
+ "tpp_threshold_2_total_metric": 0.0038000017404556273,
127
+ "tpp_threshold_2_intended_diff_only": 0.007000005245208741,
128
+ "tpp_threshold_2_unintended_diff_only": 0.003200003504753113,
129
+ "tpp_threshold_5_total_metric": 0.004600000381469726,
130
+ "tpp_threshold_5_intended_diff_only": 0.009399998188018798,
131
+ "tpp_threshold_5_unintended_diff_only": 0.004799997806549073,
132
+ "tpp_threshold_10_total_metric": 0.01399998664855957,
133
+ "tpp_threshold_10_intended_diff_only": 0.019999992847442628,
134
+ "tpp_threshold_10_unintended_diff_only": 0.006000006198883056,
135
+ "tpp_threshold_20_total_metric": 0.01929999589920044,
136
+ "tpp_threshold_20_intended_diff_only": 0.025999999046325682,
137
+ "tpp_threshold_20_unintended_diff_only": 0.006700003147125244,
138
+ "tpp_threshold_50_total_metric": 0.04150000512599945,
139
+ "tpp_threshold_50_intended_diff_only": 0.048000001907348634,
140
+ "tpp_threshold_50_unintended_diff_only": 0.0064999967813491825,
141
+ "tpp_threshold_100_total_metric": 0.0657500147819519,
142
+ "tpp_threshold_100_intended_diff_only": 0.07820001840591431,
143
+ "tpp_threshold_100_unintended_diff_only": 0.012450003623962402,
144
+ "tpp_threshold_500_total_metric": 0.20745001137256622,
145
+ "tpp_threshold_500_intended_diff_only": 0.22200001478195192,
146
+ "tpp_threshold_500_unintended_diff_only": 0.01455000340938568
147
+ }
148
+ ],
149
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
150
+ "sae_lens_id": "custom_sae",
151
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_3",
152
+ "sae_lens_version": "5.4.1",
153
+ "sae_cfg_dict": {
154
+ "model_name": "gemma-2-2b",
155
+ "d_in": 2304,
156
+ "d_sae": 16384,
157
+ "hook_layer": 12,
158
+ "hook_name": "blocks.12.hook_resid_post",
159
+ "context_size": null,
160
+ "hook_head_index": null,
161
+ "architecture": "standard",
162
+ "apply_b_dec_to_input": null,
163
+ "finetuning_scaling_factor": null,
164
+ "activation_fn_str": "",
165
+ "prepend_bos": true,
166
+ "normalize_activations": "none",
167
+ "dtype": "bfloat16",
168
+ "device": "",
169
+ "dataset_path": "",
170
+ "dataset_trust_remote_code": true,
171
+ "seqpos_slice": [
172
+ null
173
+ ],
174
+ "training_tokens": -100000,
175
+ "sae_lens_training_version": null,
176
+ "neuronpedia_id": null
177
+ },
178
+ "eval_result_unstructured": {
179
+ "LabHC/bias_in_bios_class_set1": {
180
+ "0": {
181
+ "tpp_threshold_2_total_metric": 0.007749989628791809,
182
+ "tpp_threshold_2_intended_diff_only": 0.009999990463256836,
183
+ "tpp_threshold_2_unintended_diff_only": 0.002250000834465027,
184
+ "tpp_threshold_5_total_metric": 0.017250046133995056,
185
+ "tpp_threshold_5_intended_diff_only": 0.020000040531158447,
186
+ "tpp_threshold_5_unintended_diff_only": 0.002749994397163391,
187
+ "tpp_threshold_10_total_metric": 0.014750003814697266,
188
+ "tpp_threshold_10_intended_diff_only": 0.018000006675720215,
189
+ "tpp_threshold_10_unintended_diff_only": 0.0032500028610229492,
190
+ "tpp_threshold_20_total_metric": 0.023250028491020203,
191
+ "tpp_threshold_20_intended_diff_only": 0.026000022888183594,
192
+ "tpp_threshold_20_unintended_diff_only": 0.002749994397163391,
193
+ "tpp_threshold_50_total_metric": 0.04175001382827759,
194
+ "tpp_threshold_50_intended_diff_only": 0.04500001668930054,
195
+ "tpp_threshold_50_unintended_diff_only": 0.0032500028610229492,
196
+ "tpp_threshold_100_total_metric": 0.07125002145767212,
197
+ "tpp_threshold_100_intended_diff_only": 0.07700002193450928,
198
+ "tpp_threshold_100_unintended_diff_only": 0.005750000476837158,
199
+ "tpp_threshold_500_total_metric": 0.24525001645088196,
200
+ "tpp_threshold_500_intended_diff_only": 0.24500000476837158,
201
+ "tpp_threshold_500_unintended_diff_only": -0.000250011682510376
202
+ },
203
+ "1": {
204
+ "tpp_threshold_2_total_metric": 0.0029999613761901855,
205
+ "tpp_threshold_2_intended_diff_only": 0.0029999613761901855,
206
+ "tpp_threshold_2_unintended_diff_only": 0.0,
207
+ "tpp_threshold_5_total_metric": -0.0010000169277191162,
208
+ "tpp_threshold_5_intended_diff_only": 0.0009999871253967285,
209
+ "tpp_threshold_5_unintended_diff_only": 0.0020000040531158447,
210
+ "tpp_threshold_10_total_metric": 0.00925000011920929,
211
+ "tpp_threshold_10_intended_diff_only": 0.009000003337860107,
212
+ "tpp_threshold_10_unintended_diff_only": -0.00024999678134918213,
213
+ "tpp_threshold_20_total_metric": 0.010249972343444824,
214
+ "tpp_threshold_20_intended_diff_only": 0.014999985694885254,
215
+ "tpp_threshold_20_unintended_diff_only": 0.00475001335144043,
216
+ "tpp_threshold_50_total_metric": 0.025749951601028442,
217
+ "tpp_threshold_50_intended_diff_only": 0.030999958515167236,
218
+ "tpp_threshold_50_unintended_diff_only": 0.005250006914138794,
219
+ "tpp_threshold_100_total_metric": 0.05324995517730713,
220
+ "tpp_threshold_100_intended_diff_only": 0.05799996852874756,
221
+ "tpp_threshold_100_unintended_diff_only": 0.00475001335144043,
222
+ "tpp_threshold_500_total_metric": 0.26924997568130493,
223
+ "tpp_threshold_500_intended_diff_only": 0.2789999842643738,
224
+ "tpp_threshold_500_unintended_diff_only": 0.009750008583068848
225
+ },
226
+ "2": {
227
+ "tpp_threshold_2_total_metric": 0.0027499794960021973,
228
+ "tpp_threshold_2_intended_diff_only": 0.0059999823570251465,
229
+ "tpp_threshold_2_unintended_diff_only": 0.0032500028610229492,
230
+ "tpp_threshold_5_total_metric": 0.006249964237213135,
231
+ "tpp_threshold_5_intended_diff_only": 0.010999977588653564,
232
+ "tpp_threshold_5_unintended_diff_only": 0.00475001335144043,
233
+ "tpp_threshold_10_total_metric": 0.021499991416931152,
234
+ "tpp_threshold_10_intended_diff_only": 0.02399998903274536,
235
+ "tpp_threshold_10_unintended_diff_only": 0.002499997615814209,
236
+ "tpp_threshold_20_total_metric": 0.03274999558925629,
237
+ "tpp_threshold_20_intended_diff_only": 0.03299999237060547,
238
+ "tpp_threshold_20_unintended_diff_only": 0.00024999678134918213,
239
+ "tpp_threshold_50_total_metric": 0.03449997305870056,
240
+ "tpp_threshold_50_intended_diff_only": 0.0339999794960022,
241
+ "tpp_threshold_50_unintended_diff_only": -0.0004999935626983643,
242
+ "tpp_threshold_100_total_metric": 0.05425000190734863,
243
+ "tpp_threshold_100_intended_diff_only": 0.0559999942779541,
244
+ "tpp_threshold_100_unintended_diff_only": 0.0017499923706054688,
245
+ "tpp_threshold_500_total_metric": 0.3055000454187393,
246
+ "tpp_threshold_500_intended_diff_only": 0.312000036239624,
247
+ "tpp_threshold_500_unintended_diff_only": 0.006499990820884705
248
+ },
249
+ "6": {
250
+ "tpp_threshold_2_total_metric": 0.0010000169277191162,
251
+ "tpp_threshold_2_intended_diff_only": 0.0020000338554382324,
252
+ "tpp_threshold_2_unintended_diff_only": 0.0010000169277191162,
253
+ "tpp_threshold_5_total_metric": 0.003000035881996155,
254
+ "tpp_threshold_5_intended_diff_only": 0.0020000338554382324,
255
+ "tpp_threshold_5_unintended_diff_only": -0.0010000020265579224,
256
+ "tpp_threshold_10_total_metric": 0.0010000020265579224,
257
+ "tpp_threshold_10_intended_diff_only": 0.004999995231628418,
258
+ "tpp_threshold_10_unintended_diff_only": 0.003999993205070496,
259
+ "tpp_threshold_20_total_metric": 0.005499988794326782,
260
+ "tpp_threshold_20_intended_diff_only": 0.0059999823570251465,
261
+ "tpp_threshold_20_unintended_diff_only": 0.0004999935626983643,
262
+ "tpp_threshold_50_total_metric": 0.006500035524368286,
263
+ "tpp_threshold_50_intended_diff_only": 0.012000024318695068,
264
+ "tpp_threshold_50_unintended_diff_only": 0.005499988794326782,
265
+ "tpp_threshold_100_total_metric": 0.04650004208087921,
266
+ "tpp_threshold_100_intended_diff_only": 0.053000032901763916,
267
+ "tpp_threshold_100_unintended_diff_only": 0.006499990820884705,
268
+ "tpp_threshold_500_total_metric": 0.30650003254413605,
269
+ "tpp_threshold_500_intended_diff_only": 0.3190000057220459,
270
+ "tpp_threshold_500_unintended_diff_only": 0.012499973177909851
271
+ },
272
+ "9": {
273
+ "tpp_threshold_2_total_metric": 0.002249985933303833,
274
+ "tpp_threshold_2_intended_diff_only": 0.0059999823570251465,
275
+ "tpp_threshold_2_unintended_diff_only": 0.0037499964237213135,
276
+ "tpp_threshold_5_total_metric": 0.006499990820884705,
277
+ "tpp_threshold_5_intended_diff_only": 0.009999990463256836,
278
+ "tpp_threshold_5_unintended_diff_only": 0.0034999996423721313,
279
+ "tpp_threshold_10_total_metric": 0.012999996542930603,
280
+ "tpp_threshold_10_intended_diff_only": 0.018000006675720215,
281
+ "tpp_threshold_10_unintended_diff_only": 0.005000010132789612,
282
+ "tpp_threshold_20_total_metric": 0.046749964356422424,
283
+ "tpp_threshold_20_intended_diff_only": 0.05299997329711914,
284
+ "tpp_threshold_20_unintended_diff_only": 0.006250008940696716,
285
+ "tpp_threshold_50_total_metric": 0.12299996614456177,
286
+ "tpp_threshold_50_intended_diff_only": 0.12699997425079346,
287
+ "tpp_threshold_50_unintended_diff_only": 0.0040000081062316895,
288
+ "tpp_threshold_100_total_metric": 0.17350000143051147,
289
+ "tpp_threshold_100_intended_diff_only": 0.18000000715255737,
290
+ "tpp_threshold_100_unintended_diff_only": 0.0065000057220458984,
291
+ "tpp_threshold_500_total_metric": 0.43699999153614044,
292
+ "tpp_threshold_500_intended_diff_only": 0.44599997997283936,
293
+ "tpp_threshold_500_unintended_diff_only": 0.008999988436698914
294
+ }
295
+ },
296
+ "canrager/amazon_reviews_mcauley_1and5": {
297
+ "1": {
298
+ "tpp_threshold_2_total_metric": 0.01175004243850708,
299
+ "tpp_threshold_2_intended_diff_only": 0.01500004529953003,
300
+ "tpp_threshold_2_unintended_diff_only": 0.0032500028610229492,
301
+ "tpp_threshold_5_total_metric": 0.011750012636184692,
302
+ "tpp_threshold_5_intended_diff_only": 0.013999998569488525,
303
+ "tpp_threshold_5_unintended_diff_only": 0.002249985933303833,
304
+ "tpp_threshold_10_total_metric": 0.007500052452087402,
305
+ "tpp_threshold_10_intended_diff_only": 0.01500004529953003,
306
+ "tpp_threshold_10_unintended_diff_only": 0.007499992847442627,
307
+ "tpp_threshold_20_total_metric": 0.011250004172325134,
308
+ "tpp_threshold_20_intended_diff_only": 0.018000006675720215,
309
+ "tpp_threshold_20_unintended_diff_only": 0.006750002503395081,
310
+ "tpp_threshold_50_total_metric": 0.03525002300739288,
311
+ "tpp_threshold_50_intended_diff_only": 0.03600001335144043,
312
+ "tpp_threshold_50_unintended_diff_only": 0.0007499903440475464,
313
+ "tpp_threshold_100_total_metric": 0.03375004231929779,
314
+ "tpp_threshold_100_intended_diff_only": 0.04300004243850708,
315
+ "tpp_threshold_100_unintended_diff_only": 0.00925000011920929,
316
+ "tpp_threshold_500_total_metric": 0.15350006520748138,
317
+ "tpp_threshold_500_intended_diff_only": 0.16200006008148193,
318
+ "tpp_threshold_500_unintended_diff_only": 0.00849999487400055
319
+ },
320
+ "2": {
321
+ "tpp_threshold_2_total_metric": 0.0024999380111694336,
322
+ "tpp_threshold_2_intended_diff_only": 0.0029999613761901855,
323
+ "tpp_threshold_2_unintended_diff_only": 0.000500023365020752,
324
+ "tpp_threshold_5_total_metric": -0.011750012636184692,
325
+ "tpp_threshold_5_intended_diff_only": 0.0009999871253967285,
326
+ "tpp_threshold_5_unintended_diff_only": 0.012749999761581421,
327
+ "tpp_threshold_10_total_metric": 0.0024999380111694336,
328
+ "tpp_threshold_10_intended_diff_only": 0.007999956607818604,
329
+ "tpp_threshold_10_unintended_diff_only": 0.00550001859664917,
330
+ "tpp_threshold_20_total_metric": 0.007499963045120239,
331
+ "tpp_threshold_20_intended_diff_only": 0.011999964714050293,
332
+ "tpp_threshold_20_unintended_diff_only": 0.004500001668930054,
333
+ "tpp_threshold_50_total_metric": 0.021749988198280334,
334
+ "tpp_threshold_50_intended_diff_only": 0.03299999237060547,
335
+ "tpp_threshold_50_unintended_diff_only": 0.011250004172325134,
336
+ "tpp_threshold_100_total_metric": 0.04649996757507324,
337
+ "tpp_threshold_100_intended_diff_only": 0.06599998474121094,
338
+ "tpp_threshold_100_unintended_diff_only": 0.019500017166137695,
339
+ "tpp_threshold_500_total_metric": 0.195499986410141,
340
+ "tpp_threshold_500_intended_diff_only": 0.21299999952316284,
341
+ "tpp_threshold_500_unintended_diff_only": 0.01750001311302185
342
+ },
343
+ "3": {
344
+ "tpp_threshold_2_total_metric": -0.003500014543533325,
345
+ "tpp_threshold_2_intended_diff_only": 0.0009999871253967285,
346
+ "tpp_threshold_2_unintended_diff_only": 0.004500001668930054,
347
+ "tpp_threshold_5_total_metric": 0.004499956965446472,
348
+ "tpp_threshold_5_intended_diff_only": 0.006999969482421875,
349
+ "tpp_threshold_5_unintended_diff_only": 0.002500012516975403,
350
+ "tpp_threshold_10_total_metric": 0.019999995827674866,
351
+ "tpp_threshold_10_intended_diff_only": 0.023000001907348633,
352
+ "tpp_threshold_10_unintended_diff_only": 0.003000006079673767,
353
+ "tpp_threshold_20_total_metric": 0.008000001311302185,
354
+ "tpp_threshold_20_intended_diff_only": 0.013999998569488525,
355
+ "tpp_threshold_20_unintended_diff_only": 0.00599999725818634,
356
+ "tpp_threshold_50_total_metric": 0.03449995815753937,
357
+ "tpp_threshold_50_intended_diff_only": 0.034999966621398926,
358
+ "tpp_threshold_50_unintended_diff_only": 0.0005000084638595581,
359
+ "tpp_threshold_100_total_metric": 0.06475000083446503,
360
+ "tpp_threshold_100_intended_diff_only": 0.07400000095367432,
361
+ "tpp_threshold_100_unintended_diff_only": 0.00925000011920929,
362
+ "tpp_threshold_500_total_metric": 0.18399998545646667,
363
+ "tpp_threshold_500_intended_diff_only": 0.19900000095367432,
364
+ "tpp_threshold_500_unintended_diff_only": 0.015000015497207642
365
+ },
366
+ "5": {
367
+ "tpp_threshold_2_total_metric": -0.004999980330467224,
368
+ "tpp_threshold_2_intended_diff_only": -0.0009999871253967285,
369
+ "tpp_threshold_2_unintended_diff_only": 0.003999993205070496,
370
+ "tpp_threshold_5_total_metric": -0.002499982714653015,
371
+ "tpp_threshold_5_intended_diff_only": 0.0040000081062316895,
372
+ "tpp_threshold_5_unintended_diff_only": 0.006499990820884705,
373
+ "tpp_threshold_10_total_metric": -0.002750009298324585,
374
+ "tpp_threshold_10_intended_diff_only": 0.0059999823570251465,
375
+ "tpp_threshold_10_unintended_diff_only": 0.008749991655349731,
376
+ "tpp_threshold_20_total_metric": 0.003000020980834961,
377
+ "tpp_threshold_20_intended_diff_only": 0.012000024318695068,
378
+ "tpp_threshold_20_unintended_diff_only": 0.009000003337860107,
379
+ "tpp_threshold_50_total_metric": 0.027250006794929504,
380
+ "tpp_threshold_50_intended_diff_only": 0.03700000047683716,
381
+ "tpp_threshold_50_unintended_diff_only": 0.009749993681907654,
382
+ "tpp_threshold_100_total_metric": 0.04350002110004425,
383
+ "tpp_threshold_100_intended_diff_only": 0.058000028133392334,
384
+ "tpp_threshold_100_unintended_diff_only": 0.014500007033348083,
385
+ "tpp_threshold_500_total_metric": 0.18325002491474152,
386
+ "tpp_threshold_500_intended_diff_only": 0.20200002193450928,
387
+ "tpp_threshold_500_unintended_diff_only": 0.01874999701976776
388
+ },
389
+ "6": {
390
+ "tpp_threshold_2_total_metric": 0.013250023126602173,
391
+ "tpp_threshold_2_intended_diff_only": 0.017000019550323486,
392
+ "tpp_threshold_2_unintended_diff_only": 0.0037499964237213135,
393
+ "tpp_threshold_5_total_metric": 0.021000027656555176,
394
+ "tpp_threshold_5_intended_diff_only": 0.021000027656555176,
395
+ "tpp_threshold_5_unintended_diff_only": 0.0,
396
+ "tpp_threshold_10_total_metric": 0.042749956250190735,
397
+ "tpp_threshold_10_intended_diff_only": 0.04799997806549072,
398
+ "tpp_threshold_10_unintended_diff_only": 0.005250021815299988,
399
+ "tpp_threshold_20_total_metric": 0.06674998998641968,
400
+ "tpp_threshold_20_intended_diff_only": 0.07400000095367432,
401
+ "tpp_threshold_20_unintended_diff_only": 0.007250010967254639,
402
+ "tpp_threshold_50_total_metric": 0.08875004947185516,
403
+ "tpp_threshold_50_intended_diff_only": 0.09900003671646118,
404
+ "tpp_threshold_50_unintended_diff_only": 0.010249987244606018,
405
+ "tpp_threshold_100_total_metric": 0.1402500420808792,
406
+ "tpp_threshold_100_intended_diff_only": 0.15000003576278687,
407
+ "tpp_threshold_100_unintended_diff_only": 0.009749993681907654,
408
+ "tpp_threshold_500_total_metric": 0.32099999487400055,
409
+ "tpp_threshold_500_intended_diff_only": 0.33399999141693115,
410
+ "tpp_threshold_500_unintended_diff_only": 0.012999996542930603
411
+ }
412
+ }
413
+ }
414
+ }
old_relu_eval_results/tpp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_4_custom_sae_eval_results.json ADDED
@@ -0,0 +1,414 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "tpp",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": false,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "d8d50b3f-efea-453e-80ea-09e656823d9c",
73
+ "datetime_epoch_millis": 1738808485682,
74
+ "eval_result_metrics": {
75
+ "tpp_metrics": {
76
+ "tpp_threshold_2_total_metric": 0.003474992513656616,
77
+ "tpp_threshold_2_intended_diff_only": 0.005799990892410278,
78
+ "tpp_threshold_2_unintended_diff_only": 0.002324998378753662,
79
+ "tpp_threshold_5_total_metric": 0.0039999991655349735,
80
+ "tpp_threshold_5_intended_diff_only": 0.006999999284744263,
81
+ "tpp_threshold_5_unintended_diff_only": 0.0030000001192092896,
82
+ "tpp_threshold_10_total_metric": 0.01015000194311142,
83
+ "tpp_threshold_10_intended_diff_only": 0.013400000333786011,
84
+ "tpp_threshold_10_unintended_diff_only": 0.0032499983906745912,
85
+ "tpp_threshold_20_total_metric": 0.016299988329410552,
86
+ "tpp_threshold_20_intended_diff_only": 0.019599992036819457,
87
+ "tpp_threshold_20_unintended_diff_only": 0.003300003707408905,
88
+ "tpp_threshold_50_total_metric": 0.03060000091791153,
89
+ "tpp_threshold_50_intended_diff_only": 0.03460000157356262,
90
+ "tpp_threshold_50_unintended_diff_only": 0.0040000006556510925,
91
+ "tpp_threshold_100_total_metric": 0.047974994778633116,
92
+ "tpp_threshold_100_intended_diff_only": 0.054599994421005243,
93
+ "tpp_threshold_100_unintended_diff_only": 0.0066249996423721315,
94
+ "tpp_threshold_500_total_metric": 0.18647501319646836,
95
+ "tpp_threshold_500_intended_diff_only": 0.19520001411437987,
96
+ "tpp_threshold_500_unintended_diff_only": 0.00872500091791153
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results",
102
+ "tpp_threshold_2_total_metric": 0.0038499891757965087,
103
+ "tpp_threshold_2_intended_diff_only": 0.005599987506866455,
104
+ "tpp_threshold_2_unintended_diff_only": 0.0017499983310699462,
105
+ "tpp_threshold_5_total_metric": 0.005899989604949951,
106
+ "tpp_threshold_5_intended_diff_only": 0.008799993991851806,
107
+ "tpp_threshold_5_unintended_diff_only": 0.0029000043869018555,
108
+ "tpp_threshold_10_total_metric": 0.008850002288818359,
109
+ "tpp_threshold_10_intended_diff_only": 0.01119999885559082,
110
+ "tpp_threshold_10_unintended_diff_only": 0.002349996566772461,
111
+ "tpp_threshold_20_total_metric": 0.01929998993873596,
112
+ "tpp_threshold_20_intended_diff_only": 0.021199989318847656,
113
+ "tpp_threshold_20_unintended_diff_only": 0.0018999993801116944,
114
+ "tpp_threshold_50_total_metric": 0.03360001742839813,
115
+ "tpp_threshold_50_intended_diff_only": 0.03660001754760742,
116
+ "tpp_threshold_50_unintended_diff_only": 0.0030000001192092896,
117
+ "tpp_threshold_100_total_metric": 0.05629999041557312,
118
+ "tpp_threshold_100_intended_diff_only": 0.06019998788833618,
119
+ "tpp_threshold_100_unintended_diff_only": 0.0038999974727630614,
120
+ "tpp_threshold_500_total_metric": 0.23835001289844512,
121
+ "tpp_threshold_500_intended_diff_only": 0.2446000099182129,
122
+ "tpp_threshold_500_unintended_diff_only": 0.006249997019767761
123
+ },
124
+ {
125
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results",
126
+ "tpp_threshold_2_total_metric": 0.003099995851516724,
127
+ "tpp_threshold_2_intended_diff_only": 0.005999994277954101,
128
+ "tpp_threshold_2_unintended_diff_only": 0.002899998426437378,
129
+ "tpp_threshold_5_total_metric": 0.0021000087261199953,
130
+ "tpp_threshold_5_intended_diff_only": 0.005200004577636719,
131
+ "tpp_threshold_5_unintended_diff_only": 0.003099995851516724,
132
+ "tpp_threshold_10_total_metric": 0.01145000159740448,
133
+ "tpp_threshold_10_intended_diff_only": 0.015600001811981202,
134
+ "tpp_threshold_10_unintended_diff_only": 0.004150000214576721,
135
+ "tpp_threshold_20_total_metric": 0.013299986720085144,
136
+ "tpp_threshold_20_intended_diff_only": 0.01799999475479126,
137
+ "tpp_threshold_20_unintended_diff_only": 0.004700008034706116,
138
+ "tpp_threshold_50_total_metric": 0.027599984407424928,
139
+ "tpp_threshold_50_intended_diff_only": 0.03259998559951782,
140
+ "tpp_threshold_50_unintended_diff_only": 0.005000001192092896,
141
+ "tpp_threshold_100_total_metric": 0.039649999141693114,
142
+ "tpp_threshold_100_intended_diff_only": 0.049000000953674315,
143
+ "tpp_threshold_100_unintended_diff_only": 0.009350001811981201,
144
+ "tpp_threshold_500_total_metric": 0.13460001349449158,
145
+ "tpp_threshold_500_intended_diff_only": 0.14580001831054687,
146
+ "tpp_threshold_500_unintended_diff_only": 0.011200004816055298
147
+ }
148
+ ],
149
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
150
+ "sae_lens_id": "custom_sae",
151
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_4",
152
+ "sae_lens_version": "5.4.1",
153
+ "sae_cfg_dict": {
154
+ "model_name": "gemma-2-2b",
155
+ "d_in": 2304,
156
+ "d_sae": 16384,
157
+ "hook_layer": 12,
158
+ "hook_name": "blocks.12.hook_resid_post",
159
+ "context_size": null,
160
+ "hook_head_index": null,
161
+ "architecture": "standard",
162
+ "apply_b_dec_to_input": null,
163
+ "finetuning_scaling_factor": null,
164
+ "activation_fn_str": "",
165
+ "prepend_bos": true,
166
+ "normalize_activations": "none",
167
+ "dtype": "bfloat16",
168
+ "device": "",
169
+ "dataset_path": "",
170
+ "dataset_trust_remote_code": true,
171
+ "seqpos_slice": [
172
+ null
173
+ ],
174
+ "training_tokens": -100000,
175
+ "sae_lens_training_version": null,
176
+ "neuronpedia_id": null
177
+ },
178
+ "eval_result_unstructured": {
179
+ "LabHC/bias_in_bios_class_set1": {
180
+ "0": {
181
+ "tpp_threshold_2_total_metric": 0.0070000141859054565,
182
+ "tpp_threshold_2_intended_diff_only": 0.009000003337860107,
183
+ "tpp_threshold_2_unintended_diff_only": 0.001999989151954651,
184
+ "tpp_threshold_5_total_metric": 0.014750033617019653,
185
+ "tpp_threshold_5_intended_diff_only": 0.017000019550323486,
186
+ "tpp_threshold_5_unintended_diff_only": 0.002249985933303833,
187
+ "tpp_threshold_10_total_metric": 0.005250036716461182,
188
+ "tpp_threshold_10_intended_diff_only": 0.00700002908706665,
189
+ "tpp_threshold_10_unintended_diff_only": 0.0017499923706054688,
190
+ "tpp_threshold_20_total_metric": 0.015250027179718018,
191
+ "tpp_threshold_20_intended_diff_only": 0.017000019550323486,
192
+ "tpp_threshold_20_unintended_diff_only": 0.0017499923706054688,
193
+ "tpp_threshold_50_total_metric": 0.02700003981590271,
194
+ "tpp_threshold_50_intended_diff_only": 0.030000030994415283,
195
+ "tpp_threshold_50_unintended_diff_only": 0.0029999911785125732,
196
+ "tpp_threshold_100_total_metric": 0.04625000059604645,
197
+ "tpp_threshold_100_intended_diff_only": 0.050999999046325684,
198
+ "tpp_threshold_100_unintended_diff_only": 0.004749998450279236,
199
+ "tpp_threshold_500_total_metric": 0.18425005674362183,
200
+ "tpp_threshold_500_intended_diff_only": 0.18700003623962402,
201
+ "tpp_threshold_500_unintended_diff_only": 0.0027499794960021973
202
+ },
203
+ "1": {
204
+ "tpp_threshold_2_total_metric": 0.0027499794960021973,
205
+ "tpp_threshold_2_intended_diff_only": 0.001999974250793457,
206
+ "tpp_threshold_2_unintended_diff_only": -0.0007500052452087402,
207
+ "tpp_threshold_5_total_metric": -0.0025000721216201782,
208
+ "tpp_threshold_5_intended_diff_only": -0.001000046730041504,
209
+ "tpp_threshold_5_unintended_diff_only": 0.0015000253915786743,
210
+ "tpp_threshold_10_total_metric": 0.00475001335144043,
211
+ "tpp_threshold_10_intended_diff_only": 0.0040000081062316895,
212
+ "tpp_threshold_10_unintended_diff_only": -0.0007500052452087402,
213
+ "tpp_threshold_20_total_metric": 0.0029999911785125732,
214
+ "tpp_threshold_20_intended_diff_only": 0.0059999823570251465,
215
+ "tpp_threshold_20_unintended_diff_only": 0.0029999911785125732,
216
+ "tpp_threshold_50_total_metric": 0.02799999713897705,
217
+ "tpp_threshold_50_intended_diff_only": 0.03200000524520874,
218
+ "tpp_threshold_50_unintended_diff_only": 0.0040000081062316895,
219
+ "tpp_threshold_100_total_metric": 0.04924996197223663,
220
+ "tpp_threshold_100_intended_diff_only": 0.05399996042251587,
221
+ "tpp_threshold_100_unintended_diff_only": 0.004749998450279236,
222
+ "tpp_threshold_500_total_metric": 0.164249986410141,
223
+ "tpp_threshold_500_intended_diff_only": 0.171999990940094,
224
+ "tpp_threshold_500_unintended_diff_only": 0.007750004529953003
225
+ },
226
+ "2": {
227
+ "tpp_threshold_2_total_metric": 0.005499988794326782,
228
+ "tpp_threshold_2_intended_diff_only": 0.009000003337860107,
229
+ "tpp_threshold_2_unintended_diff_only": 0.003500014543533325,
230
+ "tpp_threshold_5_total_metric": 0.008000001311302185,
231
+ "tpp_threshold_5_intended_diff_only": 0.013000011444091797,
232
+ "tpp_threshold_5_unintended_diff_only": 0.005000010132789612,
233
+ "tpp_threshold_10_total_metric": 0.020500019192695618,
234
+ "tpp_threshold_10_intended_diff_only": 0.023000001907348633,
235
+ "tpp_threshold_10_unintended_diff_only": 0.002499982714653015,
236
+ "tpp_threshold_20_total_metric": 0.029999971389770508,
237
+ "tpp_threshold_20_intended_diff_only": 0.029999971389770508,
238
+ "tpp_threshold_20_unintended_diff_only": 0.0,
239
+ "tpp_threshold_50_total_metric": 0.028249993920326233,
240
+ "tpp_threshold_50_intended_diff_only": 0.02799999713897705,
241
+ "tpp_threshold_50_unintended_diff_only": -0.00024999678134918213,
242
+ "tpp_threshold_100_total_metric": 0.040000006556510925,
243
+ "tpp_threshold_100_intended_diff_only": 0.04100000858306885,
244
+ "tpp_threshold_100_unintended_diff_only": 0.0010000020265579224,
245
+ "tpp_threshold_500_total_metric": 0.21049997210502625,
246
+ "tpp_threshold_500_intended_diff_only": 0.21399998664855957,
247
+ "tpp_threshold_500_unintended_diff_only": 0.003500014543533325
248
+ },
249
+ "6": {
250
+ "tpp_threshold_2_total_metric": 0.00024999678134918213,
251
+ "tpp_threshold_2_intended_diff_only": 0.0009999871253967285,
252
+ "tpp_threshold_2_unintended_diff_only": 0.0007499903440475464,
253
+ "tpp_threshold_5_total_metric": 0.0012500286102294922,
254
+ "tpp_threshold_5_intended_diff_only": 0.003000020980834961,
255
+ "tpp_threshold_5_unintended_diff_only": 0.0017499923706054688,
256
+ "tpp_threshold_10_total_metric": 0.002249985933303833,
257
+ "tpp_threshold_10_intended_diff_only": 0.0059999823570251465,
258
+ "tpp_threshold_10_unintended_diff_only": 0.0037499964237213135,
259
+ "tpp_threshold_20_total_metric": 0.005749985575675964,
260
+ "tpp_threshold_20_intended_diff_only": 0.0059999823570251465,
261
+ "tpp_threshold_20_unintended_diff_only": 0.00024999678134918213,
262
+ "tpp_threshold_50_total_metric": 0.00575004518032074,
263
+ "tpp_threshold_50_intended_diff_only": 0.01100003719329834,
264
+ "tpp_threshold_50_unintended_diff_only": 0.0052499920129776,
265
+ "tpp_threshold_100_total_metric": 0.026749998331069946,
266
+ "tpp_threshold_100_intended_diff_only": 0.02899998426437378,
267
+ "tpp_threshold_100_unintended_diff_only": 0.002249985933303833,
268
+ "tpp_threshold_500_total_metric": 0.2757500410079956,
269
+ "tpp_threshold_500_intended_diff_only": 0.2850000262260437,
270
+ "tpp_threshold_500_unintended_diff_only": 0.009249985218048096
271
+ },
272
+ "9": {
273
+ "tpp_threshold_2_total_metric": 0.0037499666213989258,
274
+ "tpp_threshold_2_intended_diff_only": 0.006999969482421875,
275
+ "tpp_threshold_2_unintended_diff_only": 0.0032500028610229492,
276
+ "tpp_threshold_5_total_metric": 0.007999956607818604,
277
+ "tpp_threshold_5_intended_diff_only": 0.011999964714050293,
278
+ "tpp_threshold_5_unintended_diff_only": 0.0040000081062316895,
279
+ "tpp_threshold_10_total_metric": 0.011499956250190735,
280
+ "tpp_threshold_10_intended_diff_only": 0.015999972820281982,
281
+ "tpp_threshold_10_unintended_diff_only": 0.0045000165700912476,
282
+ "tpp_threshold_20_total_metric": 0.04249997437000275,
283
+ "tpp_threshold_20_intended_diff_only": 0.046999990940093994,
284
+ "tpp_threshold_20_unintended_diff_only": 0.0045000165700912476,
285
+ "tpp_threshold_50_total_metric": 0.07900001108646393,
286
+ "tpp_threshold_50_intended_diff_only": 0.0820000171661377,
287
+ "tpp_threshold_50_unintended_diff_only": 0.003000006079673767,
288
+ "tpp_threshold_100_total_metric": 0.11924998462200165,
289
+ "tpp_threshold_100_intended_diff_only": 0.12599998712539673,
290
+ "tpp_threshold_100_unintended_diff_only": 0.006750002503395081,
291
+ "tpp_threshold_500_total_metric": 0.357000008225441,
292
+ "tpp_threshold_500_intended_diff_only": 0.36500000953674316,
293
+ "tpp_threshold_500_unintended_diff_only": 0.008000001311302185
294
+ }
295
+ },
296
+ "canrager/amazon_reviews_mcauley_1and5": {
297
+ "1": {
298
+ "tpp_threshold_2_total_metric": 0.010500013828277588,
299
+ "tpp_threshold_2_intended_diff_only": 0.013999998569488525,
300
+ "tpp_threshold_2_unintended_diff_only": 0.0034999847412109375,
301
+ "tpp_threshold_5_total_metric": 0.009250059723854065,
302
+ "tpp_threshold_5_intended_diff_only": 0.010000050067901611,
303
+ "tpp_threshold_5_unintended_diff_only": 0.0007499903440475464,
304
+ "tpp_threshold_10_total_metric": 0.0065000057220458984,
305
+ "tpp_threshold_10_intended_diff_only": 0.013000011444091797,
306
+ "tpp_threshold_10_unintended_diff_only": 0.0065000057220458984,
307
+ "tpp_threshold_20_total_metric": 0.008249998092651367,
308
+ "tpp_threshold_20_intended_diff_only": 0.013000011444091797,
309
+ "tpp_threshold_20_unintended_diff_only": 0.00475001335144043,
310
+ "tpp_threshold_50_total_metric": 0.02350001037120819,
311
+ "tpp_threshold_50_intended_diff_only": 0.023000001907348633,
312
+ "tpp_threshold_50_unintended_diff_only": -0.0005000084638595581,
313
+ "tpp_threshold_100_total_metric": 0.016500025987625122,
314
+ "tpp_threshold_100_intended_diff_only": 0.025000035762786865,
315
+ "tpp_threshold_100_unintended_diff_only": 0.008500009775161743,
316
+ "tpp_threshold_500_total_metric": 0.07200004160404205,
317
+ "tpp_threshold_500_intended_diff_only": 0.07600003480911255,
318
+ "tpp_threshold_500_unintended_diff_only": 0.003999993205070496
319
+ },
320
+ "2": {
321
+ "tpp_threshold_2_total_metric": 0.0009999573230743408,
322
+ "tpp_threshold_2_intended_diff_only": 0.001999974250793457,
323
+ "tpp_threshold_2_unintended_diff_only": 0.0010000169277191162,
324
+ "tpp_threshold_5_total_metric": -0.007750019431114197,
325
+ "tpp_threshold_5_intended_diff_only": 0.001999974250793457,
326
+ "tpp_threshold_5_unintended_diff_only": 0.009749993681907654,
327
+ "tpp_threshold_10_total_metric": 0.007749974727630615,
328
+ "tpp_threshold_10_intended_diff_only": 0.010999977588653564,
329
+ "tpp_threshold_10_unintended_diff_only": 0.0032500028610229492,
330
+ "tpp_threshold_20_total_metric": 0.005499944090843201,
331
+ "tpp_threshold_20_intended_diff_only": 0.007999956607818604,
332
+ "tpp_threshold_20_unintended_diff_only": 0.002500012516975403,
333
+ "tpp_threshold_50_total_metric": 0.013999953866004944,
334
+ "tpp_threshold_50_intended_diff_only": 0.0209999680519104,
335
+ "tpp_threshold_50_unintended_diff_only": 0.0070000141859054565,
336
+ "tpp_threshold_100_total_metric": 0.02374996244907379,
337
+ "tpp_threshold_100_intended_diff_only": 0.034999966621398926,
338
+ "tpp_threshold_100_unintended_diff_only": 0.011250004172325134,
339
+ "tpp_threshold_500_total_metric": 0.11574997007846832,
340
+ "tpp_threshold_500_intended_diff_only": 0.12699997425079346,
341
+ "tpp_threshold_500_unintended_diff_only": 0.011250004172325134
342
+ },
343
+ "3": {
344
+ "tpp_threshold_2_total_metric": -0.007500007748603821,
345
+ "tpp_threshold_2_intended_diff_only": -0.0040000081062316895,
346
+ "tpp_threshold_2_unintended_diff_only": 0.0034999996423721313,
347
+ "tpp_threshold_5_total_metric": 0.0004999935626983643,
348
+ "tpp_threshold_5_intended_diff_only": 0.0009999871253967285,
349
+ "tpp_threshold_5_unintended_diff_only": 0.0004999935626983643,
350
+ "tpp_threshold_10_total_metric": 0.012000009417533875,
351
+ "tpp_threshold_10_intended_diff_only": 0.013999998569488525,
352
+ "tpp_threshold_10_unintended_diff_only": 0.001999989151954651,
353
+ "tpp_threshold_20_total_metric": 0.002749994397163391,
354
+ "tpp_threshold_20_intended_diff_only": 0.009000003337860107,
355
+ "tpp_threshold_20_unintended_diff_only": 0.006250008940696716,
356
+ "tpp_threshold_50_total_metric": 0.016749992966651917,
357
+ "tpp_threshold_50_intended_diff_only": 0.018999993801116943,
358
+ "tpp_threshold_50_unintended_diff_only": 0.002250000834465027,
359
+ "tpp_threshold_100_total_metric": 0.040249988436698914,
360
+ "tpp_threshold_100_intended_diff_only": 0.046999990940093994,
361
+ "tpp_threshold_100_unintended_diff_only": 0.006750002503395081,
362
+ "tpp_threshold_500_total_metric": 0.12049998342990875,
363
+ "tpp_threshold_500_intended_diff_only": 0.1340000033378601,
364
+ "tpp_threshold_500_unintended_diff_only": 0.013500019907951355
365
+ },
366
+ "5": {
367
+ "tpp_threshold_2_total_metric": -0.0054999589920043945,
368
+ "tpp_threshold_2_intended_diff_only": -0.001999974250793457,
369
+ "tpp_threshold_2_unintended_diff_only": 0.0034999847412109375,
370
+ "tpp_threshold_5_total_metric": -0.0027499794960021973,
371
+ "tpp_threshold_5_intended_diff_only": 0.003000020980834961,
372
+ "tpp_threshold_5_unintended_diff_only": 0.005750000476837158,
373
+ "tpp_threshold_10_total_metric": -0.007249996066093445,
374
+ "tpp_threshold_10_intended_diff_only": -0.0009999871253967285,
375
+ "tpp_threshold_10_unintended_diff_only": 0.006250008940696716,
376
+ "tpp_threshold_20_total_metric": 0.008249983191490173,
377
+ "tpp_threshold_20_intended_diff_only": 0.014999985694885254,
378
+ "tpp_threshold_20_unintended_diff_only": 0.006750002503395081,
379
+ "tpp_threshold_50_total_metric": 0.020749986171722412,
380
+ "tpp_threshold_50_intended_diff_only": 0.02899998426437378,
381
+ "tpp_threshold_50_unintended_diff_only": 0.008249998092651367,
382
+ "tpp_threshold_100_total_metric": 0.03675001859664917,
383
+ "tpp_threshold_100_intended_diff_only": 0.04900002479553223,
384
+ "tpp_threshold_100_unintended_diff_only": 0.012250006198883057,
385
+ "tpp_threshold_500_total_metric": 0.11750003695487976,
386
+ "tpp_threshold_500_intended_diff_only": 0.13600003719329834,
387
+ "tpp_threshold_500_unintended_diff_only": 0.01850000023841858
388
+ },
389
+ "6": {
390
+ "tpp_threshold_2_total_metric": 0.016999974846839905,
391
+ "tpp_threshold_2_intended_diff_only": 0.019999980926513672,
392
+ "tpp_threshold_2_unintended_diff_only": 0.003000006079673767,
393
+ "tpp_threshold_5_total_metric": 0.01124998927116394,
394
+ "tpp_threshold_5_intended_diff_only": 0.009999990463256836,
395
+ "tpp_threshold_5_unintended_diff_only": -0.0012499988079071045,
396
+ "tpp_threshold_10_total_metric": 0.03825001418590546,
397
+ "tpp_threshold_10_intended_diff_only": 0.04100000858306885,
398
+ "tpp_threshold_10_unintended_diff_only": 0.002749994397163391,
399
+ "tpp_threshold_20_total_metric": 0.04175001382827759,
400
+ "tpp_threshold_20_intended_diff_only": 0.04500001668930054,
401
+ "tpp_threshold_20_unintended_diff_only": 0.0032500028610229492,
402
+ "tpp_threshold_50_total_metric": 0.06299997866153717,
403
+ "tpp_threshold_50_intended_diff_only": 0.07099997997283936,
404
+ "tpp_threshold_50_unintended_diff_only": 0.008000001311302185,
405
+ "tpp_threshold_100_total_metric": 0.08100000023841858,
406
+ "tpp_threshold_100_intended_diff_only": 0.08899998664855957,
407
+ "tpp_threshold_100_unintended_diff_only": 0.007999986410140991,
408
+ "tpp_threshold_500_total_metric": 0.247250035405159,
409
+ "tpp_threshold_500_intended_diff_only": 0.2560000419616699,
410
+ "tpp_threshold_500_unintended_diff_only": 0.008750006556510925
411
+ }
412
+ }
413
+ }
414
+ }
old_relu_eval_results/tpp/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_5_custom_sae_eval_results.json ADDED
@@ -0,0 +1,414 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "tpp",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "canrager/amazon_reviews_mcauley_1and5"
8
+ ],
9
+ "perform_scr": false,
10
+ "early_stopping_patience": 20,
11
+ "train_set_size": 4000,
12
+ "test_set_size": 1000,
13
+ "context_length": 128,
14
+ "probe_train_batch_size": 16,
15
+ "probe_test_batch_size": 500,
16
+ "probe_epochs": 20,
17
+ "probe_lr": 0.001,
18
+ "probe_l1_penalty": 0.001,
19
+ "sae_batch_size": 125,
20
+ "llm_batch_size": 32,
21
+ "llm_dtype": "bfloat16",
22
+ "lower_vram_usage": false,
23
+ "model_name": "gemma-2-2b",
24
+ "n_values": [
25
+ 2,
26
+ 5,
27
+ 10,
28
+ 20,
29
+ 50,
30
+ 100,
31
+ 500
32
+ ],
33
+ "column1_vals_lookup": {
34
+ "LabHC/bias_in_bios_class_set1": [
35
+ [
36
+ "professor",
37
+ "nurse"
38
+ ],
39
+ [
40
+ "architect",
41
+ "journalist"
42
+ ],
43
+ [
44
+ "surgeon",
45
+ "psychologist"
46
+ ],
47
+ [
48
+ "attorney",
49
+ "teacher"
50
+ ]
51
+ ],
52
+ "canrager/amazon_reviews_mcauley_1and5": [
53
+ [
54
+ "Books",
55
+ "CDs_and_Vinyl"
56
+ ],
57
+ [
58
+ "Software",
59
+ "Electronics"
60
+ ],
61
+ [
62
+ "Pet_Supplies",
63
+ "Office_Products"
64
+ ],
65
+ [
66
+ "Industrial_and_Scientific",
67
+ "Toys_and_Games"
68
+ ]
69
+ ]
70
+ }
71
+ },
72
+ "eval_id": "9cc117bf-8880-4c1e-be3d-07555801ce77",
73
+ "datetime_epoch_millis": 1738808138146,
74
+ "eval_result_metrics": {
75
+ "tpp_metrics": {
76
+ "tpp_threshold_2_total_metric": 0.0010750010609626769,
77
+ "tpp_threshold_2_intended_diff_only": 0.0034000039100646976,
78
+ "tpp_threshold_2_unintended_diff_only": 0.00232500284910202,
79
+ "tpp_threshold_5_total_metric": 0.0006999984383583069,
80
+ "tpp_threshold_5_intended_diff_only": 0.0031000018119812013,
81
+ "tpp_threshold_5_unintended_diff_only": 0.0024000033736228943,
82
+ "tpp_threshold_10_total_metric": 0.005324994027614593,
83
+ "tpp_threshold_10_intended_diff_only": 0.008099997043609619,
84
+ "tpp_threshold_10_unintended_diff_only": 0.0027750030159950255,
85
+ "tpp_threshold_20_total_metric": 0.009074991941452027,
86
+ "tpp_threshold_20_intended_diff_only": 0.01199999451637268,
87
+ "tpp_threshold_20_unintended_diff_only": 0.0029250025749206544,
88
+ "tpp_threshold_50_total_metric": 0.019300003349781037,
89
+ "tpp_threshold_50_intended_diff_only": 0.02290000319480896,
90
+ "tpp_threshold_50_unintended_diff_only": 0.003599999845027924,
91
+ "tpp_threshold_100_total_metric": 0.029224996268749238,
92
+ "tpp_threshold_100_intended_diff_only": 0.03529999852180481,
93
+ "tpp_threshold_100_unintended_diff_only": 0.0060750022530555725,
94
+ "tpp_threshold_500_total_metric": 0.11080000549554825,
95
+ "tpp_threshold_500_intended_diff_only": 0.11790000796318054,
96
+ "tpp_threshold_500_unintended_diff_only": 0.007100002467632293
97
+ }
98
+ },
99
+ "eval_result_details": [
100
+ {
101
+ "dataset_name": "LabHC/bias_in_bios_class_set1_tpp_results",
102
+ "tpp_threshold_2_total_metric": 0.002349993586540222,
103
+ "tpp_threshold_2_intended_diff_only": 0.0039999961853027345,
104
+ "tpp_threshold_2_unintended_diff_only": 0.0016500025987625123,
105
+ "tpp_threshold_5_total_metric": 0.003099992871284485,
106
+ "tpp_threshold_5_intended_diff_only": 0.004999995231628418,
107
+ "tpp_threshold_5_unintended_diff_only": 0.001900002360343933,
108
+ "tpp_threshold_10_total_metric": 0.006099992990493774,
109
+ "tpp_threshold_10_intended_diff_only": 0.007799994945526123,
110
+ "tpp_threshold_10_unintended_diff_only": 0.0017000019550323486,
111
+ "tpp_threshold_20_total_metric": 0.011999988555908203,
112
+ "tpp_threshold_20_intended_diff_only": 0.013399994373321534,
113
+ "tpp_threshold_20_unintended_diff_only": 0.0014000058174133301,
114
+ "tpp_threshold_50_total_metric": 0.02170000672340393,
115
+ "tpp_threshold_50_intended_diff_only": 0.02380000352859497,
116
+ "tpp_threshold_50_unintended_diff_only": 0.00209999680519104,
117
+ "tpp_threshold_100_total_metric": 0.03549998998641968,
118
+ "tpp_threshold_100_intended_diff_only": 0.03879998922348023,
119
+ "tpp_threshold_100_unintended_diff_only": 0.003299999237060547,
120
+ "tpp_threshold_500_total_metric": 0.1364999920129776,
121
+ "tpp_threshold_500_intended_diff_only": 0.14199999570846558,
122
+ "tpp_threshold_500_unintended_diff_only": 0.005500003695487976
123
+ },
124
+ {
125
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_tpp_results",
126
+ "tpp_threshold_2_total_metric": -0.00019999146461486817,
127
+ "tpp_threshold_2_intended_diff_only": 0.0028000116348266602,
128
+ "tpp_threshold_2_unintended_diff_only": 0.003000003099441528,
129
+ "tpp_threshold_5_total_metric": -0.0016999959945678711,
130
+ "tpp_threshold_5_intended_diff_only": 0.0012000083923339843,
131
+ "tpp_threshold_5_unintended_diff_only": 0.0029000043869018555,
132
+ "tpp_threshold_10_total_metric": 0.004549995064735413,
133
+ "tpp_threshold_10_intended_diff_only": 0.008399999141693116,
134
+ "tpp_threshold_10_unintended_diff_only": 0.0038500040769577025,
135
+ "tpp_threshold_20_total_metric": 0.00614999532699585,
136
+ "tpp_threshold_20_intended_diff_only": 0.010599994659423828,
137
+ "tpp_threshold_20_unintended_diff_only": 0.004449999332427979,
138
+ "tpp_threshold_50_total_metric": 0.016899999976158143,
139
+ "tpp_threshold_50_intended_diff_only": 0.02200000286102295,
140
+ "tpp_threshold_50_unintended_diff_only": 0.0051000028848648075,
141
+ "tpp_threshold_100_total_metric": 0.022950002551078798,
142
+ "tpp_threshold_100_intended_diff_only": 0.03180000782012939,
143
+ "tpp_threshold_100_unintended_diff_only": 0.008850005269050599,
144
+ "tpp_threshold_500_total_metric": 0.08510001897811889,
145
+ "tpp_threshold_500_intended_diff_only": 0.0938000202178955,
146
+ "tpp_threshold_500_unintended_diff_only": 0.00870000123977661
147
+ }
148
+ ],
149
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
150
+ "sae_lens_id": "custom_sae",
151
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_5",
152
+ "sae_lens_version": "5.4.1",
153
+ "sae_cfg_dict": {
154
+ "model_name": "gemma-2-2b",
155
+ "d_in": 2304,
156
+ "d_sae": 16384,
157
+ "hook_layer": 12,
158
+ "hook_name": "blocks.12.hook_resid_post",
159
+ "context_size": null,
160
+ "hook_head_index": null,
161
+ "architecture": "standard",
162
+ "apply_b_dec_to_input": null,
163
+ "finetuning_scaling_factor": null,
164
+ "activation_fn_str": "",
165
+ "prepend_bos": true,
166
+ "normalize_activations": "none",
167
+ "dtype": "bfloat16",
168
+ "device": "",
169
+ "dataset_path": "",
170
+ "dataset_trust_remote_code": true,
171
+ "seqpos_slice": [
172
+ null
173
+ ],
174
+ "training_tokens": -100000,
175
+ "sae_lens_training_version": null,
176
+ "neuronpedia_id": null
177
+ },
178
+ "eval_result_unstructured": {
179
+ "LabHC/bias_in_bios_class_set1": {
180
+ "0": {
181
+ "tpp_threshold_2_total_metric": 0.005500033497810364,
182
+ "tpp_threshold_2_intended_diff_only": 0.00700002908706665,
183
+ "tpp_threshold_2_unintended_diff_only": 0.0014999955892562866,
184
+ "tpp_threshold_5_total_metric": 0.010000035166740417,
185
+ "tpp_threshold_5_intended_diff_only": 0.01100003719329834,
186
+ "tpp_threshold_5_unintended_diff_only": 0.0010000020265579224,
187
+ "tpp_threshold_10_total_metric": 0.002500012516975403,
188
+ "tpp_threshold_10_intended_diff_only": 0.0040000081062316895,
189
+ "tpp_threshold_10_unintended_diff_only": 0.0014999955892562866,
190
+ "tpp_threshold_20_total_metric": 0.008999988436698914,
191
+ "tpp_threshold_20_intended_diff_only": 0.009999990463256836,
192
+ "tpp_threshold_20_unintended_diff_only": 0.0010000020265579224,
193
+ "tpp_threshold_50_total_metric": 0.016499996185302734,
194
+ "tpp_threshold_50_intended_diff_only": 0.018999993801116943,
195
+ "tpp_threshold_50_unintended_diff_only": 0.002499997615814209,
196
+ "tpp_threshold_100_total_metric": 0.02399998903274536,
197
+ "tpp_threshold_100_intended_diff_only": 0.02799999713897705,
198
+ "tpp_threshold_100_unintended_diff_only": 0.0040000081062316895,
199
+ "tpp_threshold_500_total_metric": 0.12150004506111145,
200
+ "tpp_threshold_500_intended_diff_only": 0.12200003862380981,
201
+ "tpp_threshold_500_unintended_diff_only": 0.0004999935626983643
202
+ },
203
+ "1": {
204
+ "tpp_threshold_2_total_metric": 0.0032499581575393677,
205
+ "tpp_threshold_2_intended_diff_only": 0.0029999613761901855,
206
+ "tpp_threshold_2_unintended_diff_only": -0.00024999678134918213,
207
+ "tpp_threshold_5_total_metric": -0.0025000572204589844,
208
+ "tpp_threshold_5_intended_diff_only": -0.001000046730041504,
209
+ "tpp_threshold_5_unintended_diff_only": 0.0015000104904174805,
210
+ "tpp_threshold_10_total_metric": 0.002499982714653015,
211
+ "tpp_threshold_10_intended_diff_only": 0.0009999871253967285,
212
+ "tpp_threshold_10_unintended_diff_only": -0.0014999955892562866,
213
+ "tpp_threshold_20_total_metric": 0.0029999911785125732,
214
+ "tpp_threshold_20_intended_diff_only": 0.004999995231628418,
215
+ "tpp_threshold_20_unintended_diff_only": 0.0020000040531158447,
216
+ "tpp_threshold_50_total_metric": 0.02799999713897705,
217
+ "tpp_threshold_50_intended_diff_only": 0.03200000524520874,
218
+ "tpp_threshold_50_unintended_diff_only": 0.0040000081062316895,
219
+ "tpp_threshold_100_total_metric": 0.039999961853027344,
220
+ "tpp_threshold_100_intended_diff_only": 0.04499995708465576,
221
+ "tpp_threshold_100_unintended_diff_only": 0.004999995231628418,
222
+ "tpp_threshold_500_total_metric": 0.11849997937679291,
223
+ "tpp_threshold_500_intended_diff_only": 0.12599998712539673,
224
+ "tpp_threshold_500_unintended_diff_only": 0.007500007748603821
225
+ },
226
+ "2": {
227
+ "tpp_threshold_2_total_metric": -0.000500023365020752,
228
+ "tpp_threshold_2_intended_diff_only": 0.001999974250793457,
229
+ "tpp_threshold_2_unintended_diff_only": 0.002499997615814209,
230
+ "tpp_threshold_5_total_metric": 0.002249971032142639,
231
+ "tpp_threshold_5_intended_diff_only": 0.0059999823570251465,
232
+ "tpp_threshold_5_unintended_diff_only": 0.0037500113248825073,
233
+ "tpp_threshold_10_total_metric": 0.01600000262260437,
234
+ "tpp_threshold_10_intended_diff_only": 0.018000006675720215,
235
+ "tpp_threshold_10_unintended_diff_only": 0.0020000040531158447,
236
+ "tpp_threshold_20_total_metric": 0.02374999225139618,
237
+ "tpp_threshold_20_intended_diff_only": 0.02399998903274536,
238
+ "tpp_threshold_20_unintended_diff_only": 0.00024999678134918213,
239
+ "tpp_threshold_50_total_metric": 0.01800002157688141,
240
+ "tpp_threshold_50_intended_diff_only": 0.017000019550323486,
241
+ "tpp_threshold_50_unintended_diff_only": -0.0010000020265579224,
242
+ "tpp_threshold_100_total_metric": 0.01874999701976776,
243
+ "tpp_threshold_100_intended_diff_only": 0.018999993801116943,
244
+ "tpp_threshold_100_unintended_diff_only": 0.00024999678134918213,
245
+ "tpp_threshold_500_total_metric": 0.0807499885559082,
246
+ "tpp_threshold_500_intended_diff_only": 0.07999998331069946,
247
+ "tpp_threshold_500_unintended_diff_only": -0.0007500052452087402
248
+ },
249
+ "6": {
250
+ "tpp_threshold_2_total_metric": 0.0017500072717666626,
251
+ "tpp_threshold_2_intended_diff_only": 0.003000020980834961,
252
+ "tpp_threshold_2_unintended_diff_only": 0.0012500137090682983,
253
+ "tpp_threshold_5_total_metric": 0.0022500455379486084,
254
+ "tpp_threshold_5_intended_diff_only": 0.0020000338554382324,
255
+ "tpp_threshold_5_unintended_diff_only": -0.000250011682510376,
256
+ "tpp_threshold_10_total_metric": 0.0032499730587005615,
257
+ "tpp_threshold_10_intended_diff_only": 0.0059999823570251465,
258
+ "tpp_threshold_10_unintended_diff_only": 0.002750009298324585,
259
+ "tpp_threshold_20_total_metric": 0.006749972701072693,
260
+ "tpp_threshold_20_intended_diff_only": 0.0059999823570251465,
261
+ "tpp_threshold_20_unintended_diff_only": -0.0007499903440475464,
262
+ "tpp_threshold_50_total_metric": 0.009000048041343689,
263
+ "tpp_threshold_50_intended_diff_only": 0.01100003719329834,
264
+ "tpp_threshold_50_unintended_diff_only": 0.001999989151954651,
265
+ "tpp_threshold_100_total_metric": 0.016749978065490723,
266
+ "tpp_threshold_100_intended_diff_only": 0.019999980926513672,
267
+ "tpp_threshold_100_unintended_diff_only": 0.0032500028610229492,
268
+ "tpp_threshold_500_total_metric": 0.16824999451637268,
269
+ "tpp_threshold_500_intended_diff_only": 0.1809999942779541,
270
+ "tpp_threshold_500_unintended_diff_only": 0.012749999761581421
271
+ },
272
+ "9": {
273
+ "tpp_threshold_2_total_metric": 0.0017499923706054688,
274
+ "tpp_threshold_2_intended_diff_only": 0.004999995231628418,
275
+ "tpp_threshold_2_unintended_diff_only": 0.0032500028610229492,
276
+ "tpp_threshold_5_total_metric": 0.0034999698400497437,
277
+ "tpp_threshold_5_intended_diff_only": 0.006999969482421875,
278
+ "tpp_threshold_5_unintended_diff_only": 0.0034999996423721313,
279
+ "tpp_threshold_10_total_metric": 0.0062499940395355225,
280
+ "tpp_threshold_10_intended_diff_only": 0.009999990463256836,
281
+ "tpp_threshold_10_unintended_diff_only": 0.0037499964237213135,
282
+ "tpp_threshold_20_total_metric": 0.017499998211860657,
283
+ "tpp_threshold_20_intended_diff_only": 0.022000014781951904,
284
+ "tpp_threshold_20_unintended_diff_only": 0.0045000165700912476,
285
+ "tpp_threshold_50_total_metric": 0.03699997067451477,
286
+ "tpp_threshold_50_intended_diff_only": 0.039999961853027344,
287
+ "tpp_threshold_50_unintended_diff_only": 0.0029999911785125732,
288
+ "tpp_threshold_100_total_metric": 0.0780000239610672,
289
+ "tpp_threshold_100_intended_diff_only": 0.0820000171661377,
290
+ "tpp_threshold_100_unintended_diff_only": 0.003999993205070496,
291
+ "tpp_threshold_500_total_metric": 0.19349995255470276,
292
+ "tpp_threshold_500_intended_diff_only": 0.20099997520446777,
293
+ "tpp_threshold_500_unintended_diff_only": 0.007500022649765015
294
+ }
295
+ },
296
+ "canrager/amazon_reviews_mcauley_1and5": {
297
+ "1": {
298
+ "tpp_threshold_2_total_metric": 0.01175004243850708,
299
+ "tpp_threshold_2_intended_diff_only": 0.01500004529953003,
300
+ "tpp_threshold_2_unintended_diff_only": 0.0032500028610229492,
301
+ "tpp_threshold_5_total_metric": 0.006250008940696716,
302
+ "tpp_threshold_5_intended_diff_only": 0.008000016212463379,
303
+ "tpp_threshold_5_unintended_diff_only": 0.0017500072717666626,
304
+ "tpp_threshold_10_total_metric": 0.003000006079673767,
305
+ "tpp_threshold_10_intended_diff_only": 0.009000003337860107,
306
+ "tpp_threshold_10_unintended_diff_only": 0.00599999725818634,
307
+ "tpp_threshold_20_total_metric": 0.003750041127204895,
308
+ "tpp_threshold_20_intended_diff_only": 0.00700002908706665,
309
+ "tpp_threshold_20_unintended_diff_only": 0.0032499879598617554,
310
+ "tpp_threshold_50_total_metric": 0.015000015497207642,
311
+ "tpp_threshold_50_intended_diff_only": 0.013999998569488525,
312
+ "tpp_threshold_50_unintended_diff_only": -0.0010000169277191162,
313
+ "tpp_threshold_100_total_metric": 0.00850003957748413,
314
+ "tpp_threshold_100_intended_diff_only": 0.01500004529953003,
315
+ "tpp_threshold_100_unintended_diff_only": 0.0065000057220458984,
316
+ "tpp_threshold_500_total_metric": 0.033750057220458984,
317
+ "tpp_threshold_500_intended_diff_only": 0.03800004720687866,
318
+ "tpp_threshold_500_unintended_diff_only": 0.004249989986419678
319
+ },
320
+ "2": {
321
+ "tpp_threshold_2_total_metric": 0.0012499690055847168,
322
+ "tpp_threshold_2_intended_diff_only": 0.001999974250793457,
323
+ "tpp_threshold_2_unintended_diff_only": 0.0007500052452087402,
324
+ "tpp_threshold_5_total_metric": -0.009000003337860107,
325
+ "tpp_threshold_5_intended_diff_only": 0.0,
326
+ "tpp_threshold_5_unintended_diff_only": 0.009000003337860107,
327
+ "tpp_threshold_10_total_metric": 0.004499971866607666,
328
+ "tpp_threshold_10_intended_diff_only": 0.006999969482421875,
329
+ "tpp_threshold_10_unintended_diff_only": 0.002499997615814209,
330
+ "tpp_threshold_20_total_metric": -0.0012499988079071045,
331
+ "tpp_threshold_20_intended_diff_only": 0.0,
332
+ "tpp_threshold_20_unintended_diff_only": 0.0012499988079071045,
333
+ "tpp_threshold_50_total_metric": 0.00349995493888855,
334
+ "tpp_threshold_50_intended_diff_only": 0.010999977588653564,
335
+ "tpp_threshold_50_unintended_diff_only": 0.007500022649765015,
336
+ "tpp_threshold_100_total_metric": 0.00899997353553772,
337
+ "tpp_threshold_100_intended_diff_only": 0.019999980926513672,
338
+ "tpp_threshold_100_unintended_diff_only": 0.011000007390975952,
339
+ "tpp_threshold_500_total_metric": 0.07375001907348633,
340
+ "tpp_threshold_500_intended_diff_only": 0.0820000171661377,
341
+ "tpp_threshold_500_unintended_diff_only": 0.008249998092651367
342
+ },
343
+ "3": {
344
+ "tpp_threshold_2_total_metric": -0.009000003337860107,
345
+ "tpp_threshold_2_intended_diff_only": -0.004999995231628418,
346
+ "tpp_threshold_2_unintended_diff_only": 0.0040000081062316895,
347
+ "tpp_threshold_5_total_metric": -0.0052499920129776,
348
+ "tpp_threshold_5_intended_diff_only": -0.0059999823570251465,
349
+ "tpp_threshold_5_unintended_diff_only": -0.0007499903440475464,
350
+ "tpp_threshold_10_total_metric": 0.007499992847442627,
351
+ "tpp_threshold_10_intended_diff_only": 0.009000003337860107,
352
+ "tpp_threshold_10_unintended_diff_only": 0.0015000104904174805,
353
+ "tpp_threshold_20_total_metric": -0.004000037908554077,
354
+ "tpp_threshold_20_intended_diff_only": 0.001999974250793457,
355
+ "tpp_threshold_20_unintended_diff_only": 0.006000012159347534,
356
+ "tpp_threshold_50_total_metric": 0.010999992489814758,
357
+ "tpp_threshold_50_intended_diff_only": 0.013999998569488525,
358
+ "tpp_threshold_50_unintended_diff_only": 0.003000006079673767,
359
+ "tpp_threshold_100_total_metric": 0.030500009655952454,
360
+ "tpp_threshold_100_intended_diff_only": 0.03600001335144043,
361
+ "tpp_threshold_100_unintended_diff_only": 0.005500003695487976,
362
+ "tpp_threshold_500_total_metric": 0.07175000011920929,
363
+ "tpp_threshold_500_intended_diff_only": 0.08300000429153442,
364
+ "tpp_threshold_500_unintended_diff_only": 0.011250004172325134
365
+ },
366
+ "5": {
367
+ "tpp_threshold_2_total_metric": -0.009249985218048096,
368
+ "tpp_threshold_2_intended_diff_only": -0.0059999823570251465,
369
+ "tpp_threshold_2_unintended_diff_only": 0.0032500028610229492,
370
+ "tpp_threshold_5_total_metric": -0.006749987602233887,
371
+ "tpp_threshold_5_intended_diff_only": -0.0009999871253967285,
372
+ "tpp_threshold_5_unintended_diff_only": 0.005750000476837158,
373
+ "tpp_threshold_10_total_metric": -0.012249991297721863,
374
+ "tpp_threshold_10_intended_diff_only": -0.006999969482421875,
375
+ "tpp_threshold_10_unintended_diff_only": 0.005250021815299988,
376
+ "tpp_threshold_20_total_metric": -0.005250006914138794,
377
+ "tpp_threshold_20_intended_diff_only": 0.0009999871253967285,
378
+ "tpp_threshold_20_unintended_diff_only": 0.0062499940395355225,
379
+ "tpp_threshold_50_total_metric": 0.005750015377998352,
380
+ "tpp_threshold_50_intended_diff_only": 0.013000011444091797,
381
+ "tpp_threshold_50_unintended_diff_only": 0.007249996066093445,
382
+ "tpp_threshold_100_total_metric": 0.009499996900558472,
383
+ "tpp_threshold_100_intended_diff_only": 0.023000001907348633,
384
+ "tpp_threshold_100_unintended_diff_only": 0.013500005006790161,
385
+ "tpp_threshold_500_total_metric": 0.06675000488758087,
386
+ "tpp_threshold_500_intended_diff_only": 0.078000009059906,
387
+ "tpp_threshold_500_unintended_diff_only": 0.011250004172325134
388
+ },
389
+ "6": {
390
+ "tpp_threshold_2_total_metric": 0.004250019788742065,
391
+ "tpp_threshold_2_intended_diff_only": 0.008000016212463379,
392
+ "tpp_threshold_2_unintended_diff_only": 0.0037499964237213135,
393
+ "tpp_threshold_5_total_metric": 0.0062499940395355225,
394
+ "tpp_threshold_5_intended_diff_only": 0.004999995231628418,
395
+ "tpp_threshold_5_unintended_diff_only": -0.0012499988079071045,
396
+ "tpp_threshold_10_total_metric": 0.019999995827674866,
397
+ "tpp_threshold_10_intended_diff_only": 0.02399998903274536,
398
+ "tpp_threshold_10_unintended_diff_only": 0.003999993205070496,
399
+ "tpp_threshold_20_total_metric": 0.03749997913837433,
400
+ "tpp_threshold_20_intended_diff_only": 0.042999982833862305,
401
+ "tpp_threshold_20_unintended_diff_only": 0.005500003695487976,
402
+ "tpp_threshold_50_total_metric": 0.04925002157688141,
403
+ "tpp_threshold_50_intended_diff_only": 0.058000028133392334,
404
+ "tpp_threshold_50_unintended_diff_only": 0.008750006556510925,
405
+ "tpp_threshold_100_total_metric": 0.057249993085861206,
406
+ "tpp_threshold_100_intended_diff_only": 0.06499999761581421,
407
+ "tpp_threshold_100_unintended_diff_only": 0.007750004529953003,
408
+ "tpp_threshold_500_total_metric": 0.179500013589859,
409
+ "tpp_threshold_500_intended_diff_only": 0.18800002336502075,
410
+ "tpp_threshold_500_unintended_diff_only": 0.008500009775161743
411
+ }
412
+ }
413
+ }
414
+ }
old_relu_eval_results/unlearning/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_0_custom_sae_eval_results.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "unlearning",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "wmdp-bio",
7
+ "high_school_us_history",
8
+ "college_computer_science",
9
+ "high_school_geography",
10
+ "human_aging"
11
+ ],
12
+ "intervention_method": "clamp_feature_activation",
13
+ "retain_thresholds": [
14
+ 0.001,
15
+ 0.01
16
+ ],
17
+ "n_features_list": [
18
+ 10,
19
+ 20
20
+ ],
21
+ "multipliers": [
22
+ 25,
23
+ 50,
24
+ 100,
25
+ 200
26
+ ],
27
+ "dataset_size": 1024,
28
+ "seq_len": 1024,
29
+ "n_batch_loss_added": 50,
30
+ "target_metric": "correct",
31
+ "save_metrics": true,
32
+ "model_name": "gemma-2-2b-it",
33
+ "llm_batch_size": 4,
34
+ "llm_dtype": "bfloat16"
35
+ },
36
+ "eval_id": "27152fa7-494d-44cf-ac35-e2e33052a1b4",
37
+ "datetime_epoch_millis": 1738815248274,
38
+ "eval_result_metrics": {
39
+ "unlearning": {
40
+ "unlearning_score": 0.007504701614379883
41
+ }
42
+ },
43
+ "eval_result_details": [],
44
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
45
+ "sae_lens_id": "custom_sae",
46
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_0",
47
+ "sae_lens_version": "5.4.1",
48
+ "sae_cfg_dict": {
49
+ "model_name": "gemma-2-2b",
50
+ "d_in": 2304,
51
+ "d_sae": 16384,
52
+ "hook_layer": 12,
53
+ "hook_name": "blocks.12.hook_resid_post",
54
+ "context_size": null,
55
+ "hook_head_index": null,
56
+ "architecture": "standard",
57
+ "apply_b_dec_to_input": null,
58
+ "finetuning_scaling_factor": null,
59
+ "activation_fn_str": "",
60
+ "prepend_bos": true,
61
+ "normalize_activations": "none",
62
+ "dtype": "bfloat16",
63
+ "device": "",
64
+ "dataset_path": "",
65
+ "dataset_trust_remote_code": true,
66
+ "seqpos_slice": [
67
+ null
68
+ ],
69
+ "training_tokens": -100000,
70
+ "sae_lens_training_version": null,
71
+ "neuronpedia_id": null
72
+ },
73
+ "eval_result_unstructured": null
74
+ }
old_relu_eval_results/unlearning/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_1_custom_sae_eval_results.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "unlearning",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "wmdp-bio",
7
+ "high_school_us_history",
8
+ "college_computer_science",
9
+ "high_school_geography",
10
+ "human_aging"
11
+ ],
12
+ "intervention_method": "clamp_feature_activation",
13
+ "retain_thresholds": [
14
+ 0.001,
15
+ 0.01
16
+ ],
17
+ "n_features_list": [
18
+ 10,
19
+ 20
20
+ ],
21
+ "multipliers": [
22
+ 25,
23
+ 50,
24
+ 100,
25
+ 200
26
+ ],
27
+ "dataset_size": 1024,
28
+ "seq_len": 1024,
29
+ "n_batch_loss_added": 50,
30
+ "target_metric": "correct",
31
+ "save_metrics": true,
32
+ "model_name": "gemma-2-2b-it",
33
+ "llm_batch_size": 4,
34
+ "llm_dtype": "bfloat16"
35
+ },
36
+ "eval_id": "b6ab776e-c77a-46bb-9b89-5bd9905335f9",
37
+ "datetime_epoch_millis": 1738815974660,
38
+ "eval_result_metrics": {
39
+ "unlearning": {
40
+ "unlearning_score": 0.015009403228759766
41
+ }
42
+ },
43
+ "eval_result_details": [],
44
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
45
+ "sae_lens_id": "custom_sae",
46
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_1",
47
+ "sae_lens_version": "5.4.1",
48
+ "sae_cfg_dict": {
49
+ "model_name": "gemma-2-2b",
50
+ "d_in": 2304,
51
+ "d_sae": 16384,
52
+ "hook_layer": 12,
53
+ "hook_name": "blocks.12.hook_resid_post",
54
+ "context_size": null,
55
+ "hook_head_index": null,
56
+ "architecture": "standard",
57
+ "apply_b_dec_to_input": null,
58
+ "finetuning_scaling_factor": null,
59
+ "activation_fn_str": "",
60
+ "prepend_bos": true,
61
+ "normalize_activations": "none",
62
+ "dtype": "bfloat16",
63
+ "device": "",
64
+ "dataset_path": "",
65
+ "dataset_trust_remote_code": true,
66
+ "seqpos_slice": [
67
+ null
68
+ ],
69
+ "training_tokens": -100000,
70
+ "sae_lens_training_version": null,
71
+ "neuronpedia_id": null
72
+ },
73
+ "eval_result_unstructured": null
74
+ }
old_relu_eval_results/unlearning/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_2_custom_sae_eval_results.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "unlearning",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "wmdp-bio",
7
+ "high_school_us_history",
8
+ "college_computer_science",
9
+ "high_school_geography",
10
+ "human_aging"
11
+ ],
12
+ "intervention_method": "clamp_feature_activation",
13
+ "retain_thresholds": [
14
+ 0.001,
15
+ 0.01
16
+ ],
17
+ "n_features_list": [
18
+ 10,
19
+ 20
20
+ ],
21
+ "multipliers": [
22
+ 25,
23
+ 50,
24
+ 100,
25
+ 200
26
+ ],
27
+ "dataset_size": 1024,
28
+ "seq_len": 1024,
29
+ "n_batch_loss_added": 50,
30
+ "target_metric": "correct",
31
+ "save_metrics": true,
32
+ "model_name": "gemma-2-2b-it",
33
+ "llm_batch_size": 4,
34
+ "llm_dtype": "bfloat16"
35
+ },
36
+ "eval_id": "4e677628-eb8d-40f5-ade8-e25072775bf1",
37
+ "datetime_epoch_millis": 1738818108714,
38
+ "eval_result_metrics": {
39
+ "unlearning": {
40
+ "unlearning_score": 0.06378984451293945
41
+ }
42
+ },
43
+ "eval_result_details": [],
44
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
45
+ "sae_lens_id": "custom_sae",
46
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_2",
47
+ "sae_lens_version": "5.4.1",
48
+ "sae_cfg_dict": {
49
+ "model_name": "gemma-2-2b",
50
+ "d_in": 2304,
51
+ "d_sae": 16384,
52
+ "hook_layer": 12,
53
+ "hook_name": "blocks.12.hook_resid_post",
54
+ "context_size": null,
55
+ "hook_head_index": null,
56
+ "architecture": "standard",
57
+ "apply_b_dec_to_input": null,
58
+ "finetuning_scaling_factor": null,
59
+ "activation_fn_str": "",
60
+ "prepend_bos": true,
61
+ "normalize_activations": "none",
62
+ "dtype": "bfloat16",
63
+ "device": "",
64
+ "dataset_path": "",
65
+ "dataset_trust_remote_code": true,
66
+ "seqpos_slice": [
67
+ null
68
+ ],
69
+ "training_tokens": -100000,
70
+ "sae_lens_training_version": null,
71
+ "neuronpedia_id": null
72
+ },
73
+ "eval_result_unstructured": null
74
+ }
old_relu_eval_results/unlearning/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_3_custom_sae_eval_results.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "unlearning",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "wmdp-bio",
7
+ "high_school_us_history",
8
+ "college_computer_science",
9
+ "high_school_geography",
10
+ "human_aging"
11
+ ],
12
+ "intervention_method": "clamp_feature_activation",
13
+ "retain_thresholds": [
14
+ 0.001,
15
+ 0.01
16
+ ],
17
+ "n_features_list": [
18
+ 10,
19
+ 20
20
+ ],
21
+ "multipliers": [
22
+ 25,
23
+ 50,
24
+ 100,
25
+ 200
26
+ ],
27
+ "dataset_size": 1024,
28
+ "seq_len": 1024,
29
+ "n_batch_loss_added": 50,
30
+ "target_metric": "correct",
31
+ "save_metrics": true,
32
+ "model_name": "gemma-2-2b-it",
33
+ "llm_batch_size": 4,
34
+ "llm_dtype": "bfloat16"
35
+ },
36
+ "eval_id": "9001c398-92b1-4daf-8d14-cd80c5147849",
37
+ "datetime_epoch_millis": 1738817408434,
38
+ "eval_result_metrics": {
39
+ "unlearning": {
40
+ "unlearning_score": 0.0863039493560791
41
+ }
42
+ },
43
+ "eval_result_details": [],
44
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
45
+ "sae_lens_id": "custom_sae",
46
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_3",
47
+ "sae_lens_version": "5.4.1",
48
+ "sae_cfg_dict": {
49
+ "model_name": "gemma-2-2b",
50
+ "d_in": 2304,
51
+ "d_sae": 16384,
52
+ "hook_layer": 12,
53
+ "hook_name": "blocks.12.hook_resid_post",
54
+ "context_size": null,
55
+ "hook_head_index": null,
56
+ "architecture": "standard",
57
+ "apply_b_dec_to_input": null,
58
+ "finetuning_scaling_factor": null,
59
+ "activation_fn_str": "",
60
+ "prepend_bos": true,
61
+ "normalize_activations": "none",
62
+ "dtype": "bfloat16",
63
+ "device": "",
64
+ "dataset_path": "",
65
+ "dataset_trust_remote_code": true,
66
+ "seqpos_slice": [
67
+ null
68
+ ],
69
+ "training_tokens": -100000,
70
+ "sae_lens_training_version": null,
71
+ "neuronpedia_id": null
72
+ },
73
+ "eval_result_unstructured": null
74
+ }
old_relu_eval_results/unlearning/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_4_custom_sae_eval_results.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "unlearning",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "wmdp-bio",
7
+ "high_school_us_history",
8
+ "college_computer_science",
9
+ "high_school_geography",
10
+ "human_aging"
11
+ ],
12
+ "intervention_method": "clamp_feature_activation",
13
+ "retain_thresholds": [
14
+ 0.001,
15
+ 0.01
16
+ ],
17
+ "n_features_list": [
18
+ 10,
19
+ 20
20
+ ],
21
+ "multipliers": [
22
+ 25,
23
+ 50,
24
+ 100,
25
+ 200
26
+ ],
27
+ "dataset_size": 1024,
28
+ "seq_len": 1024,
29
+ "n_batch_loss_added": 50,
30
+ "target_metric": "correct",
31
+ "save_metrics": true,
32
+ "model_name": "gemma-2-2b-it",
33
+ "llm_batch_size": 4,
34
+ "llm_dtype": "bfloat16"
35
+ },
36
+ "eval_id": "68919444-b3fe-445b-b7d6-7eadd952f2a7",
37
+ "datetime_epoch_millis": 1738818814082,
38
+ "eval_result_metrics": {
39
+ "unlearning": {
40
+ "unlearning_score": 0.09943711757659912
41
+ }
42
+ },
43
+ "eval_result_details": [],
44
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
45
+ "sae_lens_id": "custom_sae",
46
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_4",
47
+ "sae_lens_version": "5.4.1",
48
+ "sae_cfg_dict": {
49
+ "model_name": "gemma-2-2b",
50
+ "d_in": 2304,
51
+ "d_sae": 16384,
52
+ "hook_layer": 12,
53
+ "hook_name": "blocks.12.hook_resid_post",
54
+ "context_size": null,
55
+ "hook_head_index": null,
56
+ "architecture": "standard",
57
+ "apply_b_dec_to_input": null,
58
+ "finetuning_scaling_factor": null,
59
+ "activation_fn_str": "",
60
+ "prepend_bos": true,
61
+ "normalize_activations": "none",
62
+ "dtype": "bfloat16",
63
+ "device": "",
64
+ "dataset_path": "",
65
+ "dataset_trust_remote_code": true,
66
+ "seqpos_slice": [
67
+ null
68
+ ],
69
+ "training_tokens": -100000,
70
+ "sae_lens_training_version": null,
71
+ "neuronpedia_id": null
72
+ },
73
+ "eval_result_unstructured": null
74
+ }
old_relu_eval_results/unlearning/temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_5_custom_sae_eval_results.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "unlearning",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "wmdp-bio",
7
+ "high_school_us_history",
8
+ "college_computer_science",
9
+ "high_school_geography",
10
+ "human_aging"
11
+ ],
12
+ "intervention_method": "clamp_feature_activation",
13
+ "retain_thresholds": [
14
+ 0.001,
15
+ 0.01
16
+ ],
17
+ "n_features_list": [
18
+ 10,
19
+ 20
20
+ ],
21
+ "multipliers": [
22
+ 25,
23
+ 50,
24
+ 100,
25
+ 200
26
+ ],
27
+ "dataset_size": 1024,
28
+ "seq_len": 1024,
29
+ "n_batch_loss_added": 50,
30
+ "target_metric": "correct",
31
+ "save_metrics": true,
32
+ "model_name": "gemma-2-2b-it",
33
+ "llm_batch_size": 4,
34
+ "llm_dtype": "bfloat16"
35
+ },
36
+ "eval_id": "39440e8a-6927-4b07-b9d8-8bf75861a3b2",
37
+ "datetime_epoch_millis": 1738816689242,
38
+ "eval_result_metrics": {
39
+ "unlearning": {
40
+ "unlearning_score": 0.23452156782150269
41
+ }
42
+ },
43
+ "eval_result_details": [],
44
+ "sae_bench_commit_hash": "155afbca50a9ffe6cf72c81796997e6daa475658",
45
+ "sae_lens_id": "custom_sae",
46
+ "sae_lens_release_id": "temp_old_relu_google_gemma-2-2b_standard_resid_post_layer_12_trainer_5",
47
+ "sae_lens_version": "5.4.1",
48
+ "sae_cfg_dict": {
49
+ "model_name": "gemma-2-2b",
50
+ "d_in": 2304,
51
+ "d_sae": 16384,
52
+ "hook_layer": 12,
53
+ "hook_name": "blocks.12.hook_resid_post",
54
+ "context_size": null,
55
+ "hook_head_index": null,
56
+ "architecture": "standard",
57
+ "apply_b_dec_to_input": null,
58
+ "finetuning_scaling_factor": null,
59
+ "activation_fn_str": "",
60
+ "prepend_bos": true,
61
+ "normalize_activations": "none",
62
+ "dtype": "bfloat16",
63
+ "device": "",
64
+ "dataset_path": "",
65
+ "dataset_trust_remote_code": true,
66
+ "seqpos_slice": [
67
+ null
68
+ ],
69
+ "training_tokens": -100000,
70
+ "sae_lens_training_version": null,
71
+ "neuronpedia_id": null
72
+ },
73
+ "eval_result_unstructured": null
74
+ }