jbloom commited on
Commit
123b02d
·
verified ·
1 Parent(s): 2052130

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. benchmark_stats.csv +13 -0
  3. benchmark_stats.html +646 -0
  4. benchmark_stats.png +3 -0
  5. v5_32k_layer_0/cfg.json +1 -0
  6. v5_32k_layer_0/metrics.json +1 -0
  7. v5_32k_layer_0/sae_weights.safetensors +3 -0
  8. v5_32k_layer_0/sparsity.safetensors +3 -0
  9. v5_32k_layer_1/cfg.json +1 -0
  10. v5_32k_layer_1/metrics.json +1 -0
  11. v5_32k_layer_1/sae_weights.safetensors +3 -0
  12. v5_32k_layer_1/sparsity.safetensors +3 -0
  13. v5_32k_layer_10/cfg.json +1 -0
  14. v5_32k_layer_10/metrics.json +1 -0
  15. v5_32k_layer_10/sae_weights.safetensors +3 -0
  16. v5_32k_layer_10/sparsity.safetensors +3 -0
  17. v5_32k_layer_11/cfg.json +1 -0
  18. v5_32k_layer_11/metrics.json +1 -0
  19. v5_32k_layer_11/sae_weights.safetensors +3 -0
  20. v5_32k_layer_11/sparsity.safetensors +3 -0
  21. v5_32k_layer_2/cfg.json +1 -0
  22. v5_32k_layer_2/metrics.json +1 -0
  23. v5_32k_layer_2/sae_weights.safetensors +3 -0
  24. v5_32k_layer_2/sparsity.safetensors +3 -0
  25. v5_32k_layer_3/cfg.json +1 -0
  26. v5_32k_layer_3/metrics.json +1 -0
  27. v5_32k_layer_3/sae_weights.safetensors +3 -0
  28. v5_32k_layer_3/sparsity.safetensors +3 -0
  29. v5_32k_layer_4/cfg.json +1 -0
  30. v5_32k_layer_4/metrics.json +1 -0
  31. v5_32k_layer_4/sae_weights.safetensors +3 -0
  32. v5_32k_layer_4/sparsity.safetensors +3 -0
  33. v5_32k_layer_5/cfg.json +1 -0
  34. v5_32k_layer_5/metrics.json +1 -0
  35. v5_32k_layer_5/sae_weights.safetensors +3 -0
  36. v5_32k_layer_5/sparsity.safetensors +3 -0
  37. v5_32k_layer_6/cfg.json +1 -0
  38. v5_32k_layer_6/metrics.json +1 -0
  39. v5_32k_layer_6/sae_weights.safetensors +3 -0
  40. v5_32k_layer_6/sparsity.safetensors +3 -0
  41. v5_32k_layer_7/cfg.json +1 -0
  42. v5_32k_layer_7/metrics.json +1 -0
  43. v5_32k_layer_7/sae_weights.safetensors +3 -0
  44. v5_32k_layer_7/sparsity.safetensors +3 -0
  45. v5_32k_layer_8/cfg.json +1 -0
  46. v5_32k_layer_8/metrics.json +1 -0
  47. v5_32k_layer_8/sae_weights.safetensors +3 -0
  48. v5_32k_layer_8/sparsity.safetensors +3 -0
  49. v5_32k_layer_9/cfg.json +1 -0
  50. v5_32k_layer_9/metrics.json +1 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ benchmark_stats.png filter=lfs diff=lfs merge=lfs -text
benchmark_stats.csv ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,version,d_sae,layer,kl_div_with_sae,kl_div_with_ablation,ce_loss_with_sae,ce_loss_without_sae,ce_loss_with_ablation,kl_div_score,ce_loss_score,l2_norm_in,l2_norm_out,l2_ratio,l0,l1,explained_variance,mse,total_tokens_evaluated,filepath
2
+ OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_0/metrics.json,5,32,0,0.0048454091884195805,3.094083309173584,3.6054646968841553,3.599064588546753,6.694648742675781,0.9984339758486613,0.9979325038445924,29.933448791503906,29.601543426513672,0.9893707036972046,32.0,71.21115112304688,0.9667970538139343,21.729291915893555,6144.0,OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_0/metrics.json
3
+ OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_1/metrics.json,5,32,1,0.006601419299840927,0.05105271190404892,3.605595588684082,3.599064588546753,3.6525371074676514,0.8706940522131719,0.8778624933119316,18.973735809326172,17.91716766357422,0.910649299621582,32.0,86.5653305053711,0.8854424357414246,25.637441635131836,6144.0,OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_1/metrics.json
4
+ OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_2/metrics.json,5,32,2,0.00936876516789198,0.05874736234545708,3.601879119873047,3.599064588546753,3.6459126472473145,0.8405244968650671,0.9399221354232932,49.106536865234375,47.64448165893555,0.8887979388237,31.875,85.81163024902344,0.9745470285415649,37.83729553222656,6144.0,OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_2/metrics.json
5
+ OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_3/metrics.json,5,32,3,0.010681239888072014,0.07059153914451599,3.6096014976501465,3.599064588546753,3.658677577972412,0.8486895169376428,0.823244745735597,16.98731803894043,15.157210350036621,0.8746687173843384,31.91145896911621,85.93821716308594,0.7805342078208923,50.548057556152344,6144.0,OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_3/metrics.json
6
+ OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_4/metrics.json,5,32,4,0.012658017687499523,0.06332532316446304,3.611159086227417,3.599064588546753,3.6600804328918457,0.8001112816333331,0.8017810322797447,17.251985549926758,15.012179374694824,0.8525444865226746,31.95556640625,82.4767074584961,0.7294961810112,63.70451354980469,6144.0,OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_4/metrics.json
7
+ OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_5/metrics.json,5,32,5,0.014466611668467522,0.06850520521402359,3.613976240158081,3.599064588546753,3.6693859100341797,0.7888246356861349,0.7879497811486054,18.888967514038086,16.209918975830078,0.8484395742416382,32.0,81.43401336669922,0.7174215316772461,87.28172302246094,6144.0,OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_5/metrics.json
8
+ OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_6/metrics.json,5,32,6,0.016599537804722786,0.07569437474012375,3.6187987327575684,3.599064588546753,3.67651629447937,0.7807031518298047,0.7452071071490137,21.466564178466797,18.40247344970703,0.852634608745575,32.0,78.82976531982422,0.7063077688217163,117.07249450683594,6144.0,OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_6/metrics.json
9
+ OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_7/metrics.json,5,32,7,0.017009764909744263,0.08048636466264725,3.614975929260254,3.599064588546753,3.6727118492126465,0.7886627756013151,0.7839520361024154,25.444438934326172,22.004989624023438,0.8624889254570007,32.0,76.41993713378906,0.7180025577545166,157.79141235351562,6144.0,OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_7/metrics.json
10
+ OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_8/metrics.json,5,32,8,0.01810324750840664,0.08732372522354126,3.6162450313568115,3.599064588546753,3.6803367137908936,0.792688098657451,0.7886059610420089,30.250225067138672,26.306936264038086,0.8676368594169617,32.0,76.72819519042969,0.7239155769348145,219.98291015625,6144.0,OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_8/metrics.json
11
+ OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_9/metrics.json,5,32,9,0.01999707892537117,0.09758877754211426,3.6174559593200684,3.599064588546753,3.6962451934814453,0.7950883346526042,0.8107506041388108,40.192413330078125,35.94580841064453,0.8897998332977295,32.0,72.42656707763672,0.7423521876335144,318.1434326171875,6144.0,OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_9/metrics.json
12
+ OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_10/metrics.json,5,32,10,0.02311515063047409,0.12674781680107117,3.6171722412109375,3.599064588546753,3.708984136581421,0.8176288064452188,0.8352644912761693,81.75682830810547,78.3930892944336,0.9553598165512085,32.0,50.45811462402344,0.7926573753356934,514.5535888671875,6144.0,OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_10/metrics.json
13
+ OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_11/metrics.json,5,32,11,0.028952505439519882,0.17384076118469238,3.62371826171875,3.599064588546753,3.783318281173706,0.8334538732906256,0.8661971284238421,92.90629577636719,87.66377258300781,0.9233807325363159,32.0,73.98703002929688,0.840599000453949,742.95751953125,6144.0,OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_11/metrics.json
benchmark_stats.html ADDED
@@ -0,0 +1,646 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <style type="text/css">
2
+ #T_03f5d_row0_col2, #T_03f5d_row0_col3, #T_03f5d_row0_col6, #T_03f5d_row0_col16, #T_03f5d_row0_col17, #T_03f5d_row1_col4, #T_03f5d_row1_col6, #T_03f5d_row1_col7, #T_03f5d_row1_col17, #T_03f5d_row2_col4, #T_03f5d_row2_col5, #T_03f5d_row2_col6, #T_03f5d_row2_col7, #T_03f5d_row2_col13, #T_03f5d_row2_col17, #T_03f5d_row3_col6, #T_03f5d_row3_col10, #T_03f5d_row3_col11, #T_03f5d_row3_col17, #T_03f5d_row4_col6, #T_03f5d_row4_col10, #T_03f5d_row4_col11, #T_03f5d_row4_col17, #T_03f5d_row5_col6, #T_03f5d_row5_col12, #T_03f5d_row5_col17, #T_03f5d_row6_col6, #T_03f5d_row6_col8, #T_03f5d_row6_col9, #T_03f5d_row6_col15, #T_03f5d_row6_col17, #T_03f5d_row7_col6, #T_03f5d_row7_col17, #T_03f5d_row8_col6, #T_03f5d_row8_col17, #T_03f5d_row9_col6, #T_03f5d_row9_col17, #T_03f5d_row10_col6, #T_03f5d_row10_col14, #T_03f5d_row10_col17, #T_03f5d_row11_col6, #T_03f5d_row11_col17 {
3
+ background-color: #440154;
4
+ color: #f1f1f1;
5
+ }
6
+ #T_03f5d_row0_col4, #T_03f5d_row0_col7, #T_03f5d_row0_col8, #T_03f5d_row0_col9, #T_03f5d_row0_col12, #T_03f5d_row0_col13, #T_03f5d_row1_col13, #T_03f5d_row1_col14, #T_03f5d_row2_col15, #T_03f5d_row5_col13, #T_03f5d_row6_col13, #T_03f5d_row7_col13, #T_03f5d_row8_col13, #T_03f5d_row9_col13, #T_03f5d_row10_col13, #T_03f5d_row11_col2, #T_03f5d_row11_col3, #T_03f5d_row11_col5, #T_03f5d_row11_col10, #T_03f5d_row11_col11, #T_03f5d_row11_col13, #T_03f5d_row11_col16 {
7
+ background-color: #fde725;
8
+ color: #000000;
9
+ }
10
+ #T_03f5d_row0_col5 {
11
+ background-color: #443983;
12
+ color: #f1f1f1;
13
+ }
14
+ #T_03f5d_row0_col10, #T_03f5d_row1_col5, #T_03f5d_row5_col9, #T_03f5d_row8_col9, #T_03f5d_row10_col8 {
15
+ background-color: #443a83;
16
+ color: #f1f1f1;
17
+ }
18
+ #T_03f5d_row0_col11 {
19
+ background-color: #414487;
20
+ color: #f1f1f1;
21
+ }
22
+ #T_03f5d_row0_col14 {
23
+ background-color: #1fa287;
24
+ color: #f1f1f1;
25
+ }
26
+ #T_03f5d_row0_col15 {
27
+ background-color: #ece51b;
28
+ color: #000000;
29
+ }
30
+ #T_03f5d_row1_col2, #T_03f5d_row5_col16 {
31
+ background-color: #482173;
32
+ color: #f1f1f1;
33
+ }
34
+ #T_03f5d_row1_col3 {
35
+ background-color: #481b6d;
36
+ color: #f1f1f1;
37
+ }
38
+ #T_03f5d_row1_col8, #T_03f5d_row9_col16 {
39
+ background-color: #297b8e;
40
+ color: #f1f1f1;
41
+ }
42
+ #T_03f5d_row1_col9 {
43
+ background-color: #1f968b;
44
+ color: #f1f1f1;
45
+ }
46
+ #T_03f5d_row1_col10, #T_03f5d_row5_col10, #T_03f5d_row10_col4 {
47
+ background-color: #460a5d;
48
+ color: #f1f1f1;
49
+ }
50
+ #T_03f5d_row1_col11, #T_03f5d_row3_col16, #T_03f5d_row5_col15, #T_03f5d_row11_col4 {
51
+ background-color: #471063;
52
+ color: #f1f1f1;
53
+ }
54
+ #T_03f5d_row1_col12 {
55
+ background-color: #26828e;
56
+ color: #f1f1f1;
57
+ }
58
+ #T_03f5d_row1_col15 {
59
+ background-color: #35b779;
60
+ color: #f1f1f1;
61
+ }
62
+ #T_03f5d_row1_col16, #T_03f5d_row3_col4, #T_03f5d_row3_col7, #T_03f5d_row4_col4, #T_03f5d_row4_col7, #T_03f5d_row5_col4, #T_03f5d_row5_col7 {
63
+ background-color: #440256;
64
+ color: #f1f1f1;
65
+ }
66
+ #T_03f5d_row2_col2 {
67
+ background-color: #433e85;
68
+ color: #f1f1f1;
69
+ }
70
+ #T_03f5d_row2_col3, #T_03f5d_row7_col16 {
71
+ background-color: #424086;
72
+ color: #f1f1f1;
73
+ }
74
+ #T_03f5d_row2_col8, #T_03f5d_row3_col15, #T_03f5d_row8_col16 {
75
+ background-color: #38598c;
76
+ color: #f1f1f1;
77
+ }
78
+ #T_03f5d_row2_col9 {
79
+ background-color: #69cd5b;
80
+ color: #000000;
81
+ }
82
+ #T_03f5d_row2_col10, #T_03f5d_row4_col5 {
83
+ background-color: #277e8e;
84
+ color: #f1f1f1;
85
+ }
86
+ #T_03f5d_row2_col11 {
87
+ background-color: #25838e;
88
+ color: #f1f1f1;
89
+ }
90
+ #T_03f5d_row2_col12, #T_03f5d_row9_col11 {
91
+ background-color: #365c8d;
92
+ color: #f1f1f1;
93
+ }
94
+ #T_03f5d_row2_col14 {
95
+ background-color: #f1e51d;
96
+ color: #000000;
97
+ }
98
+ #T_03f5d_row2_col16, #T_03f5d_row10_col7 {
99
+ background-color: #46085c;
100
+ color: #f1f1f1;
101
+ }
102
+ #T_03f5d_row3_col2 {
103
+ background-color: #38588c;
104
+ color: #f1f1f1;
105
+ }
106
+ #T_03f5d_row3_col3 {
107
+ background-color: #3c4f8a;
108
+ color: #f1f1f1;
109
+ }
110
+ #T_03f5d_row3_col5 {
111
+ background-color: #2e6d8e;
112
+ color: #f1f1f1;
113
+ }
114
+ #T_03f5d_row3_col8, #T_03f5d_row3_col9 {
115
+ background-color: #33628d;
116
+ color: #f1f1f1;
117
+ }
118
+ #T_03f5d_row3_col12 {
119
+ background-color: #423f85;
120
+ color: #f1f1f1;
121
+ }
122
+ #T_03f5d_row3_col13 {
123
+ background-color: #365d8d;
124
+ color: #f1f1f1;
125
+ }
126
+ #T_03f5d_row3_col14 {
127
+ background-color: #f4e61e;
128
+ color: #000000;
129
+ }
130
+ #T_03f5d_row4_col2 {
131
+ background-color: #2d708e;
132
+ color: #f1f1f1;
133
+ }
134
+ #T_03f5d_row4_col3, #T_03f5d_row10_col15 {
135
+ background-color: #32658e;
136
+ color: #f1f1f1;
137
+ }
138
+ #T_03f5d_row4_col8, #T_03f5d_row4_col15 {
139
+ background-color: #482071;
140
+ color: #f1f1f1;
141
+ }
142
+ #T_03f5d_row4_col9 {
143
+ background-color: #3e4a89;
144
+ color: #f1f1f1;
145
+ }
146
+ #T_03f5d_row4_col12, #T_03f5d_row6_col12 {
147
+ background-color: #460b5e;
148
+ color: #f1f1f1;
149
+ }
150
+ #T_03f5d_row4_col13 {
151
+ background-color: #2eb37c;
152
+ color: #f1f1f1;
153
+ }
154
+ #T_03f5d_row4_col14 {
155
+ background-color: #b5de2b;
156
+ color: #000000;
157
+ }
158
+ #T_03f5d_row4_col16, #T_03f5d_row8_col8 {
159
+ background-color: #481668;
160
+ color: #f1f1f1;
161
+ }
162
+ #T_03f5d_row5_col2 {
163
+ background-color: #25858e;
164
+ color: #f1f1f1;
165
+ }
166
+ #T_03f5d_row5_col3 {
167
+ background-color: #2a788e;
168
+ color: #f1f1f1;
169
+ }
170
+ #T_03f5d_row5_col5 {
171
+ background-color: #1e9d89;
172
+ color: #f1f1f1;
173
+ }
174
+ #T_03f5d_row5_col8, #T_03f5d_row7_col8 {
175
+ background-color: #470e61;
176
+ color: #f1f1f1;
177
+ }
178
+ #T_03f5d_row5_col11, #T_03f5d_row9_col7 {
179
+ background-color: #46075a;
180
+ color: #f1f1f1;
181
+ }
182
+ #T_03f5d_row5_col14 {
183
+ background-color: #a0da39;
184
+ color: #000000;
185
+ }
186
+ #T_03f5d_row6_col2 {
187
+ background-color: #1e9b8a;
188
+ color: #f1f1f1;
189
+ }
190
+ #T_03f5d_row6_col3 {
191
+ background-color: #228d8d;
192
+ color: #f1f1f1;
193
+ }
194
+ #T_03f5d_row6_col4, #T_03f5d_row6_col7, #T_03f5d_row7_col4, #T_03f5d_row7_col7, #T_03f5d_row8_col7 {
195
+ background-color: #450457;
196
+ color: #f1f1f1;
197
+ }
198
+ #T_03f5d_row6_col5 {
199
+ background-color: #6ccd5a;
200
+ color: #000000;
201
+ }
202
+ #T_03f5d_row6_col10 {
203
+ background-color: #481769;
204
+ color: #f1f1f1;
205
+ }
206
+ #T_03f5d_row6_col11, #T_03f5d_row7_col15, #T_03f5d_row11_col7 {
207
+ background-color: #471164;
208
+ color: #f1f1f1;
209
+ }
210
+ #T_03f5d_row6_col14 {
211
+ background-color: #73d056;
212
+ color: #000000;
213
+ }
214
+ #T_03f5d_row6_col16 {
215
+ background-color: #472e7c;
216
+ color: #f1f1f1;
217
+ }
218
+ #T_03f5d_row7_col2 {
219
+ background-color: #2ab07f;
220
+ color: #f1f1f1;
221
+ }
222
+ #T_03f5d_row7_col3 {
223
+ background-color: #20928c;
224
+ color: #f1f1f1;
225
+ }
226
+ #T_03f5d_row7_col5 {
227
+ background-color: #22a884;
228
+ color: #f1f1f1;
229
+ }
230
+ #T_03f5d_row7_col9, #T_03f5d_row8_col11 {
231
+ background-color: #453581;
232
+ color: #f1f1f1;
233
+ }
234
+ #T_03f5d_row7_col10 {
235
+ background-color: #482878;
236
+ color: #f1f1f1;
237
+ }
238
+ #T_03f5d_row7_col11 {
239
+ background-color: #482374;
240
+ color: #f1f1f1;
241
+ }
242
+ #T_03f5d_row7_col12 {
243
+ background-color: #482475;
244
+ color: #f1f1f1;
245
+ }
246
+ #T_03f5d_row7_col14 {
247
+ background-color: #4ec36b;
248
+ color: #000000;
249
+ }
250
+ #T_03f5d_row8_col2, #T_03f5d_row8_col14 {
251
+ background-color: #52c569;
252
+ color: #000000;
253
+ }
254
+ #T_03f5d_row8_col3 {
255
+ background-color: #1e9c89;
256
+ color: #f1f1f1;
257
+ }
258
+ #T_03f5d_row8_col4, #T_03f5d_row9_col4 {
259
+ background-color: #450559;
260
+ color: #f1f1f1;
261
+ }
262
+ #T_03f5d_row8_col5 {
263
+ background-color: #32b67a;
264
+ color: #f1f1f1;
265
+ }
266
+ #T_03f5d_row8_col10 {
267
+ background-color: #443b84;
268
+ color: #f1f1f1;
269
+ }
270
+ #T_03f5d_row8_col12, #T_03f5d_row9_col15 {
271
+ background-color: #472f7d;
272
+ color: #f1f1f1;
273
+ }
274
+ #T_03f5d_row8_col15, #T_03f5d_row9_col8 {
275
+ background-color: #48186a;
276
+ color: #f1f1f1;
277
+ }
278
+ #T_03f5d_row9_col2 {
279
+ background-color: #86d549;
280
+ color: #000000;
281
+ }
282
+ #T_03f5d_row9_col3 {
283
+ background-color: #28ae80;
284
+ color: #f1f1f1;
285
+ }
286
+ #T_03f5d_row9_col5 {
287
+ background-color: #4ac16d;
288
+ color: #000000;
289
+ }
290
+ #T_03f5d_row9_col9 {
291
+ background-color: #3a548c;
292
+ color: #f1f1f1;
293
+ }
294
+ #T_03f5d_row9_col10 {
295
+ background-color: #34618d;
296
+ color: #f1f1f1;
297
+ }
298
+ #T_03f5d_row9_col12 {
299
+ background-color: #355e8d;
300
+ color: #f1f1f1;
301
+ }
302
+ #T_03f5d_row9_col14 {
303
+ background-color: #24aa83;
304
+ color: #f1f1f1;
305
+ }
306
+ #T_03f5d_row10_col2 {
307
+ background-color: #c2df23;
308
+ color: #000000;
309
+ }
310
+ #T_03f5d_row10_col3, #T_03f5d_row10_col12 {
311
+ background-color: #63cb5f;
312
+ color: #000000;
313
+ }
314
+ #T_03f5d_row10_col5 {
315
+ background-color: #44bf70;
316
+ color: #f1f1f1;
317
+ }
318
+ #T_03f5d_row10_col9 {
319
+ background-color: #2e6e8e;
320
+ color: #f1f1f1;
321
+ }
322
+ #T_03f5d_row10_col10 {
323
+ background-color: #9dd93b;
324
+ color: #000000;
325
+ }
326
+ #T_03f5d_row10_col11 {
327
+ background-color: #aadc32;
328
+ color: #000000;
329
+ }
330
+ #T_03f5d_row10_col16 {
331
+ background-color: #3bbb75;
332
+ color: #f1f1f1;
333
+ }
334
+ #T_03f5d_row11_col8 {
335
+ background-color: #3c508b;
336
+ color: #f1f1f1;
337
+ }
338
+ #T_03f5d_row11_col9 {
339
+ background-color: #228b8d;
340
+ color: #f1f1f1;
341
+ }
342
+ #T_03f5d_row11_col12 {
343
+ background-color: #1f988b;
344
+ color: #f1f1f1;
345
+ }
346
+ #T_03f5d_row11_col14 {
347
+ background-color: #2fb47c;
348
+ color: #f1f1f1;
349
+ }
350
+ #T_03f5d_row11_col15 {
351
+ background-color: #21918c;
352
+ color: #f1f1f1;
353
+ }
354
+ </style>
355
+ <table id="T_03f5d">
356
+ <thead>
357
+ <tr>
358
+ <th class="blank level0" >&nbsp;</th>
359
+ <th id="T_03f5d_level0_col0" class="col_heading level0 col0" >version</th>
360
+ <th id="T_03f5d_level0_col1" class="col_heading level0 col1" >d_sae</th>
361
+ <th id="T_03f5d_level0_col2" class="col_heading level0 col2" >layer</th>
362
+ <th id="T_03f5d_level0_col3" class="col_heading level0 col3" >kl_div_with_sae</th>
363
+ <th id="T_03f5d_level0_col4" class="col_heading level0 col4" >kl_div_with_ablation</th>
364
+ <th id="T_03f5d_level0_col5" class="col_heading level0 col5" >ce_loss_with_sae</th>
365
+ <th id="T_03f5d_level0_col6" class="col_heading level0 col6" >ce_loss_without_sae</th>
366
+ <th id="T_03f5d_level0_col7" class="col_heading level0 col7" >ce_loss_with_ablation</th>
367
+ <th id="T_03f5d_level0_col8" class="col_heading level0 col8" >kl_div_score</th>
368
+ <th id="T_03f5d_level0_col9" class="col_heading level0 col9" >ce_loss_score</th>
369
+ <th id="T_03f5d_level0_col10" class="col_heading level0 col10" >l2_norm_in</th>
370
+ <th id="T_03f5d_level0_col11" class="col_heading level0 col11" >l2_norm_out</th>
371
+ <th id="T_03f5d_level0_col12" class="col_heading level0 col12" >l2_ratio</th>
372
+ <th id="T_03f5d_level0_col13" class="col_heading level0 col13" >l0</th>
373
+ <th id="T_03f5d_level0_col14" class="col_heading level0 col14" >l1</th>
374
+ <th id="T_03f5d_level0_col15" class="col_heading level0 col15" >explained_variance</th>
375
+ <th id="T_03f5d_level0_col16" class="col_heading level0 col16" >mse</th>
376
+ <th id="T_03f5d_level0_col17" class="col_heading level0 col17" >total_tokens_evaluated</th>
377
+ <th id="T_03f5d_level0_col18" class="col_heading level0 col18" >filepath</th>
378
+ </tr>
379
+ </thead>
380
+ <tbody>
381
+ <tr>
382
+ <th id="T_03f5d_level0_row0" class="row_heading level0 row0" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_0/metrics.json</th>
383
+ <td id="T_03f5d_row0_col0" class="data row0 col0" >5</td>
384
+ <td id="T_03f5d_row0_col1" class="data row0 col1" >32</td>
385
+ <td id="T_03f5d_row0_col2" class="data row0 col2" >0</td>
386
+ <td id="T_03f5d_row0_col3" class="data row0 col3" >0.004845</td>
387
+ <td id="T_03f5d_row0_col4" class="data row0 col4" >3.094083</td>
388
+ <td id="T_03f5d_row0_col5" class="data row0 col5" >3.605465</td>
389
+ <td id="T_03f5d_row0_col6" class="data row0 col6" >3.599065</td>
390
+ <td id="T_03f5d_row0_col7" class="data row0 col7" >6.694649</td>
391
+ <td id="T_03f5d_row0_col8" class="data row0 col8" >0.998434</td>
392
+ <td id="T_03f5d_row0_col9" class="data row0 col9" >0.997933</td>
393
+ <td id="T_03f5d_row0_col10" class="data row0 col10" >29.933449</td>
394
+ <td id="T_03f5d_row0_col11" class="data row0 col11" >29.601543</td>
395
+ <td id="T_03f5d_row0_col12" class="data row0 col12" >0.989371</td>
396
+ <td id="T_03f5d_row0_col13" class="data row0 col13" >32.000000</td>
397
+ <td id="T_03f5d_row0_col14" class="data row0 col14" >71.211151</td>
398
+ <td id="T_03f5d_row0_col15" class="data row0 col15" >0.966797</td>
399
+ <td id="T_03f5d_row0_col16" class="data row0 col16" >21.729292</td>
400
+ <td id="T_03f5d_row0_col17" class="data row0 col17" >6144.000000</td>
401
+ <td id="T_03f5d_row0_col18" class="data row0 col18" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_0/metrics.json</td>
402
+ </tr>
403
+ <tr>
404
+ <th id="T_03f5d_level0_row1" class="row_heading level0 row1" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_1/metrics.json</th>
405
+ <td id="T_03f5d_row1_col0" class="data row1 col0" >5</td>
406
+ <td id="T_03f5d_row1_col1" class="data row1 col1" >32</td>
407
+ <td id="T_03f5d_row1_col2" class="data row1 col2" >1</td>
408
+ <td id="T_03f5d_row1_col3" class="data row1 col3" >0.006601</td>
409
+ <td id="T_03f5d_row1_col4" class="data row1 col4" >0.051053</td>
410
+ <td id="T_03f5d_row1_col5" class="data row1 col5" >3.605596</td>
411
+ <td id="T_03f5d_row1_col6" class="data row1 col6" >3.599065</td>
412
+ <td id="T_03f5d_row1_col7" class="data row1 col7" >3.652537</td>
413
+ <td id="T_03f5d_row1_col8" class="data row1 col8" >0.870694</td>
414
+ <td id="T_03f5d_row1_col9" class="data row1 col9" >0.877862</td>
415
+ <td id="T_03f5d_row1_col10" class="data row1 col10" >18.973736</td>
416
+ <td id="T_03f5d_row1_col11" class="data row1 col11" >17.917168</td>
417
+ <td id="T_03f5d_row1_col12" class="data row1 col12" >0.910649</td>
418
+ <td id="T_03f5d_row1_col13" class="data row1 col13" >32.000000</td>
419
+ <td id="T_03f5d_row1_col14" class="data row1 col14" >86.565331</td>
420
+ <td id="T_03f5d_row1_col15" class="data row1 col15" >0.885442</td>
421
+ <td id="T_03f5d_row1_col16" class="data row1 col16" >25.637442</td>
422
+ <td id="T_03f5d_row1_col17" class="data row1 col17" >6144.000000</td>
423
+ <td id="T_03f5d_row1_col18" class="data row1 col18" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_1/metrics.json</td>
424
+ </tr>
425
+ <tr>
426
+ <th id="T_03f5d_level0_row2" class="row_heading level0 row2" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_2/metrics.json</th>
427
+ <td id="T_03f5d_row2_col0" class="data row2 col0" >5</td>
428
+ <td id="T_03f5d_row2_col1" class="data row2 col1" >32</td>
429
+ <td id="T_03f5d_row2_col2" class="data row2 col2" >2</td>
430
+ <td id="T_03f5d_row2_col3" class="data row2 col3" >0.009369</td>
431
+ <td id="T_03f5d_row2_col4" class="data row2 col4" >0.058747</td>
432
+ <td id="T_03f5d_row2_col5" class="data row2 col5" >3.601879</td>
433
+ <td id="T_03f5d_row2_col6" class="data row2 col6" >3.599065</td>
434
+ <td id="T_03f5d_row2_col7" class="data row2 col7" >3.645913</td>
435
+ <td id="T_03f5d_row2_col8" class="data row2 col8" >0.840524</td>
436
+ <td id="T_03f5d_row2_col9" class="data row2 col9" >0.939922</td>
437
+ <td id="T_03f5d_row2_col10" class="data row2 col10" >49.106537</td>
438
+ <td id="T_03f5d_row2_col11" class="data row2 col11" >47.644482</td>
439
+ <td id="T_03f5d_row2_col12" class="data row2 col12" >0.888798</td>
440
+ <td id="T_03f5d_row2_col13" class="data row2 col13" >31.875000</td>
441
+ <td id="T_03f5d_row2_col14" class="data row2 col14" >85.811630</td>
442
+ <td id="T_03f5d_row2_col15" class="data row2 col15" >0.974547</td>
443
+ <td id="T_03f5d_row2_col16" class="data row2 col16" >37.837296</td>
444
+ <td id="T_03f5d_row2_col17" class="data row2 col17" >6144.000000</td>
445
+ <td id="T_03f5d_row2_col18" class="data row2 col18" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_2/metrics.json</td>
446
+ </tr>
447
+ <tr>
448
+ <th id="T_03f5d_level0_row3" class="row_heading level0 row3" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_3/metrics.json</th>
449
+ <td id="T_03f5d_row3_col0" class="data row3 col0" >5</td>
450
+ <td id="T_03f5d_row3_col1" class="data row3 col1" >32</td>
451
+ <td id="T_03f5d_row3_col2" class="data row3 col2" >3</td>
452
+ <td id="T_03f5d_row3_col3" class="data row3 col3" >0.010681</td>
453
+ <td id="T_03f5d_row3_col4" class="data row3 col4" >0.070592</td>
454
+ <td id="T_03f5d_row3_col5" class="data row3 col5" >3.609601</td>
455
+ <td id="T_03f5d_row3_col6" class="data row3 col6" >3.599065</td>
456
+ <td id="T_03f5d_row3_col7" class="data row3 col7" >3.658678</td>
457
+ <td id="T_03f5d_row3_col8" class="data row3 col8" >0.848690</td>
458
+ <td id="T_03f5d_row3_col9" class="data row3 col9" >0.823245</td>
459
+ <td id="T_03f5d_row3_col10" class="data row3 col10" >16.987318</td>
460
+ <td id="T_03f5d_row3_col11" class="data row3 col11" >15.157210</td>
461
+ <td id="T_03f5d_row3_col12" class="data row3 col12" >0.874669</td>
462
+ <td id="T_03f5d_row3_col13" class="data row3 col13" >31.911459</td>
463
+ <td id="T_03f5d_row3_col14" class="data row3 col14" >85.938217</td>
464
+ <td id="T_03f5d_row3_col15" class="data row3 col15" >0.780534</td>
465
+ <td id="T_03f5d_row3_col16" class="data row3 col16" >50.548058</td>
466
+ <td id="T_03f5d_row3_col17" class="data row3 col17" >6144.000000</td>
467
+ <td id="T_03f5d_row3_col18" class="data row3 col18" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_3/metrics.json</td>
468
+ </tr>
469
+ <tr>
470
+ <th id="T_03f5d_level0_row4" class="row_heading level0 row4" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_4/metrics.json</th>
471
+ <td id="T_03f5d_row4_col0" class="data row4 col0" >5</td>
472
+ <td id="T_03f5d_row4_col1" class="data row4 col1" >32</td>
473
+ <td id="T_03f5d_row4_col2" class="data row4 col2" >4</td>
474
+ <td id="T_03f5d_row4_col3" class="data row4 col3" >0.012658</td>
475
+ <td id="T_03f5d_row4_col4" class="data row4 col4" >0.063325</td>
476
+ <td id="T_03f5d_row4_col5" class="data row4 col5" >3.611159</td>
477
+ <td id="T_03f5d_row4_col6" class="data row4 col6" >3.599065</td>
478
+ <td id="T_03f5d_row4_col7" class="data row4 col7" >3.660080</td>
479
+ <td id="T_03f5d_row4_col8" class="data row4 col8" >0.800111</td>
480
+ <td id="T_03f5d_row4_col9" class="data row4 col9" >0.801781</td>
481
+ <td id="T_03f5d_row4_col10" class="data row4 col10" >17.251986</td>
482
+ <td id="T_03f5d_row4_col11" class="data row4 col11" >15.012179</td>
483
+ <td id="T_03f5d_row4_col12" class="data row4 col12" >0.852544</td>
484
+ <td id="T_03f5d_row4_col13" class="data row4 col13" >31.955566</td>
485
+ <td id="T_03f5d_row4_col14" class="data row4 col14" >82.476707</td>
486
+ <td id="T_03f5d_row4_col15" class="data row4 col15" >0.729496</td>
487
+ <td id="T_03f5d_row4_col16" class="data row4 col16" >63.704514</td>
488
+ <td id="T_03f5d_row4_col17" class="data row4 col17" >6144.000000</td>
489
+ <td id="T_03f5d_row4_col18" class="data row4 col18" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_4/metrics.json</td>
490
+ </tr>
491
+ <tr>
492
+ <th id="T_03f5d_level0_row5" class="row_heading level0 row5" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_5/metrics.json</th>
493
+ <td id="T_03f5d_row5_col0" class="data row5 col0" >5</td>
494
+ <td id="T_03f5d_row5_col1" class="data row5 col1" >32</td>
495
+ <td id="T_03f5d_row5_col2" class="data row5 col2" >5</td>
496
+ <td id="T_03f5d_row5_col3" class="data row5 col3" >0.014467</td>
497
+ <td id="T_03f5d_row5_col4" class="data row5 col4" >0.068505</td>
498
+ <td id="T_03f5d_row5_col5" class="data row5 col5" >3.613976</td>
499
+ <td id="T_03f5d_row5_col6" class="data row5 col6" >3.599065</td>
500
+ <td id="T_03f5d_row5_col7" class="data row5 col7" >3.669386</td>
501
+ <td id="T_03f5d_row5_col8" class="data row5 col8" >0.788825</td>
502
+ <td id="T_03f5d_row5_col9" class="data row5 col9" >0.787950</td>
503
+ <td id="T_03f5d_row5_col10" class="data row5 col10" >18.888968</td>
504
+ <td id="T_03f5d_row5_col11" class="data row5 col11" >16.209919</td>
505
+ <td id="T_03f5d_row5_col12" class="data row5 col12" >0.848440</td>
506
+ <td id="T_03f5d_row5_col13" class="data row5 col13" >32.000000</td>
507
+ <td id="T_03f5d_row5_col14" class="data row5 col14" >81.434013</td>
508
+ <td id="T_03f5d_row5_col15" class="data row5 col15" >0.717422</td>
509
+ <td id="T_03f5d_row5_col16" class="data row5 col16" >87.281723</td>
510
+ <td id="T_03f5d_row5_col17" class="data row5 col17" >6144.000000</td>
511
+ <td id="T_03f5d_row5_col18" class="data row5 col18" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_5/metrics.json</td>
512
+ </tr>
513
+ <tr>
514
+ <th id="T_03f5d_level0_row6" class="row_heading level0 row6" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_6/metrics.json</th>
515
+ <td id="T_03f5d_row6_col0" class="data row6 col0" >5</td>
516
+ <td id="T_03f5d_row6_col1" class="data row6 col1" >32</td>
517
+ <td id="T_03f5d_row6_col2" class="data row6 col2" >6</td>
518
+ <td id="T_03f5d_row6_col3" class="data row6 col3" >0.016600</td>
519
+ <td id="T_03f5d_row6_col4" class="data row6 col4" >0.075694</td>
520
+ <td id="T_03f5d_row6_col5" class="data row6 col5" >3.618799</td>
521
+ <td id="T_03f5d_row6_col6" class="data row6 col6" >3.599065</td>
522
+ <td id="T_03f5d_row6_col7" class="data row6 col7" >3.676516</td>
523
+ <td id="T_03f5d_row6_col8" class="data row6 col8" >0.780703</td>
524
+ <td id="T_03f5d_row6_col9" class="data row6 col9" >0.745207</td>
525
+ <td id="T_03f5d_row6_col10" class="data row6 col10" >21.466564</td>
526
+ <td id="T_03f5d_row6_col11" class="data row6 col11" >18.402473</td>
527
+ <td id="T_03f5d_row6_col12" class="data row6 col12" >0.852635</td>
528
+ <td id="T_03f5d_row6_col13" class="data row6 col13" >32.000000</td>
529
+ <td id="T_03f5d_row6_col14" class="data row6 col14" >78.829765</td>
530
+ <td id="T_03f5d_row6_col15" class="data row6 col15" >0.706308</td>
531
+ <td id="T_03f5d_row6_col16" class="data row6 col16" >117.072495</td>
532
+ <td id="T_03f5d_row6_col17" class="data row6 col17" >6144.000000</td>
533
+ <td id="T_03f5d_row6_col18" class="data row6 col18" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_6/metrics.json</td>
534
+ </tr>
535
+ <tr>
536
+ <th id="T_03f5d_level0_row7" class="row_heading level0 row7" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_7/metrics.json</th>
537
+ <td id="T_03f5d_row7_col0" class="data row7 col0" >5</td>
538
+ <td id="T_03f5d_row7_col1" class="data row7 col1" >32</td>
539
+ <td id="T_03f5d_row7_col2" class="data row7 col2" >7</td>
540
+ <td id="T_03f5d_row7_col3" class="data row7 col3" >0.017010</td>
541
+ <td id="T_03f5d_row7_col4" class="data row7 col4" >0.080486</td>
542
+ <td id="T_03f5d_row7_col5" class="data row7 col5" >3.614976</td>
543
+ <td id="T_03f5d_row7_col6" class="data row7 col6" >3.599065</td>
544
+ <td id="T_03f5d_row7_col7" class="data row7 col7" >3.672712</td>
545
+ <td id="T_03f5d_row7_col8" class="data row7 col8" >0.788663</td>
546
+ <td id="T_03f5d_row7_col9" class="data row7 col9" >0.783952</td>
547
+ <td id="T_03f5d_row7_col10" class="data row7 col10" >25.444439</td>
548
+ <td id="T_03f5d_row7_col11" class="data row7 col11" >22.004990</td>
549
+ <td id="T_03f5d_row7_col12" class="data row7 col12" >0.862489</td>
550
+ <td id="T_03f5d_row7_col13" class="data row7 col13" >32.000000</td>
551
+ <td id="T_03f5d_row7_col14" class="data row7 col14" >76.419937</td>
552
+ <td id="T_03f5d_row7_col15" class="data row7 col15" >0.718003</td>
553
+ <td id="T_03f5d_row7_col16" class="data row7 col16" >157.791412</td>
554
+ <td id="T_03f5d_row7_col17" class="data row7 col17" >6144.000000</td>
555
+ <td id="T_03f5d_row7_col18" class="data row7 col18" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_7/metrics.json</td>
556
+ </tr>
557
+ <tr>
558
+ <th id="T_03f5d_level0_row8" class="row_heading level0 row8" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_8/metrics.json</th>
559
+ <td id="T_03f5d_row8_col0" class="data row8 col0" >5</td>
560
+ <td id="T_03f5d_row8_col1" class="data row8 col1" >32</td>
561
+ <td id="T_03f5d_row8_col2" class="data row8 col2" >8</td>
562
+ <td id="T_03f5d_row8_col3" class="data row8 col3" >0.018103</td>
563
+ <td id="T_03f5d_row8_col4" class="data row8 col4" >0.087324</td>
564
+ <td id="T_03f5d_row8_col5" class="data row8 col5" >3.616245</td>
565
+ <td id="T_03f5d_row8_col6" class="data row8 col6" >3.599065</td>
566
+ <td id="T_03f5d_row8_col7" class="data row8 col7" >3.680337</td>
567
+ <td id="T_03f5d_row8_col8" class="data row8 col8" >0.792688</td>
568
+ <td id="T_03f5d_row8_col9" class="data row8 col9" >0.788606</td>
569
+ <td id="T_03f5d_row8_col10" class="data row8 col10" >30.250225</td>
570
+ <td id="T_03f5d_row8_col11" class="data row8 col11" >26.306936</td>
571
+ <td id="T_03f5d_row8_col12" class="data row8 col12" >0.867637</td>
572
+ <td id="T_03f5d_row8_col13" class="data row8 col13" >32.000000</td>
573
+ <td id="T_03f5d_row8_col14" class="data row8 col14" >76.728195</td>
574
+ <td id="T_03f5d_row8_col15" class="data row8 col15" >0.723916</td>
575
+ <td id="T_03f5d_row8_col16" class="data row8 col16" >219.982910</td>
576
+ <td id="T_03f5d_row8_col17" class="data row8 col17" >6144.000000</td>
577
+ <td id="T_03f5d_row8_col18" class="data row8 col18" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_8/metrics.json</td>
578
+ </tr>
579
+ <tr>
580
+ <th id="T_03f5d_level0_row9" class="row_heading level0 row9" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_9/metrics.json</th>
581
+ <td id="T_03f5d_row9_col0" class="data row9 col0" >5</td>
582
+ <td id="T_03f5d_row9_col1" class="data row9 col1" >32</td>
583
+ <td id="T_03f5d_row9_col2" class="data row9 col2" >9</td>
584
+ <td id="T_03f5d_row9_col3" class="data row9 col3" >0.019997</td>
585
+ <td id="T_03f5d_row9_col4" class="data row9 col4" >0.097589</td>
586
+ <td id="T_03f5d_row9_col5" class="data row9 col5" >3.617456</td>
587
+ <td id="T_03f5d_row9_col6" class="data row9 col6" >3.599065</td>
588
+ <td id="T_03f5d_row9_col7" class="data row9 col7" >3.696245</td>
589
+ <td id="T_03f5d_row9_col8" class="data row9 col8" >0.795088</td>
590
+ <td id="T_03f5d_row9_col9" class="data row9 col9" >0.810751</td>
591
+ <td id="T_03f5d_row9_col10" class="data row9 col10" >40.192413</td>
592
+ <td id="T_03f5d_row9_col11" class="data row9 col11" >35.945808</td>
593
+ <td id="T_03f5d_row9_col12" class="data row9 col12" >0.889800</td>
594
+ <td id="T_03f5d_row9_col13" class="data row9 col13" >32.000000</td>
595
+ <td id="T_03f5d_row9_col14" class="data row9 col14" >72.426567</td>
596
+ <td id="T_03f5d_row9_col15" class="data row9 col15" >0.742352</td>
597
+ <td id="T_03f5d_row9_col16" class="data row9 col16" >318.143433</td>
598
+ <td id="T_03f5d_row9_col17" class="data row9 col17" >6144.000000</td>
599
+ <td id="T_03f5d_row9_col18" class="data row9 col18" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_9/metrics.json</td>
600
+ </tr>
601
+ <tr>
602
+ <th id="T_03f5d_level0_row10" class="row_heading level0 row10" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_10/metrics.json</th>
603
+ <td id="T_03f5d_row10_col0" class="data row10 col0" >5</td>
604
+ <td id="T_03f5d_row10_col1" class="data row10 col1" >32</td>
605
+ <td id="T_03f5d_row10_col2" class="data row10 col2" >10</td>
606
+ <td id="T_03f5d_row10_col3" class="data row10 col3" >0.023115</td>
607
+ <td id="T_03f5d_row10_col4" class="data row10 col4" >0.126748</td>
608
+ <td id="T_03f5d_row10_col5" class="data row10 col5" >3.617172</td>
609
+ <td id="T_03f5d_row10_col6" class="data row10 col6" >3.599065</td>
610
+ <td id="T_03f5d_row10_col7" class="data row10 col7" >3.708984</td>
611
+ <td id="T_03f5d_row10_col8" class="data row10 col8" >0.817629</td>
612
+ <td id="T_03f5d_row10_col9" class="data row10 col9" >0.835264</td>
613
+ <td id="T_03f5d_row10_col10" class="data row10 col10" >81.756828</td>
614
+ <td id="T_03f5d_row10_col11" class="data row10 col11" >78.393089</td>
615
+ <td id="T_03f5d_row10_col12" class="data row10 col12" >0.955360</td>
616
+ <td id="T_03f5d_row10_col13" class="data row10 col13" >32.000000</td>
617
+ <td id="T_03f5d_row10_col14" class="data row10 col14" >50.458115</td>
618
+ <td id="T_03f5d_row10_col15" class="data row10 col15" >0.792657</td>
619
+ <td id="T_03f5d_row10_col16" class="data row10 col16" >514.553589</td>
620
+ <td id="T_03f5d_row10_col17" class="data row10 col17" >6144.000000</td>
621
+ <td id="T_03f5d_row10_col18" class="data row10 col18" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_10/metrics.json</td>
622
+ </tr>
623
+ <tr>
624
+ <th id="T_03f5d_level0_row11" class="row_heading level0 row11" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_11/metrics.json</th>
625
+ <td id="T_03f5d_row11_col0" class="data row11 col0" >5</td>
626
+ <td id="T_03f5d_row11_col1" class="data row11 col1" >32</td>
627
+ <td id="T_03f5d_row11_col2" class="data row11 col2" >11</td>
628
+ <td id="T_03f5d_row11_col3" class="data row11 col3" >0.028953</td>
629
+ <td id="T_03f5d_row11_col4" class="data row11 col4" >0.173841</td>
630
+ <td id="T_03f5d_row11_col5" class="data row11 col5" >3.623718</td>
631
+ <td id="T_03f5d_row11_col6" class="data row11 col6" >3.599065</td>
632
+ <td id="T_03f5d_row11_col7" class="data row11 col7" >3.783318</td>
633
+ <td id="T_03f5d_row11_col8" class="data row11 col8" >0.833454</td>
634
+ <td id="T_03f5d_row11_col9" class="data row11 col9" >0.866197</td>
635
+ <td id="T_03f5d_row11_col10" class="data row11 col10" >92.906296</td>
636
+ <td id="T_03f5d_row11_col11" class="data row11 col11" >87.663773</td>
637
+ <td id="T_03f5d_row11_col12" class="data row11 col12" >0.923381</td>
638
+ <td id="T_03f5d_row11_col13" class="data row11 col13" >32.000000</td>
639
+ <td id="T_03f5d_row11_col14" class="data row11 col14" >73.987030</td>
640
+ <td id="T_03f5d_row11_col15" class="data row11 col15" >0.840599</td>
641
+ <td id="T_03f5d_row11_col16" class="data row11 col16" >742.957520</td>
642
+ <td id="T_03f5d_row11_col17" class="data row11 col17" >6144.000000</td>
643
+ <td id="T_03f5d_row11_col18" class="data row11 col18" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_11/metrics.json</td>
644
+ </tr>
645
+ </tbody>
646
+ </table>
benchmark_stats.png ADDED

Git LFS Details

  • SHA256: 4c06d084693334119a81889b712cccee3f900115be574d2420e6304a50e9b2dc
  • Pointer size: 132 Bytes
  • Size of remote file: 4.55 MB
v5_32k_layer_0/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.0.hook_mlp_out", "hook_layer": 0, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_32k_layer_0/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.0048454091884195805, "metrics/kl_div_with_ablation": 3.094083309173584, "metrics/ce_loss_with_sae": 3.6054646968841553, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 6.694648742675781, "metrics/kl_div_score": 0.9984339758486613, "metrics/ce_loss_score": 0.9979325038445924, "metrics/l2_norm_in": 29.933448791503906, "metrics/l2_norm_out": 29.601543426513672, "metrics/l2_ratio": 0.9893707036972046, "metrics/l0": 32.0, "metrics/l1": 71.21115112304688, "metrics/explained_variance": 0.9667970538139343, "metrics/mse": 21.729291915893555, "metrics/total_tokens_evaluated": 6144}
v5_32k_layer_0/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab7f7cbb78776f97da06167e5297aa3d045a8fc83eda19e91076e4b98ce2934f
3
+ size 201461056
v5_32k_layer_0/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ead023e8744163c7b1fd85e383df10bf880155bea44ad396ac7182c5a2890d81
3
+ size 131152
v5_32k_layer_1/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.1.hook_mlp_out", "hook_layer": 1, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_32k_layer_1/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.006601419299840927, "metrics/kl_div_with_ablation": 0.05105271190404892, "metrics/ce_loss_with_sae": 3.605595588684082, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6525371074676514, "metrics/kl_div_score": 0.8706940522131719, "metrics/ce_loss_score": 0.8778624933119316, "metrics/l2_norm_in": 18.973735809326172, "metrics/l2_norm_out": 17.91716766357422, "metrics/l2_ratio": 0.910649299621582, "metrics/l0": 32.0, "metrics/l1": 86.5653305053711, "metrics/explained_variance": 0.8854424357414246, "metrics/mse": 25.637441635131836, "metrics/total_tokens_evaluated": 6144}
v5_32k_layer_1/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fe30891436eb0a90f46ebc5850c61ccba4022a2acaca044335f7c740f746dba
3
+ size 201461056
v5_32k_layer_1/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:302f9ced1609a7507d856f4d3e8c31bae60f1db043c13595c527427f6c7e7f02
3
+ size 131152
v5_32k_layer_10/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.10.hook_mlp_out", "hook_layer": 10, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_32k_layer_10/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.02311515063047409, "metrics/kl_div_with_ablation": 0.12674781680107117, "metrics/ce_loss_with_sae": 3.6171722412109375, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.708984136581421, "metrics/kl_div_score": 0.8176288064452188, "metrics/ce_loss_score": 0.8352644912761693, "metrics/l2_norm_in": 81.75682830810547, "metrics/l2_norm_out": 78.3930892944336, "metrics/l2_ratio": 0.9553598165512085, "metrics/l0": 32.0, "metrics/l1": 50.45811462402344, "metrics/explained_variance": 0.7926573753356934, "metrics/mse": 514.5535888671875, "metrics/total_tokens_evaluated": 6144}
v5_32k_layer_10/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c676f5187854407e5c6f79d7db14fafe6bf3a662a9883e7fa442effa194d6d04
3
+ size 201461056
v5_32k_layer_10/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7164ca2c04e1c69c8662b4b0a105ee61196163ffbb1ab4eea3e1875644c25895
3
+ size 131152
v5_32k_layer_11/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.11.hook_mlp_out", "hook_layer": 11, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_32k_layer_11/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.028952505439519882, "metrics/kl_div_with_ablation": 0.17384076118469238, "metrics/ce_loss_with_sae": 3.62371826171875, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.783318281173706, "metrics/kl_div_score": 0.8334538732906256, "metrics/ce_loss_score": 0.8661971284238421, "metrics/l2_norm_in": 92.90629577636719, "metrics/l2_norm_out": 87.66377258300781, "metrics/l2_ratio": 0.9233807325363159, "metrics/l0": 32.0, "metrics/l1": 73.98703002929688, "metrics/explained_variance": 0.840599000453949, "metrics/mse": 742.95751953125, "metrics/total_tokens_evaluated": 6144}
v5_32k_layer_11/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72876f75e313f3785999392dddef0afe7b2dc44f6a0671b007dd1dcd176ee8c5
3
+ size 201461056
v5_32k_layer_11/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26db3acaed898f49efb23762df7d70847bc97c5da288926681567dc89f8107a9
3
+ size 131152
v5_32k_layer_2/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.2.hook_mlp_out", "hook_layer": 2, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_32k_layer_2/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.00936876516789198, "metrics/kl_div_with_ablation": 0.05874736234545708, "metrics/ce_loss_with_sae": 3.601879119873047, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6459126472473145, "metrics/kl_div_score": 0.8405244968650671, "metrics/ce_loss_score": 0.9399221354232932, "metrics/l2_norm_in": 49.106536865234375, "metrics/l2_norm_out": 47.64448165893555, "metrics/l2_ratio": 0.8887979388237, "metrics/l0": 31.875, "metrics/l1": 85.81163024902344, "metrics/explained_variance": 0.9745470285415649, "metrics/mse": 37.83729553222656, "metrics/total_tokens_evaluated": 6144}
v5_32k_layer_2/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80a546f38807810c98365c13fdf6ba7c883b2f177fec3d79963dd869992f3d9f
3
+ size 201461056
v5_32k_layer_2/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f0e1dcea111995b08893a3af42c3de1444f9ac0b12dc28c7df6d88125e51ee8
3
+ size 131152
v5_32k_layer_3/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.3.hook_mlp_out", "hook_layer": 3, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_32k_layer_3/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.010681239888072014, "metrics/kl_div_with_ablation": 0.07059153914451599, "metrics/ce_loss_with_sae": 3.6096014976501465, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.658677577972412, "metrics/kl_div_score": 0.8486895169376428, "metrics/ce_loss_score": 0.823244745735597, "metrics/l2_norm_in": 16.98731803894043, "metrics/l2_norm_out": 15.157210350036621, "metrics/l2_ratio": 0.8746687173843384, "metrics/l0": 31.91145896911621, "metrics/l1": 85.93821716308594, "metrics/explained_variance": 0.7805342078208923, "metrics/mse": 50.548057556152344, "metrics/total_tokens_evaluated": 6144}
v5_32k_layer_3/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb781b7a737037ffe78ab1bdca78ba1af47bb297c189ebd9902c60a3fd5f333f
3
+ size 201461056
v5_32k_layer_3/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26543a937a617417c20fe430ff78e4e031b1b941d59cc80b4db96b4ef7c784a5
3
+ size 131152
v5_32k_layer_4/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.4.hook_mlp_out", "hook_layer": 4, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_32k_layer_4/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.012658017687499523, "metrics/kl_div_with_ablation": 0.06332532316446304, "metrics/ce_loss_with_sae": 3.611159086227417, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6600804328918457, "metrics/kl_div_score": 0.8001112816333331, "metrics/ce_loss_score": 0.8017810322797447, "metrics/l2_norm_in": 17.251985549926758, "metrics/l2_norm_out": 15.012179374694824, "metrics/l2_ratio": 0.8525444865226746, "metrics/l0": 31.95556640625, "metrics/l1": 82.4767074584961, "metrics/explained_variance": 0.7294961810112, "metrics/mse": 63.70451354980469, "metrics/total_tokens_evaluated": 6144}
v5_32k_layer_4/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17f173d1919c9c9f53f3a956c96aebe863b0cc90f964c74b13d0056553a09ab3
3
+ size 201461056
v5_32k_layer_4/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc4ef7f40430139c30bc32dbce3fbb64304191924edf791b404eb896583dec26
3
+ size 131152
v5_32k_layer_5/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.5.hook_mlp_out", "hook_layer": 5, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_32k_layer_5/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.014466611668467522, "metrics/kl_div_with_ablation": 0.06850520521402359, "metrics/ce_loss_with_sae": 3.613976240158081, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6693859100341797, "metrics/kl_div_score": 0.7888246356861349, "metrics/ce_loss_score": 0.7879497811486054, "metrics/l2_norm_in": 18.888967514038086, "metrics/l2_norm_out": 16.209918975830078, "metrics/l2_ratio": 0.8484395742416382, "metrics/l0": 32.0, "metrics/l1": 81.43401336669922, "metrics/explained_variance": 0.7174215316772461, "metrics/mse": 87.28172302246094, "metrics/total_tokens_evaluated": 6144}
v5_32k_layer_5/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a5dbd05b4cb8acc8150fa92853ab5c42373121b1bc35bc108ffac1c6cc73ad4
3
+ size 201461056
v5_32k_layer_5/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d092024c813ffab3ab07a3d24630235d7d4348f5b4d68b3d737ad926b4d50022
3
+ size 131152
v5_32k_layer_6/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.6.hook_mlp_out", "hook_layer": 6, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_32k_layer_6/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.016599537804722786, "metrics/kl_div_with_ablation": 0.07569437474012375, "metrics/ce_loss_with_sae": 3.6187987327575684, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.67651629447937, "metrics/kl_div_score": 0.7807031518298047, "metrics/ce_loss_score": 0.7452071071490137, "metrics/l2_norm_in": 21.466564178466797, "metrics/l2_norm_out": 18.40247344970703, "metrics/l2_ratio": 0.852634608745575, "metrics/l0": 32.0, "metrics/l1": 78.82976531982422, "metrics/explained_variance": 0.7063077688217163, "metrics/mse": 117.07249450683594, "metrics/total_tokens_evaluated": 6144}
v5_32k_layer_6/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3105f1072fa405f49c442269f43c553fbdc3e6526b6bc0445eaa6c5cd683838b
3
+ size 201461056
v5_32k_layer_6/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71eaa2c4d06116f1c4001fc6c162b791a70bcc72f5abccfe6abb5aa870dfe8d9
3
+ size 131152
v5_32k_layer_7/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.7.hook_mlp_out", "hook_layer": 7, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_32k_layer_7/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.017009764909744263, "metrics/kl_div_with_ablation": 0.08048636466264725, "metrics/ce_loss_with_sae": 3.614975929260254, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6727118492126465, "metrics/kl_div_score": 0.7886627756013151, "metrics/ce_loss_score": 0.7839520361024154, "metrics/l2_norm_in": 25.444438934326172, "metrics/l2_norm_out": 22.004989624023438, "metrics/l2_ratio": 0.8624889254570007, "metrics/l0": 32.0, "metrics/l1": 76.41993713378906, "metrics/explained_variance": 0.7180025577545166, "metrics/mse": 157.79141235351562, "metrics/total_tokens_evaluated": 6144}
v5_32k_layer_7/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:062c7f24dd951d7780e3000d052e82f091564685418cd3c07c61717f23616ee3
3
+ size 201461056
v5_32k_layer_7/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64ac63219db967150e3ba33adb0fdbae4328f4c0b3259fc34f6c8617fba230d9
3
+ size 131152
v5_32k_layer_8/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.8.hook_mlp_out", "hook_layer": 8, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_32k_layer_8/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.01810324750840664, "metrics/kl_div_with_ablation": 0.08732372522354126, "metrics/ce_loss_with_sae": 3.6162450313568115, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6803367137908936, "metrics/kl_div_score": 0.792688098657451, "metrics/ce_loss_score": 0.7886059610420089, "metrics/l2_norm_in": 30.250225067138672, "metrics/l2_norm_out": 26.306936264038086, "metrics/l2_ratio": 0.8676368594169617, "metrics/l0": 32.0, "metrics/l1": 76.72819519042969, "metrics/explained_variance": 0.7239155769348145, "metrics/mse": 219.98291015625, "metrics/total_tokens_evaluated": 6144}
v5_32k_layer_8/sae_weights.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33f7d0d26aabda775fb2fe1fb8ae9e416e0a4cc92e12e491750d595294d320f1
3
+ size 201461056
v5_32k_layer_8/sparsity.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdd2f0c4c36e44ec05d021122bb7d7435ef8cfefe737018b58f5150987c84fea
3
+ size 131152
v5_32k_layer_9/cfg.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.9.hook_mlp_out", "hook_layer": 9, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
v5_32k_layer_9/metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metrics/kl_div_with_sae": 0.01999707892537117, "metrics/kl_div_with_ablation": 0.09758877754211426, "metrics/ce_loss_with_sae": 3.6174559593200684, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6962451934814453, "metrics/kl_div_score": 0.7950883346526042, "metrics/ce_loss_score": 0.8107506041388108, "metrics/l2_norm_in": 40.192413330078125, "metrics/l2_norm_out": 35.94580841064453, "metrics/l2_ratio": 0.8897998332977295, "metrics/l0": 32.0, "metrics/l1": 72.42656707763672, "metrics/explained_variance": 0.7423521876335144, "metrics/mse": 318.1434326171875, "metrics/total_tokens_evaluated": 6144}