Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +1 -0
- benchmark_stats.csv +13 -0
- benchmark_stats.html +646 -0
- benchmark_stats.png +3 -0
- v5_32k_layer_0/cfg.json +1 -0
- v5_32k_layer_0/metrics.json +1 -0
- v5_32k_layer_0/sae_weights.safetensors +3 -0
- v5_32k_layer_0/sparsity.safetensors +3 -0
- v5_32k_layer_1/cfg.json +1 -0
- v5_32k_layer_1/metrics.json +1 -0
- v5_32k_layer_1/sae_weights.safetensors +3 -0
- v5_32k_layer_1/sparsity.safetensors +3 -0
- v5_32k_layer_10/cfg.json +1 -0
- v5_32k_layer_10/metrics.json +1 -0
- v5_32k_layer_10/sae_weights.safetensors +3 -0
- v5_32k_layer_10/sparsity.safetensors +3 -0
- v5_32k_layer_11/cfg.json +1 -0
- v5_32k_layer_11/metrics.json +1 -0
- v5_32k_layer_11/sae_weights.safetensors +3 -0
- v5_32k_layer_11/sparsity.safetensors +3 -0
- v5_32k_layer_2/cfg.json +1 -0
- v5_32k_layer_2/metrics.json +1 -0
- v5_32k_layer_2/sae_weights.safetensors +3 -0
- v5_32k_layer_2/sparsity.safetensors +3 -0
- v5_32k_layer_3/cfg.json +1 -0
- v5_32k_layer_3/metrics.json +1 -0
- v5_32k_layer_3/sae_weights.safetensors +3 -0
- v5_32k_layer_3/sparsity.safetensors +3 -0
- v5_32k_layer_4/cfg.json +1 -0
- v5_32k_layer_4/metrics.json +1 -0
- v5_32k_layer_4/sae_weights.safetensors +3 -0
- v5_32k_layer_4/sparsity.safetensors +3 -0
- v5_32k_layer_5/cfg.json +1 -0
- v5_32k_layer_5/metrics.json +1 -0
- v5_32k_layer_5/sae_weights.safetensors +3 -0
- v5_32k_layer_5/sparsity.safetensors +3 -0
- v5_32k_layer_6/cfg.json +1 -0
- v5_32k_layer_6/metrics.json +1 -0
- v5_32k_layer_6/sae_weights.safetensors +3 -0
- v5_32k_layer_6/sparsity.safetensors +3 -0
- v5_32k_layer_7/cfg.json +1 -0
- v5_32k_layer_7/metrics.json +1 -0
- v5_32k_layer_7/sae_weights.safetensors +3 -0
- v5_32k_layer_7/sparsity.safetensors +3 -0
- v5_32k_layer_8/cfg.json +1 -0
- v5_32k_layer_8/metrics.json +1 -0
- v5_32k_layer_8/sae_weights.safetensors +3 -0
- v5_32k_layer_8/sparsity.safetensors +3 -0
- v5_32k_layer_9/cfg.json +1 -0
- v5_32k_layer_9/metrics.json +1 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
benchmark_stats.png filter=lfs diff=lfs merge=lfs -text
|
benchmark_stats.csv
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
,version,d_sae,layer,kl_div_with_sae,kl_div_with_ablation,ce_loss_with_sae,ce_loss_without_sae,ce_loss_with_ablation,kl_div_score,ce_loss_score,l2_norm_in,l2_norm_out,l2_ratio,l0,l1,explained_variance,mse,total_tokens_evaluated,filepath
|
2 |
+
OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_0/metrics.json,5,32,0,0.0048454091884195805,3.094083309173584,3.6054646968841553,3.599064588546753,6.694648742675781,0.9984339758486613,0.9979325038445924,29.933448791503906,29.601543426513672,0.9893707036972046,32.0,71.21115112304688,0.9667970538139343,21.729291915893555,6144.0,OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_0/metrics.json
|
3 |
+
OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_1/metrics.json,5,32,1,0.006601419299840927,0.05105271190404892,3.605595588684082,3.599064588546753,3.6525371074676514,0.8706940522131719,0.8778624933119316,18.973735809326172,17.91716766357422,0.910649299621582,32.0,86.5653305053711,0.8854424357414246,25.637441635131836,6144.0,OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_1/metrics.json
|
4 |
+
OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_2/metrics.json,5,32,2,0.00936876516789198,0.05874736234545708,3.601879119873047,3.599064588546753,3.6459126472473145,0.8405244968650671,0.9399221354232932,49.106536865234375,47.64448165893555,0.8887979388237,31.875,85.81163024902344,0.9745470285415649,37.83729553222656,6144.0,OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_2/metrics.json
|
5 |
+
OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_3/metrics.json,5,32,3,0.010681239888072014,0.07059153914451599,3.6096014976501465,3.599064588546753,3.658677577972412,0.8486895169376428,0.823244745735597,16.98731803894043,15.157210350036621,0.8746687173843384,31.91145896911621,85.93821716308594,0.7805342078208923,50.548057556152344,6144.0,OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_3/metrics.json
|
6 |
+
OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_4/metrics.json,5,32,4,0.012658017687499523,0.06332532316446304,3.611159086227417,3.599064588546753,3.6600804328918457,0.8001112816333331,0.8017810322797447,17.251985549926758,15.012179374694824,0.8525444865226746,31.95556640625,82.4767074584961,0.7294961810112,63.70451354980469,6144.0,OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_4/metrics.json
|
7 |
+
OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_5/metrics.json,5,32,5,0.014466611668467522,0.06850520521402359,3.613976240158081,3.599064588546753,3.6693859100341797,0.7888246356861349,0.7879497811486054,18.888967514038086,16.209918975830078,0.8484395742416382,32.0,81.43401336669922,0.7174215316772461,87.28172302246094,6144.0,OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_5/metrics.json
|
8 |
+
OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_6/metrics.json,5,32,6,0.016599537804722786,0.07569437474012375,3.6187987327575684,3.599064588546753,3.67651629447937,0.7807031518298047,0.7452071071490137,21.466564178466797,18.40247344970703,0.852634608745575,32.0,78.82976531982422,0.7063077688217163,117.07249450683594,6144.0,OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_6/metrics.json
|
9 |
+
OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_7/metrics.json,5,32,7,0.017009764909744263,0.08048636466264725,3.614975929260254,3.599064588546753,3.6727118492126465,0.7886627756013151,0.7839520361024154,25.444438934326172,22.004989624023438,0.8624889254570007,32.0,76.41993713378906,0.7180025577545166,157.79141235351562,6144.0,OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_7/metrics.json
|
10 |
+
OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_8/metrics.json,5,32,8,0.01810324750840664,0.08732372522354126,3.6162450313568115,3.599064588546753,3.6803367137908936,0.792688098657451,0.7886059610420089,30.250225067138672,26.306936264038086,0.8676368594169617,32.0,76.72819519042969,0.7239155769348145,219.98291015625,6144.0,OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_8/metrics.json
|
11 |
+
OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_9/metrics.json,5,32,9,0.01999707892537117,0.09758877754211426,3.6174559593200684,3.599064588546753,3.6962451934814453,0.7950883346526042,0.8107506041388108,40.192413330078125,35.94580841064453,0.8897998332977295,32.0,72.42656707763672,0.7423521876335144,318.1434326171875,6144.0,OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_9/metrics.json
|
12 |
+
OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_10/metrics.json,5,32,10,0.02311515063047409,0.12674781680107117,3.6171722412109375,3.599064588546753,3.708984136581421,0.8176288064452188,0.8352644912761693,81.75682830810547,78.3930892944336,0.9553598165512085,32.0,50.45811462402344,0.7926573753356934,514.5535888671875,6144.0,OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_10/metrics.json
|
13 |
+
OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_11/metrics.json,5,32,11,0.028952505439519882,0.17384076118469238,3.62371826171875,3.599064588546753,3.783318281173706,0.8334538732906256,0.8661971284238421,92.90629577636719,87.66377258300781,0.9233807325363159,32.0,73.98703002929688,0.840599000453949,742.95751953125,6144.0,OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_11/metrics.json
|
benchmark_stats.html
ADDED
@@ -0,0 +1,646 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<style type="text/css">
|
2 |
+
#T_03f5d_row0_col2, #T_03f5d_row0_col3, #T_03f5d_row0_col6, #T_03f5d_row0_col16, #T_03f5d_row0_col17, #T_03f5d_row1_col4, #T_03f5d_row1_col6, #T_03f5d_row1_col7, #T_03f5d_row1_col17, #T_03f5d_row2_col4, #T_03f5d_row2_col5, #T_03f5d_row2_col6, #T_03f5d_row2_col7, #T_03f5d_row2_col13, #T_03f5d_row2_col17, #T_03f5d_row3_col6, #T_03f5d_row3_col10, #T_03f5d_row3_col11, #T_03f5d_row3_col17, #T_03f5d_row4_col6, #T_03f5d_row4_col10, #T_03f5d_row4_col11, #T_03f5d_row4_col17, #T_03f5d_row5_col6, #T_03f5d_row5_col12, #T_03f5d_row5_col17, #T_03f5d_row6_col6, #T_03f5d_row6_col8, #T_03f5d_row6_col9, #T_03f5d_row6_col15, #T_03f5d_row6_col17, #T_03f5d_row7_col6, #T_03f5d_row7_col17, #T_03f5d_row8_col6, #T_03f5d_row8_col17, #T_03f5d_row9_col6, #T_03f5d_row9_col17, #T_03f5d_row10_col6, #T_03f5d_row10_col14, #T_03f5d_row10_col17, #T_03f5d_row11_col6, #T_03f5d_row11_col17 {
|
3 |
+
background-color: #440154;
|
4 |
+
color: #f1f1f1;
|
5 |
+
}
|
6 |
+
#T_03f5d_row0_col4, #T_03f5d_row0_col7, #T_03f5d_row0_col8, #T_03f5d_row0_col9, #T_03f5d_row0_col12, #T_03f5d_row0_col13, #T_03f5d_row1_col13, #T_03f5d_row1_col14, #T_03f5d_row2_col15, #T_03f5d_row5_col13, #T_03f5d_row6_col13, #T_03f5d_row7_col13, #T_03f5d_row8_col13, #T_03f5d_row9_col13, #T_03f5d_row10_col13, #T_03f5d_row11_col2, #T_03f5d_row11_col3, #T_03f5d_row11_col5, #T_03f5d_row11_col10, #T_03f5d_row11_col11, #T_03f5d_row11_col13, #T_03f5d_row11_col16 {
|
7 |
+
background-color: #fde725;
|
8 |
+
color: #000000;
|
9 |
+
}
|
10 |
+
#T_03f5d_row0_col5 {
|
11 |
+
background-color: #443983;
|
12 |
+
color: #f1f1f1;
|
13 |
+
}
|
14 |
+
#T_03f5d_row0_col10, #T_03f5d_row1_col5, #T_03f5d_row5_col9, #T_03f5d_row8_col9, #T_03f5d_row10_col8 {
|
15 |
+
background-color: #443a83;
|
16 |
+
color: #f1f1f1;
|
17 |
+
}
|
18 |
+
#T_03f5d_row0_col11 {
|
19 |
+
background-color: #414487;
|
20 |
+
color: #f1f1f1;
|
21 |
+
}
|
22 |
+
#T_03f5d_row0_col14 {
|
23 |
+
background-color: #1fa287;
|
24 |
+
color: #f1f1f1;
|
25 |
+
}
|
26 |
+
#T_03f5d_row0_col15 {
|
27 |
+
background-color: #ece51b;
|
28 |
+
color: #000000;
|
29 |
+
}
|
30 |
+
#T_03f5d_row1_col2, #T_03f5d_row5_col16 {
|
31 |
+
background-color: #482173;
|
32 |
+
color: #f1f1f1;
|
33 |
+
}
|
34 |
+
#T_03f5d_row1_col3 {
|
35 |
+
background-color: #481b6d;
|
36 |
+
color: #f1f1f1;
|
37 |
+
}
|
38 |
+
#T_03f5d_row1_col8, #T_03f5d_row9_col16 {
|
39 |
+
background-color: #297b8e;
|
40 |
+
color: #f1f1f1;
|
41 |
+
}
|
42 |
+
#T_03f5d_row1_col9 {
|
43 |
+
background-color: #1f968b;
|
44 |
+
color: #f1f1f1;
|
45 |
+
}
|
46 |
+
#T_03f5d_row1_col10, #T_03f5d_row5_col10, #T_03f5d_row10_col4 {
|
47 |
+
background-color: #460a5d;
|
48 |
+
color: #f1f1f1;
|
49 |
+
}
|
50 |
+
#T_03f5d_row1_col11, #T_03f5d_row3_col16, #T_03f5d_row5_col15, #T_03f5d_row11_col4 {
|
51 |
+
background-color: #471063;
|
52 |
+
color: #f1f1f1;
|
53 |
+
}
|
54 |
+
#T_03f5d_row1_col12 {
|
55 |
+
background-color: #26828e;
|
56 |
+
color: #f1f1f1;
|
57 |
+
}
|
58 |
+
#T_03f5d_row1_col15 {
|
59 |
+
background-color: #35b779;
|
60 |
+
color: #f1f1f1;
|
61 |
+
}
|
62 |
+
#T_03f5d_row1_col16, #T_03f5d_row3_col4, #T_03f5d_row3_col7, #T_03f5d_row4_col4, #T_03f5d_row4_col7, #T_03f5d_row5_col4, #T_03f5d_row5_col7 {
|
63 |
+
background-color: #440256;
|
64 |
+
color: #f1f1f1;
|
65 |
+
}
|
66 |
+
#T_03f5d_row2_col2 {
|
67 |
+
background-color: #433e85;
|
68 |
+
color: #f1f1f1;
|
69 |
+
}
|
70 |
+
#T_03f5d_row2_col3, #T_03f5d_row7_col16 {
|
71 |
+
background-color: #424086;
|
72 |
+
color: #f1f1f1;
|
73 |
+
}
|
74 |
+
#T_03f5d_row2_col8, #T_03f5d_row3_col15, #T_03f5d_row8_col16 {
|
75 |
+
background-color: #38598c;
|
76 |
+
color: #f1f1f1;
|
77 |
+
}
|
78 |
+
#T_03f5d_row2_col9 {
|
79 |
+
background-color: #69cd5b;
|
80 |
+
color: #000000;
|
81 |
+
}
|
82 |
+
#T_03f5d_row2_col10, #T_03f5d_row4_col5 {
|
83 |
+
background-color: #277e8e;
|
84 |
+
color: #f1f1f1;
|
85 |
+
}
|
86 |
+
#T_03f5d_row2_col11 {
|
87 |
+
background-color: #25838e;
|
88 |
+
color: #f1f1f1;
|
89 |
+
}
|
90 |
+
#T_03f5d_row2_col12, #T_03f5d_row9_col11 {
|
91 |
+
background-color: #365c8d;
|
92 |
+
color: #f1f1f1;
|
93 |
+
}
|
94 |
+
#T_03f5d_row2_col14 {
|
95 |
+
background-color: #f1e51d;
|
96 |
+
color: #000000;
|
97 |
+
}
|
98 |
+
#T_03f5d_row2_col16, #T_03f5d_row10_col7 {
|
99 |
+
background-color: #46085c;
|
100 |
+
color: #f1f1f1;
|
101 |
+
}
|
102 |
+
#T_03f5d_row3_col2 {
|
103 |
+
background-color: #38588c;
|
104 |
+
color: #f1f1f1;
|
105 |
+
}
|
106 |
+
#T_03f5d_row3_col3 {
|
107 |
+
background-color: #3c4f8a;
|
108 |
+
color: #f1f1f1;
|
109 |
+
}
|
110 |
+
#T_03f5d_row3_col5 {
|
111 |
+
background-color: #2e6d8e;
|
112 |
+
color: #f1f1f1;
|
113 |
+
}
|
114 |
+
#T_03f5d_row3_col8, #T_03f5d_row3_col9 {
|
115 |
+
background-color: #33628d;
|
116 |
+
color: #f1f1f1;
|
117 |
+
}
|
118 |
+
#T_03f5d_row3_col12 {
|
119 |
+
background-color: #423f85;
|
120 |
+
color: #f1f1f1;
|
121 |
+
}
|
122 |
+
#T_03f5d_row3_col13 {
|
123 |
+
background-color: #365d8d;
|
124 |
+
color: #f1f1f1;
|
125 |
+
}
|
126 |
+
#T_03f5d_row3_col14 {
|
127 |
+
background-color: #f4e61e;
|
128 |
+
color: #000000;
|
129 |
+
}
|
130 |
+
#T_03f5d_row4_col2 {
|
131 |
+
background-color: #2d708e;
|
132 |
+
color: #f1f1f1;
|
133 |
+
}
|
134 |
+
#T_03f5d_row4_col3, #T_03f5d_row10_col15 {
|
135 |
+
background-color: #32658e;
|
136 |
+
color: #f1f1f1;
|
137 |
+
}
|
138 |
+
#T_03f5d_row4_col8, #T_03f5d_row4_col15 {
|
139 |
+
background-color: #482071;
|
140 |
+
color: #f1f1f1;
|
141 |
+
}
|
142 |
+
#T_03f5d_row4_col9 {
|
143 |
+
background-color: #3e4a89;
|
144 |
+
color: #f1f1f1;
|
145 |
+
}
|
146 |
+
#T_03f5d_row4_col12, #T_03f5d_row6_col12 {
|
147 |
+
background-color: #460b5e;
|
148 |
+
color: #f1f1f1;
|
149 |
+
}
|
150 |
+
#T_03f5d_row4_col13 {
|
151 |
+
background-color: #2eb37c;
|
152 |
+
color: #f1f1f1;
|
153 |
+
}
|
154 |
+
#T_03f5d_row4_col14 {
|
155 |
+
background-color: #b5de2b;
|
156 |
+
color: #000000;
|
157 |
+
}
|
158 |
+
#T_03f5d_row4_col16, #T_03f5d_row8_col8 {
|
159 |
+
background-color: #481668;
|
160 |
+
color: #f1f1f1;
|
161 |
+
}
|
162 |
+
#T_03f5d_row5_col2 {
|
163 |
+
background-color: #25858e;
|
164 |
+
color: #f1f1f1;
|
165 |
+
}
|
166 |
+
#T_03f5d_row5_col3 {
|
167 |
+
background-color: #2a788e;
|
168 |
+
color: #f1f1f1;
|
169 |
+
}
|
170 |
+
#T_03f5d_row5_col5 {
|
171 |
+
background-color: #1e9d89;
|
172 |
+
color: #f1f1f1;
|
173 |
+
}
|
174 |
+
#T_03f5d_row5_col8, #T_03f5d_row7_col8 {
|
175 |
+
background-color: #470e61;
|
176 |
+
color: #f1f1f1;
|
177 |
+
}
|
178 |
+
#T_03f5d_row5_col11, #T_03f5d_row9_col7 {
|
179 |
+
background-color: #46075a;
|
180 |
+
color: #f1f1f1;
|
181 |
+
}
|
182 |
+
#T_03f5d_row5_col14 {
|
183 |
+
background-color: #a0da39;
|
184 |
+
color: #000000;
|
185 |
+
}
|
186 |
+
#T_03f5d_row6_col2 {
|
187 |
+
background-color: #1e9b8a;
|
188 |
+
color: #f1f1f1;
|
189 |
+
}
|
190 |
+
#T_03f5d_row6_col3 {
|
191 |
+
background-color: #228d8d;
|
192 |
+
color: #f1f1f1;
|
193 |
+
}
|
194 |
+
#T_03f5d_row6_col4, #T_03f5d_row6_col7, #T_03f5d_row7_col4, #T_03f5d_row7_col7, #T_03f5d_row8_col7 {
|
195 |
+
background-color: #450457;
|
196 |
+
color: #f1f1f1;
|
197 |
+
}
|
198 |
+
#T_03f5d_row6_col5 {
|
199 |
+
background-color: #6ccd5a;
|
200 |
+
color: #000000;
|
201 |
+
}
|
202 |
+
#T_03f5d_row6_col10 {
|
203 |
+
background-color: #481769;
|
204 |
+
color: #f1f1f1;
|
205 |
+
}
|
206 |
+
#T_03f5d_row6_col11, #T_03f5d_row7_col15, #T_03f5d_row11_col7 {
|
207 |
+
background-color: #471164;
|
208 |
+
color: #f1f1f1;
|
209 |
+
}
|
210 |
+
#T_03f5d_row6_col14 {
|
211 |
+
background-color: #73d056;
|
212 |
+
color: #000000;
|
213 |
+
}
|
214 |
+
#T_03f5d_row6_col16 {
|
215 |
+
background-color: #472e7c;
|
216 |
+
color: #f1f1f1;
|
217 |
+
}
|
218 |
+
#T_03f5d_row7_col2 {
|
219 |
+
background-color: #2ab07f;
|
220 |
+
color: #f1f1f1;
|
221 |
+
}
|
222 |
+
#T_03f5d_row7_col3 {
|
223 |
+
background-color: #20928c;
|
224 |
+
color: #f1f1f1;
|
225 |
+
}
|
226 |
+
#T_03f5d_row7_col5 {
|
227 |
+
background-color: #22a884;
|
228 |
+
color: #f1f1f1;
|
229 |
+
}
|
230 |
+
#T_03f5d_row7_col9, #T_03f5d_row8_col11 {
|
231 |
+
background-color: #453581;
|
232 |
+
color: #f1f1f1;
|
233 |
+
}
|
234 |
+
#T_03f5d_row7_col10 {
|
235 |
+
background-color: #482878;
|
236 |
+
color: #f1f1f1;
|
237 |
+
}
|
238 |
+
#T_03f5d_row7_col11 {
|
239 |
+
background-color: #482374;
|
240 |
+
color: #f1f1f1;
|
241 |
+
}
|
242 |
+
#T_03f5d_row7_col12 {
|
243 |
+
background-color: #482475;
|
244 |
+
color: #f1f1f1;
|
245 |
+
}
|
246 |
+
#T_03f5d_row7_col14 {
|
247 |
+
background-color: #4ec36b;
|
248 |
+
color: #000000;
|
249 |
+
}
|
250 |
+
#T_03f5d_row8_col2, #T_03f5d_row8_col14 {
|
251 |
+
background-color: #52c569;
|
252 |
+
color: #000000;
|
253 |
+
}
|
254 |
+
#T_03f5d_row8_col3 {
|
255 |
+
background-color: #1e9c89;
|
256 |
+
color: #f1f1f1;
|
257 |
+
}
|
258 |
+
#T_03f5d_row8_col4, #T_03f5d_row9_col4 {
|
259 |
+
background-color: #450559;
|
260 |
+
color: #f1f1f1;
|
261 |
+
}
|
262 |
+
#T_03f5d_row8_col5 {
|
263 |
+
background-color: #32b67a;
|
264 |
+
color: #f1f1f1;
|
265 |
+
}
|
266 |
+
#T_03f5d_row8_col10 {
|
267 |
+
background-color: #443b84;
|
268 |
+
color: #f1f1f1;
|
269 |
+
}
|
270 |
+
#T_03f5d_row8_col12, #T_03f5d_row9_col15 {
|
271 |
+
background-color: #472f7d;
|
272 |
+
color: #f1f1f1;
|
273 |
+
}
|
274 |
+
#T_03f5d_row8_col15, #T_03f5d_row9_col8 {
|
275 |
+
background-color: #48186a;
|
276 |
+
color: #f1f1f1;
|
277 |
+
}
|
278 |
+
#T_03f5d_row9_col2 {
|
279 |
+
background-color: #86d549;
|
280 |
+
color: #000000;
|
281 |
+
}
|
282 |
+
#T_03f5d_row9_col3 {
|
283 |
+
background-color: #28ae80;
|
284 |
+
color: #f1f1f1;
|
285 |
+
}
|
286 |
+
#T_03f5d_row9_col5 {
|
287 |
+
background-color: #4ac16d;
|
288 |
+
color: #000000;
|
289 |
+
}
|
290 |
+
#T_03f5d_row9_col9 {
|
291 |
+
background-color: #3a548c;
|
292 |
+
color: #f1f1f1;
|
293 |
+
}
|
294 |
+
#T_03f5d_row9_col10 {
|
295 |
+
background-color: #34618d;
|
296 |
+
color: #f1f1f1;
|
297 |
+
}
|
298 |
+
#T_03f5d_row9_col12 {
|
299 |
+
background-color: #355e8d;
|
300 |
+
color: #f1f1f1;
|
301 |
+
}
|
302 |
+
#T_03f5d_row9_col14 {
|
303 |
+
background-color: #24aa83;
|
304 |
+
color: #f1f1f1;
|
305 |
+
}
|
306 |
+
#T_03f5d_row10_col2 {
|
307 |
+
background-color: #c2df23;
|
308 |
+
color: #000000;
|
309 |
+
}
|
310 |
+
#T_03f5d_row10_col3, #T_03f5d_row10_col12 {
|
311 |
+
background-color: #63cb5f;
|
312 |
+
color: #000000;
|
313 |
+
}
|
314 |
+
#T_03f5d_row10_col5 {
|
315 |
+
background-color: #44bf70;
|
316 |
+
color: #f1f1f1;
|
317 |
+
}
|
318 |
+
#T_03f5d_row10_col9 {
|
319 |
+
background-color: #2e6e8e;
|
320 |
+
color: #f1f1f1;
|
321 |
+
}
|
322 |
+
#T_03f5d_row10_col10 {
|
323 |
+
background-color: #9dd93b;
|
324 |
+
color: #000000;
|
325 |
+
}
|
326 |
+
#T_03f5d_row10_col11 {
|
327 |
+
background-color: #aadc32;
|
328 |
+
color: #000000;
|
329 |
+
}
|
330 |
+
#T_03f5d_row10_col16 {
|
331 |
+
background-color: #3bbb75;
|
332 |
+
color: #f1f1f1;
|
333 |
+
}
|
334 |
+
#T_03f5d_row11_col8 {
|
335 |
+
background-color: #3c508b;
|
336 |
+
color: #f1f1f1;
|
337 |
+
}
|
338 |
+
#T_03f5d_row11_col9 {
|
339 |
+
background-color: #228b8d;
|
340 |
+
color: #f1f1f1;
|
341 |
+
}
|
342 |
+
#T_03f5d_row11_col12 {
|
343 |
+
background-color: #1f988b;
|
344 |
+
color: #f1f1f1;
|
345 |
+
}
|
346 |
+
#T_03f5d_row11_col14 {
|
347 |
+
background-color: #2fb47c;
|
348 |
+
color: #f1f1f1;
|
349 |
+
}
|
350 |
+
#T_03f5d_row11_col15 {
|
351 |
+
background-color: #21918c;
|
352 |
+
color: #f1f1f1;
|
353 |
+
}
|
354 |
+
</style>
|
355 |
+
<table id="T_03f5d">
|
356 |
+
<thead>
|
357 |
+
<tr>
|
358 |
+
<th class="blank level0" > </th>
|
359 |
+
<th id="T_03f5d_level0_col0" class="col_heading level0 col0" >version</th>
|
360 |
+
<th id="T_03f5d_level0_col1" class="col_heading level0 col1" >d_sae</th>
|
361 |
+
<th id="T_03f5d_level0_col2" class="col_heading level0 col2" >layer</th>
|
362 |
+
<th id="T_03f5d_level0_col3" class="col_heading level0 col3" >kl_div_with_sae</th>
|
363 |
+
<th id="T_03f5d_level0_col4" class="col_heading level0 col4" >kl_div_with_ablation</th>
|
364 |
+
<th id="T_03f5d_level0_col5" class="col_heading level0 col5" >ce_loss_with_sae</th>
|
365 |
+
<th id="T_03f5d_level0_col6" class="col_heading level0 col6" >ce_loss_without_sae</th>
|
366 |
+
<th id="T_03f5d_level0_col7" class="col_heading level0 col7" >ce_loss_with_ablation</th>
|
367 |
+
<th id="T_03f5d_level0_col8" class="col_heading level0 col8" >kl_div_score</th>
|
368 |
+
<th id="T_03f5d_level0_col9" class="col_heading level0 col9" >ce_loss_score</th>
|
369 |
+
<th id="T_03f5d_level0_col10" class="col_heading level0 col10" >l2_norm_in</th>
|
370 |
+
<th id="T_03f5d_level0_col11" class="col_heading level0 col11" >l2_norm_out</th>
|
371 |
+
<th id="T_03f5d_level0_col12" class="col_heading level0 col12" >l2_ratio</th>
|
372 |
+
<th id="T_03f5d_level0_col13" class="col_heading level0 col13" >l0</th>
|
373 |
+
<th id="T_03f5d_level0_col14" class="col_heading level0 col14" >l1</th>
|
374 |
+
<th id="T_03f5d_level0_col15" class="col_heading level0 col15" >explained_variance</th>
|
375 |
+
<th id="T_03f5d_level0_col16" class="col_heading level0 col16" >mse</th>
|
376 |
+
<th id="T_03f5d_level0_col17" class="col_heading level0 col17" >total_tokens_evaluated</th>
|
377 |
+
<th id="T_03f5d_level0_col18" class="col_heading level0 col18" >filepath</th>
|
378 |
+
</tr>
|
379 |
+
</thead>
|
380 |
+
<tbody>
|
381 |
+
<tr>
|
382 |
+
<th id="T_03f5d_level0_row0" class="row_heading level0 row0" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_0/metrics.json</th>
|
383 |
+
<td id="T_03f5d_row0_col0" class="data row0 col0" >5</td>
|
384 |
+
<td id="T_03f5d_row0_col1" class="data row0 col1" >32</td>
|
385 |
+
<td id="T_03f5d_row0_col2" class="data row0 col2" >0</td>
|
386 |
+
<td id="T_03f5d_row0_col3" class="data row0 col3" >0.004845</td>
|
387 |
+
<td id="T_03f5d_row0_col4" class="data row0 col4" >3.094083</td>
|
388 |
+
<td id="T_03f5d_row0_col5" class="data row0 col5" >3.605465</td>
|
389 |
+
<td id="T_03f5d_row0_col6" class="data row0 col6" >3.599065</td>
|
390 |
+
<td id="T_03f5d_row0_col7" class="data row0 col7" >6.694649</td>
|
391 |
+
<td id="T_03f5d_row0_col8" class="data row0 col8" >0.998434</td>
|
392 |
+
<td id="T_03f5d_row0_col9" class="data row0 col9" >0.997933</td>
|
393 |
+
<td id="T_03f5d_row0_col10" class="data row0 col10" >29.933449</td>
|
394 |
+
<td id="T_03f5d_row0_col11" class="data row0 col11" >29.601543</td>
|
395 |
+
<td id="T_03f5d_row0_col12" class="data row0 col12" >0.989371</td>
|
396 |
+
<td id="T_03f5d_row0_col13" class="data row0 col13" >32.000000</td>
|
397 |
+
<td id="T_03f5d_row0_col14" class="data row0 col14" >71.211151</td>
|
398 |
+
<td id="T_03f5d_row0_col15" class="data row0 col15" >0.966797</td>
|
399 |
+
<td id="T_03f5d_row0_col16" class="data row0 col16" >21.729292</td>
|
400 |
+
<td id="T_03f5d_row0_col17" class="data row0 col17" >6144.000000</td>
|
401 |
+
<td id="T_03f5d_row0_col18" class="data row0 col18" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_0/metrics.json</td>
|
402 |
+
</tr>
|
403 |
+
<tr>
|
404 |
+
<th id="T_03f5d_level0_row1" class="row_heading level0 row1" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_1/metrics.json</th>
|
405 |
+
<td id="T_03f5d_row1_col0" class="data row1 col0" >5</td>
|
406 |
+
<td id="T_03f5d_row1_col1" class="data row1 col1" >32</td>
|
407 |
+
<td id="T_03f5d_row1_col2" class="data row1 col2" >1</td>
|
408 |
+
<td id="T_03f5d_row1_col3" class="data row1 col3" >0.006601</td>
|
409 |
+
<td id="T_03f5d_row1_col4" class="data row1 col4" >0.051053</td>
|
410 |
+
<td id="T_03f5d_row1_col5" class="data row1 col5" >3.605596</td>
|
411 |
+
<td id="T_03f5d_row1_col6" class="data row1 col6" >3.599065</td>
|
412 |
+
<td id="T_03f5d_row1_col7" class="data row1 col7" >3.652537</td>
|
413 |
+
<td id="T_03f5d_row1_col8" class="data row1 col8" >0.870694</td>
|
414 |
+
<td id="T_03f5d_row1_col9" class="data row1 col9" >0.877862</td>
|
415 |
+
<td id="T_03f5d_row1_col10" class="data row1 col10" >18.973736</td>
|
416 |
+
<td id="T_03f5d_row1_col11" class="data row1 col11" >17.917168</td>
|
417 |
+
<td id="T_03f5d_row1_col12" class="data row1 col12" >0.910649</td>
|
418 |
+
<td id="T_03f5d_row1_col13" class="data row1 col13" >32.000000</td>
|
419 |
+
<td id="T_03f5d_row1_col14" class="data row1 col14" >86.565331</td>
|
420 |
+
<td id="T_03f5d_row1_col15" class="data row1 col15" >0.885442</td>
|
421 |
+
<td id="T_03f5d_row1_col16" class="data row1 col16" >25.637442</td>
|
422 |
+
<td id="T_03f5d_row1_col17" class="data row1 col17" >6144.000000</td>
|
423 |
+
<td id="T_03f5d_row1_col18" class="data row1 col18" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_1/metrics.json</td>
|
424 |
+
</tr>
|
425 |
+
<tr>
|
426 |
+
<th id="T_03f5d_level0_row2" class="row_heading level0 row2" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_2/metrics.json</th>
|
427 |
+
<td id="T_03f5d_row2_col0" class="data row2 col0" >5</td>
|
428 |
+
<td id="T_03f5d_row2_col1" class="data row2 col1" >32</td>
|
429 |
+
<td id="T_03f5d_row2_col2" class="data row2 col2" >2</td>
|
430 |
+
<td id="T_03f5d_row2_col3" class="data row2 col3" >0.009369</td>
|
431 |
+
<td id="T_03f5d_row2_col4" class="data row2 col4" >0.058747</td>
|
432 |
+
<td id="T_03f5d_row2_col5" class="data row2 col5" >3.601879</td>
|
433 |
+
<td id="T_03f5d_row2_col6" class="data row2 col6" >3.599065</td>
|
434 |
+
<td id="T_03f5d_row2_col7" class="data row2 col7" >3.645913</td>
|
435 |
+
<td id="T_03f5d_row2_col8" class="data row2 col8" >0.840524</td>
|
436 |
+
<td id="T_03f5d_row2_col9" class="data row2 col9" >0.939922</td>
|
437 |
+
<td id="T_03f5d_row2_col10" class="data row2 col10" >49.106537</td>
|
438 |
+
<td id="T_03f5d_row2_col11" class="data row2 col11" >47.644482</td>
|
439 |
+
<td id="T_03f5d_row2_col12" class="data row2 col12" >0.888798</td>
|
440 |
+
<td id="T_03f5d_row2_col13" class="data row2 col13" >31.875000</td>
|
441 |
+
<td id="T_03f5d_row2_col14" class="data row2 col14" >85.811630</td>
|
442 |
+
<td id="T_03f5d_row2_col15" class="data row2 col15" >0.974547</td>
|
443 |
+
<td id="T_03f5d_row2_col16" class="data row2 col16" >37.837296</td>
|
444 |
+
<td id="T_03f5d_row2_col17" class="data row2 col17" >6144.000000</td>
|
445 |
+
<td id="T_03f5d_row2_col18" class="data row2 col18" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_2/metrics.json</td>
|
446 |
+
</tr>
|
447 |
+
<tr>
|
448 |
+
<th id="T_03f5d_level0_row3" class="row_heading level0 row3" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_3/metrics.json</th>
|
449 |
+
<td id="T_03f5d_row3_col0" class="data row3 col0" >5</td>
|
450 |
+
<td id="T_03f5d_row3_col1" class="data row3 col1" >32</td>
|
451 |
+
<td id="T_03f5d_row3_col2" class="data row3 col2" >3</td>
|
452 |
+
<td id="T_03f5d_row3_col3" class="data row3 col3" >0.010681</td>
|
453 |
+
<td id="T_03f5d_row3_col4" class="data row3 col4" >0.070592</td>
|
454 |
+
<td id="T_03f5d_row3_col5" class="data row3 col5" >3.609601</td>
|
455 |
+
<td id="T_03f5d_row3_col6" class="data row3 col6" >3.599065</td>
|
456 |
+
<td id="T_03f5d_row3_col7" class="data row3 col7" >3.658678</td>
|
457 |
+
<td id="T_03f5d_row3_col8" class="data row3 col8" >0.848690</td>
|
458 |
+
<td id="T_03f5d_row3_col9" class="data row3 col9" >0.823245</td>
|
459 |
+
<td id="T_03f5d_row3_col10" class="data row3 col10" >16.987318</td>
|
460 |
+
<td id="T_03f5d_row3_col11" class="data row3 col11" >15.157210</td>
|
461 |
+
<td id="T_03f5d_row3_col12" class="data row3 col12" >0.874669</td>
|
462 |
+
<td id="T_03f5d_row3_col13" class="data row3 col13" >31.911459</td>
|
463 |
+
<td id="T_03f5d_row3_col14" class="data row3 col14" >85.938217</td>
|
464 |
+
<td id="T_03f5d_row3_col15" class="data row3 col15" >0.780534</td>
|
465 |
+
<td id="T_03f5d_row3_col16" class="data row3 col16" >50.548058</td>
|
466 |
+
<td id="T_03f5d_row3_col17" class="data row3 col17" >6144.000000</td>
|
467 |
+
<td id="T_03f5d_row3_col18" class="data row3 col18" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_3/metrics.json</td>
|
468 |
+
</tr>
|
469 |
+
<tr>
|
470 |
+
<th id="T_03f5d_level0_row4" class="row_heading level0 row4" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_4/metrics.json</th>
|
471 |
+
<td id="T_03f5d_row4_col0" class="data row4 col0" >5</td>
|
472 |
+
<td id="T_03f5d_row4_col1" class="data row4 col1" >32</td>
|
473 |
+
<td id="T_03f5d_row4_col2" class="data row4 col2" >4</td>
|
474 |
+
<td id="T_03f5d_row4_col3" class="data row4 col3" >0.012658</td>
|
475 |
+
<td id="T_03f5d_row4_col4" class="data row4 col4" >0.063325</td>
|
476 |
+
<td id="T_03f5d_row4_col5" class="data row4 col5" >3.611159</td>
|
477 |
+
<td id="T_03f5d_row4_col6" class="data row4 col6" >3.599065</td>
|
478 |
+
<td id="T_03f5d_row4_col7" class="data row4 col7" >3.660080</td>
|
479 |
+
<td id="T_03f5d_row4_col8" class="data row4 col8" >0.800111</td>
|
480 |
+
<td id="T_03f5d_row4_col9" class="data row4 col9" >0.801781</td>
|
481 |
+
<td id="T_03f5d_row4_col10" class="data row4 col10" >17.251986</td>
|
482 |
+
<td id="T_03f5d_row4_col11" class="data row4 col11" >15.012179</td>
|
483 |
+
<td id="T_03f5d_row4_col12" class="data row4 col12" >0.852544</td>
|
484 |
+
<td id="T_03f5d_row4_col13" class="data row4 col13" >31.955566</td>
|
485 |
+
<td id="T_03f5d_row4_col14" class="data row4 col14" >82.476707</td>
|
486 |
+
<td id="T_03f5d_row4_col15" class="data row4 col15" >0.729496</td>
|
487 |
+
<td id="T_03f5d_row4_col16" class="data row4 col16" >63.704514</td>
|
488 |
+
<td id="T_03f5d_row4_col17" class="data row4 col17" >6144.000000</td>
|
489 |
+
<td id="T_03f5d_row4_col18" class="data row4 col18" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_4/metrics.json</td>
|
490 |
+
</tr>
|
491 |
+
<tr>
|
492 |
+
<th id="T_03f5d_level0_row5" class="row_heading level0 row5" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_5/metrics.json</th>
|
493 |
+
<td id="T_03f5d_row5_col0" class="data row5 col0" >5</td>
|
494 |
+
<td id="T_03f5d_row5_col1" class="data row5 col1" >32</td>
|
495 |
+
<td id="T_03f5d_row5_col2" class="data row5 col2" >5</td>
|
496 |
+
<td id="T_03f5d_row5_col3" class="data row5 col3" >0.014467</td>
|
497 |
+
<td id="T_03f5d_row5_col4" class="data row5 col4" >0.068505</td>
|
498 |
+
<td id="T_03f5d_row5_col5" class="data row5 col5" >3.613976</td>
|
499 |
+
<td id="T_03f5d_row5_col6" class="data row5 col6" >3.599065</td>
|
500 |
+
<td id="T_03f5d_row5_col7" class="data row5 col7" >3.669386</td>
|
501 |
+
<td id="T_03f5d_row5_col8" class="data row5 col8" >0.788825</td>
|
502 |
+
<td id="T_03f5d_row5_col9" class="data row5 col9" >0.787950</td>
|
503 |
+
<td id="T_03f5d_row5_col10" class="data row5 col10" >18.888968</td>
|
504 |
+
<td id="T_03f5d_row5_col11" class="data row5 col11" >16.209919</td>
|
505 |
+
<td id="T_03f5d_row5_col12" class="data row5 col12" >0.848440</td>
|
506 |
+
<td id="T_03f5d_row5_col13" class="data row5 col13" >32.000000</td>
|
507 |
+
<td id="T_03f5d_row5_col14" class="data row5 col14" >81.434013</td>
|
508 |
+
<td id="T_03f5d_row5_col15" class="data row5 col15" >0.717422</td>
|
509 |
+
<td id="T_03f5d_row5_col16" class="data row5 col16" >87.281723</td>
|
510 |
+
<td id="T_03f5d_row5_col17" class="data row5 col17" >6144.000000</td>
|
511 |
+
<td id="T_03f5d_row5_col18" class="data row5 col18" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_5/metrics.json</td>
|
512 |
+
</tr>
|
513 |
+
<tr>
|
514 |
+
<th id="T_03f5d_level0_row6" class="row_heading level0 row6" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_6/metrics.json</th>
|
515 |
+
<td id="T_03f5d_row6_col0" class="data row6 col0" >5</td>
|
516 |
+
<td id="T_03f5d_row6_col1" class="data row6 col1" >32</td>
|
517 |
+
<td id="T_03f5d_row6_col2" class="data row6 col2" >6</td>
|
518 |
+
<td id="T_03f5d_row6_col3" class="data row6 col3" >0.016600</td>
|
519 |
+
<td id="T_03f5d_row6_col4" class="data row6 col4" >0.075694</td>
|
520 |
+
<td id="T_03f5d_row6_col5" class="data row6 col5" >3.618799</td>
|
521 |
+
<td id="T_03f5d_row6_col6" class="data row6 col6" >3.599065</td>
|
522 |
+
<td id="T_03f5d_row6_col7" class="data row6 col7" >3.676516</td>
|
523 |
+
<td id="T_03f5d_row6_col8" class="data row6 col8" >0.780703</td>
|
524 |
+
<td id="T_03f5d_row6_col9" class="data row6 col9" >0.745207</td>
|
525 |
+
<td id="T_03f5d_row6_col10" class="data row6 col10" >21.466564</td>
|
526 |
+
<td id="T_03f5d_row6_col11" class="data row6 col11" >18.402473</td>
|
527 |
+
<td id="T_03f5d_row6_col12" class="data row6 col12" >0.852635</td>
|
528 |
+
<td id="T_03f5d_row6_col13" class="data row6 col13" >32.000000</td>
|
529 |
+
<td id="T_03f5d_row6_col14" class="data row6 col14" >78.829765</td>
|
530 |
+
<td id="T_03f5d_row6_col15" class="data row6 col15" >0.706308</td>
|
531 |
+
<td id="T_03f5d_row6_col16" class="data row6 col16" >117.072495</td>
|
532 |
+
<td id="T_03f5d_row6_col17" class="data row6 col17" >6144.000000</td>
|
533 |
+
<td id="T_03f5d_row6_col18" class="data row6 col18" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_6/metrics.json</td>
|
534 |
+
</tr>
|
535 |
+
<tr>
|
536 |
+
<th id="T_03f5d_level0_row7" class="row_heading level0 row7" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_7/metrics.json</th>
|
537 |
+
<td id="T_03f5d_row7_col0" class="data row7 col0" >5</td>
|
538 |
+
<td id="T_03f5d_row7_col1" class="data row7 col1" >32</td>
|
539 |
+
<td id="T_03f5d_row7_col2" class="data row7 col2" >7</td>
|
540 |
+
<td id="T_03f5d_row7_col3" class="data row7 col3" >0.017010</td>
|
541 |
+
<td id="T_03f5d_row7_col4" class="data row7 col4" >0.080486</td>
|
542 |
+
<td id="T_03f5d_row7_col5" class="data row7 col5" >3.614976</td>
|
543 |
+
<td id="T_03f5d_row7_col6" class="data row7 col6" >3.599065</td>
|
544 |
+
<td id="T_03f5d_row7_col7" class="data row7 col7" >3.672712</td>
|
545 |
+
<td id="T_03f5d_row7_col8" class="data row7 col8" >0.788663</td>
|
546 |
+
<td id="T_03f5d_row7_col9" class="data row7 col9" >0.783952</td>
|
547 |
+
<td id="T_03f5d_row7_col10" class="data row7 col10" >25.444439</td>
|
548 |
+
<td id="T_03f5d_row7_col11" class="data row7 col11" >22.004990</td>
|
549 |
+
<td id="T_03f5d_row7_col12" class="data row7 col12" >0.862489</td>
|
550 |
+
<td id="T_03f5d_row7_col13" class="data row7 col13" >32.000000</td>
|
551 |
+
<td id="T_03f5d_row7_col14" class="data row7 col14" >76.419937</td>
|
552 |
+
<td id="T_03f5d_row7_col15" class="data row7 col15" >0.718003</td>
|
553 |
+
<td id="T_03f5d_row7_col16" class="data row7 col16" >157.791412</td>
|
554 |
+
<td id="T_03f5d_row7_col17" class="data row7 col17" >6144.000000</td>
|
555 |
+
<td id="T_03f5d_row7_col18" class="data row7 col18" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_7/metrics.json</td>
|
556 |
+
</tr>
|
557 |
+
<tr>
|
558 |
+
<th id="T_03f5d_level0_row8" class="row_heading level0 row8" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_8/metrics.json</th>
|
559 |
+
<td id="T_03f5d_row8_col0" class="data row8 col0" >5</td>
|
560 |
+
<td id="T_03f5d_row8_col1" class="data row8 col1" >32</td>
|
561 |
+
<td id="T_03f5d_row8_col2" class="data row8 col2" >8</td>
|
562 |
+
<td id="T_03f5d_row8_col3" class="data row8 col3" >0.018103</td>
|
563 |
+
<td id="T_03f5d_row8_col4" class="data row8 col4" >0.087324</td>
|
564 |
+
<td id="T_03f5d_row8_col5" class="data row8 col5" >3.616245</td>
|
565 |
+
<td id="T_03f5d_row8_col6" class="data row8 col6" >3.599065</td>
|
566 |
+
<td id="T_03f5d_row8_col7" class="data row8 col7" >3.680337</td>
|
567 |
+
<td id="T_03f5d_row8_col8" class="data row8 col8" >0.792688</td>
|
568 |
+
<td id="T_03f5d_row8_col9" class="data row8 col9" >0.788606</td>
|
569 |
+
<td id="T_03f5d_row8_col10" class="data row8 col10" >30.250225</td>
|
570 |
+
<td id="T_03f5d_row8_col11" class="data row8 col11" >26.306936</td>
|
571 |
+
<td id="T_03f5d_row8_col12" class="data row8 col12" >0.867637</td>
|
572 |
+
<td id="T_03f5d_row8_col13" class="data row8 col13" >32.000000</td>
|
573 |
+
<td id="T_03f5d_row8_col14" class="data row8 col14" >76.728195</td>
|
574 |
+
<td id="T_03f5d_row8_col15" class="data row8 col15" >0.723916</td>
|
575 |
+
<td id="T_03f5d_row8_col16" class="data row8 col16" >219.982910</td>
|
576 |
+
<td id="T_03f5d_row8_col17" class="data row8 col17" >6144.000000</td>
|
577 |
+
<td id="T_03f5d_row8_col18" class="data row8 col18" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_8/metrics.json</td>
|
578 |
+
</tr>
|
579 |
+
<tr>
|
580 |
+
<th id="T_03f5d_level0_row9" class="row_heading level0 row9" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_9/metrics.json</th>
|
581 |
+
<td id="T_03f5d_row9_col0" class="data row9 col0" >5</td>
|
582 |
+
<td id="T_03f5d_row9_col1" class="data row9 col1" >32</td>
|
583 |
+
<td id="T_03f5d_row9_col2" class="data row9 col2" >9</td>
|
584 |
+
<td id="T_03f5d_row9_col3" class="data row9 col3" >0.019997</td>
|
585 |
+
<td id="T_03f5d_row9_col4" class="data row9 col4" >0.097589</td>
|
586 |
+
<td id="T_03f5d_row9_col5" class="data row9 col5" >3.617456</td>
|
587 |
+
<td id="T_03f5d_row9_col6" class="data row9 col6" >3.599065</td>
|
588 |
+
<td id="T_03f5d_row9_col7" class="data row9 col7" >3.696245</td>
|
589 |
+
<td id="T_03f5d_row9_col8" class="data row9 col8" >0.795088</td>
|
590 |
+
<td id="T_03f5d_row9_col9" class="data row9 col9" >0.810751</td>
|
591 |
+
<td id="T_03f5d_row9_col10" class="data row9 col10" >40.192413</td>
|
592 |
+
<td id="T_03f5d_row9_col11" class="data row9 col11" >35.945808</td>
|
593 |
+
<td id="T_03f5d_row9_col12" class="data row9 col12" >0.889800</td>
|
594 |
+
<td id="T_03f5d_row9_col13" class="data row9 col13" >32.000000</td>
|
595 |
+
<td id="T_03f5d_row9_col14" class="data row9 col14" >72.426567</td>
|
596 |
+
<td id="T_03f5d_row9_col15" class="data row9 col15" >0.742352</td>
|
597 |
+
<td id="T_03f5d_row9_col16" class="data row9 col16" >318.143433</td>
|
598 |
+
<td id="T_03f5d_row9_col17" class="data row9 col17" >6144.000000</td>
|
599 |
+
<td id="T_03f5d_row9_col18" class="data row9 col18" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_9/metrics.json</td>
|
600 |
+
</tr>
|
601 |
+
<tr>
|
602 |
+
<th id="T_03f5d_level0_row10" class="row_heading level0 row10" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_10/metrics.json</th>
|
603 |
+
<td id="T_03f5d_row10_col0" class="data row10 col0" >5</td>
|
604 |
+
<td id="T_03f5d_row10_col1" class="data row10 col1" >32</td>
|
605 |
+
<td id="T_03f5d_row10_col2" class="data row10 col2" >10</td>
|
606 |
+
<td id="T_03f5d_row10_col3" class="data row10 col3" >0.023115</td>
|
607 |
+
<td id="T_03f5d_row10_col4" class="data row10 col4" >0.126748</td>
|
608 |
+
<td id="T_03f5d_row10_col5" class="data row10 col5" >3.617172</td>
|
609 |
+
<td id="T_03f5d_row10_col6" class="data row10 col6" >3.599065</td>
|
610 |
+
<td id="T_03f5d_row10_col7" class="data row10 col7" >3.708984</td>
|
611 |
+
<td id="T_03f5d_row10_col8" class="data row10 col8" >0.817629</td>
|
612 |
+
<td id="T_03f5d_row10_col9" class="data row10 col9" >0.835264</td>
|
613 |
+
<td id="T_03f5d_row10_col10" class="data row10 col10" >81.756828</td>
|
614 |
+
<td id="T_03f5d_row10_col11" class="data row10 col11" >78.393089</td>
|
615 |
+
<td id="T_03f5d_row10_col12" class="data row10 col12" >0.955360</td>
|
616 |
+
<td id="T_03f5d_row10_col13" class="data row10 col13" >32.000000</td>
|
617 |
+
<td id="T_03f5d_row10_col14" class="data row10 col14" >50.458115</td>
|
618 |
+
<td id="T_03f5d_row10_col15" class="data row10 col15" >0.792657</td>
|
619 |
+
<td id="T_03f5d_row10_col16" class="data row10 col16" >514.553589</td>
|
620 |
+
<td id="T_03f5d_row10_col17" class="data row10 col17" >6144.000000</td>
|
621 |
+
<td id="T_03f5d_row10_col18" class="data row10 col18" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_10/metrics.json</td>
|
622 |
+
</tr>
|
623 |
+
<tr>
|
624 |
+
<th id="T_03f5d_level0_row11" class="row_heading level0 row11" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_11/metrics.json</th>
|
625 |
+
<td id="T_03f5d_row11_col0" class="data row11 col0" >5</td>
|
626 |
+
<td id="T_03f5d_row11_col1" class="data row11 col1" >32</td>
|
627 |
+
<td id="T_03f5d_row11_col2" class="data row11 col2" >11</td>
|
628 |
+
<td id="T_03f5d_row11_col3" class="data row11 col3" >0.028953</td>
|
629 |
+
<td id="T_03f5d_row11_col4" class="data row11 col4" >0.173841</td>
|
630 |
+
<td id="T_03f5d_row11_col5" class="data row11 col5" >3.623718</td>
|
631 |
+
<td id="T_03f5d_row11_col6" class="data row11 col6" >3.599065</td>
|
632 |
+
<td id="T_03f5d_row11_col7" class="data row11 col7" >3.783318</td>
|
633 |
+
<td id="T_03f5d_row11_col8" class="data row11 col8" >0.833454</td>
|
634 |
+
<td id="T_03f5d_row11_col9" class="data row11 col9" >0.866197</td>
|
635 |
+
<td id="T_03f5d_row11_col10" class="data row11 col10" >92.906296</td>
|
636 |
+
<td id="T_03f5d_row11_col11" class="data row11 col11" >87.663773</td>
|
637 |
+
<td id="T_03f5d_row11_col12" class="data row11 col12" >0.923381</td>
|
638 |
+
<td id="T_03f5d_row11_col13" class="data row11 col13" >32.000000</td>
|
639 |
+
<td id="T_03f5d_row11_col14" class="data row11 col14" >73.987030</td>
|
640 |
+
<td id="T_03f5d_row11_col15" class="data row11 col15" >0.840599</td>
|
641 |
+
<td id="T_03f5d_row11_col16" class="data row11 col16" >742.957520</td>
|
642 |
+
<td id="T_03f5d_row11_col17" class="data row11 col17" >6144.000000</td>
|
643 |
+
<td id="T_03f5d_row11_col18" class="data row11 col18" >OAI_GPT2Small_v5_32k_resid_delta_mlp/v5_32k_layer_11/metrics.json</td>
|
644 |
+
</tr>
|
645 |
+
</tbody>
|
646 |
+
</table>
|
benchmark_stats.png
ADDED
Git LFS Details
|
v5_32k_layer_0/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.0.hook_mlp_out", "hook_layer": 0, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_32k_layer_0/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.0048454091884195805, "metrics/kl_div_with_ablation": 3.094083309173584, "metrics/ce_loss_with_sae": 3.6054646968841553, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 6.694648742675781, "metrics/kl_div_score": 0.9984339758486613, "metrics/ce_loss_score": 0.9979325038445924, "metrics/l2_norm_in": 29.933448791503906, "metrics/l2_norm_out": 29.601543426513672, "metrics/l2_ratio": 0.9893707036972046, "metrics/l0": 32.0, "metrics/l1": 71.21115112304688, "metrics/explained_variance": 0.9667970538139343, "metrics/mse": 21.729291915893555, "metrics/total_tokens_evaluated": 6144}
|
v5_32k_layer_0/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab7f7cbb78776f97da06167e5297aa3d045a8fc83eda19e91076e4b98ce2934f
|
3 |
+
size 201461056
|
v5_32k_layer_0/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ead023e8744163c7b1fd85e383df10bf880155bea44ad396ac7182c5a2890d81
|
3 |
+
size 131152
|
v5_32k_layer_1/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.1.hook_mlp_out", "hook_layer": 1, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_32k_layer_1/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.006601419299840927, "metrics/kl_div_with_ablation": 0.05105271190404892, "metrics/ce_loss_with_sae": 3.605595588684082, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6525371074676514, "metrics/kl_div_score": 0.8706940522131719, "metrics/ce_loss_score": 0.8778624933119316, "metrics/l2_norm_in": 18.973735809326172, "metrics/l2_norm_out": 17.91716766357422, "metrics/l2_ratio": 0.910649299621582, "metrics/l0": 32.0, "metrics/l1": 86.5653305053711, "metrics/explained_variance": 0.8854424357414246, "metrics/mse": 25.637441635131836, "metrics/total_tokens_evaluated": 6144}
|
v5_32k_layer_1/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9fe30891436eb0a90f46ebc5850c61ccba4022a2acaca044335f7c740f746dba
|
3 |
+
size 201461056
|
v5_32k_layer_1/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:302f9ced1609a7507d856f4d3e8c31bae60f1db043c13595c527427f6c7e7f02
|
3 |
+
size 131152
|
v5_32k_layer_10/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.10.hook_mlp_out", "hook_layer": 10, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_32k_layer_10/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.02311515063047409, "metrics/kl_div_with_ablation": 0.12674781680107117, "metrics/ce_loss_with_sae": 3.6171722412109375, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.708984136581421, "metrics/kl_div_score": 0.8176288064452188, "metrics/ce_loss_score": 0.8352644912761693, "metrics/l2_norm_in": 81.75682830810547, "metrics/l2_norm_out": 78.3930892944336, "metrics/l2_ratio": 0.9553598165512085, "metrics/l0": 32.0, "metrics/l1": 50.45811462402344, "metrics/explained_variance": 0.7926573753356934, "metrics/mse": 514.5535888671875, "metrics/total_tokens_evaluated": 6144}
|
v5_32k_layer_10/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c676f5187854407e5c6f79d7db14fafe6bf3a662a9883e7fa442effa194d6d04
|
3 |
+
size 201461056
|
v5_32k_layer_10/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7164ca2c04e1c69c8662b4b0a105ee61196163ffbb1ab4eea3e1875644c25895
|
3 |
+
size 131152
|
v5_32k_layer_11/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.11.hook_mlp_out", "hook_layer": 11, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_32k_layer_11/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.028952505439519882, "metrics/kl_div_with_ablation": 0.17384076118469238, "metrics/ce_loss_with_sae": 3.62371826171875, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.783318281173706, "metrics/kl_div_score": 0.8334538732906256, "metrics/ce_loss_score": 0.8661971284238421, "metrics/l2_norm_in": 92.90629577636719, "metrics/l2_norm_out": 87.66377258300781, "metrics/l2_ratio": 0.9233807325363159, "metrics/l0": 32.0, "metrics/l1": 73.98703002929688, "metrics/explained_variance": 0.840599000453949, "metrics/mse": 742.95751953125, "metrics/total_tokens_evaluated": 6144}
|
v5_32k_layer_11/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:72876f75e313f3785999392dddef0afe7b2dc44f6a0671b007dd1dcd176ee8c5
|
3 |
+
size 201461056
|
v5_32k_layer_11/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:26db3acaed898f49efb23762df7d70847bc97c5da288926681567dc89f8107a9
|
3 |
+
size 131152
|
v5_32k_layer_2/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.2.hook_mlp_out", "hook_layer": 2, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_32k_layer_2/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.00936876516789198, "metrics/kl_div_with_ablation": 0.05874736234545708, "metrics/ce_loss_with_sae": 3.601879119873047, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6459126472473145, "metrics/kl_div_score": 0.8405244968650671, "metrics/ce_loss_score": 0.9399221354232932, "metrics/l2_norm_in": 49.106536865234375, "metrics/l2_norm_out": 47.64448165893555, "metrics/l2_ratio": 0.8887979388237, "metrics/l0": 31.875, "metrics/l1": 85.81163024902344, "metrics/explained_variance": 0.9745470285415649, "metrics/mse": 37.83729553222656, "metrics/total_tokens_evaluated": 6144}
|
v5_32k_layer_2/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:80a546f38807810c98365c13fdf6ba7c883b2f177fec3d79963dd869992f3d9f
|
3 |
+
size 201461056
|
v5_32k_layer_2/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f0e1dcea111995b08893a3af42c3de1444f9ac0b12dc28c7df6d88125e51ee8
|
3 |
+
size 131152
|
v5_32k_layer_3/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.3.hook_mlp_out", "hook_layer": 3, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_32k_layer_3/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.010681239888072014, "metrics/kl_div_with_ablation": 0.07059153914451599, "metrics/ce_loss_with_sae": 3.6096014976501465, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.658677577972412, "metrics/kl_div_score": 0.8486895169376428, "metrics/ce_loss_score": 0.823244745735597, "metrics/l2_norm_in": 16.98731803894043, "metrics/l2_norm_out": 15.157210350036621, "metrics/l2_ratio": 0.8746687173843384, "metrics/l0": 31.91145896911621, "metrics/l1": 85.93821716308594, "metrics/explained_variance": 0.7805342078208923, "metrics/mse": 50.548057556152344, "metrics/total_tokens_evaluated": 6144}
|
v5_32k_layer_3/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb781b7a737037ffe78ab1bdca78ba1af47bb297c189ebd9902c60a3fd5f333f
|
3 |
+
size 201461056
|
v5_32k_layer_3/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:26543a937a617417c20fe430ff78e4e031b1b941d59cc80b4db96b4ef7c784a5
|
3 |
+
size 131152
|
v5_32k_layer_4/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.4.hook_mlp_out", "hook_layer": 4, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_32k_layer_4/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.012658017687499523, "metrics/kl_div_with_ablation": 0.06332532316446304, "metrics/ce_loss_with_sae": 3.611159086227417, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6600804328918457, "metrics/kl_div_score": 0.8001112816333331, "metrics/ce_loss_score": 0.8017810322797447, "metrics/l2_norm_in": 17.251985549926758, "metrics/l2_norm_out": 15.012179374694824, "metrics/l2_ratio": 0.8525444865226746, "metrics/l0": 31.95556640625, "metrics/l1": 82.4767074584961, "metrics/explained_variance": 0.7294961810112, "metrics/mse": 63.70451354980469, "metrics/total_tokens_evaluated": 6144}
|
v5_32k_layer_4/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:17f173d1919c9c9f53f3a956c96aebe863b0cc90f964c74b13d0056553a09ab3
|
3 |
+
size 201461056
|
v5_32k_layer_4/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc4ef7f40430139c30bc32dbce3fbb64304191924edf791b404eb896583dec26
|
3 |
+
size 131152
|
v5_32k_layer_5/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.5.hook_mlp_out", "hook_layer": 5, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_32k_layer_5/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.014466611668467522, "metrics/kl_div_with_ablation": 0.06850520521402359, "metrics/ce_loss_with_sae": 3.613976240158081, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6693859100341797, "metrics/kl_div_score": 0.7888246356861349, "metrics/ce_loss_score": 0.7879497811486054, "metrics/l2_norm_in": 18.888967514038086, "metrics/l2_norm_out": 16.209918975830078, "metrics/l2_ratio": 0.8484395742416382, "metrics/l0": 32.0, "metrics/l1": 81.43401336669922, "metrics/explained_variance": 0.7174215316772461, "metrics/mse": 87.28172302246094, "metrics/total_tokens_evaluated": 6144}
|
v5_32k_layer_5/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a5dbd05b4cb8acc8150fa92853ab5c42373121b1bc35bc108ffac1c6cc73ad4
|
3 |
+
size 201461056
|
v5_32k_layer_5/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d092024c813ffab3ab07a3d24630235d7d4348f5b4d68b3d737ad926b4d50022
|
3 |
+
size 131152
|
v5_32k_layer_6/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.6.hook_mlp_out", "hook_layer": 6, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_32k_layer_6/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.016599537804722786, "metrics/kl_div_with_ablation": 0.07569437474012375, "metrics/ce_loss_with_sae": 3.6187987327575684, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.67651629447937, "metrics/kl_div_score": 0.7807031518298047, "metrics/ce_loss_score": 0.7452071071490137, "metrics/l2_norm_in": 21.466564178466797, "metrics/l2_norm_out": 18.40247344970703, "metrics/l2_ratio": 0.852634608745575, "metrics/l0": 32.0, "metrics/l1": 78.82976531982422, "metrics/explained_variance": 0.7063077688217163, "metrics/mse": 117.07249450683594, "metrics/total_tokens_evaluated": 6144}
|
v5_32k_layer_6/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3105f1072fa405f49c442269f43c553fbdc3e6526b6bc0445eaa6c5cd683838b
|
3 |
+
size 201461056
|
v5_32k_layer_6/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71eaa2c4d06116f1c4001fc6c162b791a70bcc72f5abccfe6abb5aa870dfe8d9
|
3 |
+
size 131152
|
v5_32k_layer_7/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.7.hook_mlp_out", "hook_layer": 7, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_32k_layer_7/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.017009764909744263, "metrics/kl_div_with_ablation": 0.08048636466264725, "metrics/ce_loss_with_sae": 3.614975929260254, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6727118492126465, "metrics/kl_div_score": 0.7886627756013151, "metrics/ce_loss_score": 0.7839520361024154, "metrics/l2_norm_in": 25.444438934326172, "metrics/l2_norm_out": 22.004989624023438, "metrics/l2_ratio": 0.8624889254570007, "metrics/l0": 32.0, "metrics/l1": 76.41993713378906, "metrics/explained_variance": 0.7180025577545166, "metrics/mse": 157.79141235351562, "metrics/total_tokens_evaluated": 6144}
|
v5_32k_layer_7/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:062c7f24dd951d7780e3000d052e82f091564685418cd3c07c61717f23616ee3
|
3 |
+
size 201461056
|
v5_32k_layer_7/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64ac63219db967150e3ba33adb0fdbae4328f4c0b3259fc34f6c8617fba230d9
|
3 |
+
size 131152
|
v5_32k_layer_8/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.8.hook_mlp_out", "hook_layer": 8, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_32k_layer_8/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.01810324750840664, "metrics/kl_div_with_ablation": 0.08732372522354126, "metrics/ce_loss_with_sae": 3.6162450313568115, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6803367137908936, "metrics/kl_div_score": 0.792688098657451, "metrics/ce_loss_score": 0.7886059610420089, "metrics/l2_norm_in": 30.250225067138672, "metrics/l2_norm_out": 26.306936264038086, "metrics/l2_ratio": 0.8676368594169617, "metrics/l0": 32.0, "metrics/l1": 76.72819519042969, "metrics/explained_variance": 0.7239155769348145, "metrics/mse": 219.98291015625, "metrics/total_tokens_evaluated": 6144}
|
v5_32k_layer_8/sae_weights.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33f7d0d26aabda775fb2fe1fb8ae9e416e0a4cc92e12e491750d595294d320f1
|
3 |
+
size 201461056
|
v5_32k_layer_8/sparsity.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cdd2f0c4c36e44ec05d021122bb7d7435ef8cfefe737018b58f5150987c84fea
|
3 |
+
size 131152
|
v5_32k_layer_9/cfg.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"architecture": "standard", "d_in": 768, "d_sae": 32768, "dtype": "torch.float32", "device": "cuda", "model_name": "gpt2-small", "hook_name": "blocks.9.hook_mlp_out", "hook_layer": 9, "hook_head_index": null, "activation_fn_str": "topk", "activation_fn_kwargs": {"k": 32}, "apply_b_dec_to_input": true, "finetuning_scaling_factor": false, "sae_lens_training_version": null, "prepend_bos": false, "dataset_path": "Skylion007/openwebtext", "dataset_trust_remote_code": true, "context_size": 64, "normalize_activations": "layer_norm"}
|
v5_32k_layer_9/metrics.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metrics/kl_div_with_sae": 0.01999707892537117, "metrics/kl_div_with_ablation": 0.09758877754211426, "metrics/ce_loss_with_sae": 3.6174559593200684, "metrics/ce_loss_without_sae": 3.599064588546753, "metrics/ce_loss_with_ablation": 3.6962451934814453, "metrics/kl_div_score": 0.7950883346526042, "metrics/ce_loss_score": 0.8107506041388108, "metrics/l2_norm_in": 40.192413330078125, "metrics/l2_norm_out": 35.94580841064453, "metrics/l2_ratio": 0.8897998332977295, "metrics/l0": 32.0, "metrics/l1": 72.42656707763672, "metrics/explained_variance": 0.7423521876335144, "metrics/mse": 318.1434326171875, "metrics/total_tokens_evaluated": 6144}
|