SAELens
Tom Lieberum committed on
Commit
745af8a
1 Parent(s): d0d2353

9B: Add sparsity lambdas for residual stream and clean up the feature splitting suite so that we only have SAEs with learning rate 7e-5

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. layer_0/width_131k/average_l0_11/hparams.json +1 -0
  2. layer_0/width_131k/average_l0_15/hparams.json +1 -0
  3. layer_0/width_131k/average_l0_21/hparams.json +1 -0
  4. layer_0/width_131k/average_l0_30/hparams.json +1 -0
  5. layer_0/width_131k/average_l0_41/hparams.json +1 -0
  6. layer_0/width_131k/average_l0_8/hparams.json +1 -0
  7. layer_0/width_16k/average_l0_11/hparams.json +1 -0
  8. layer_0/width_16k/average_l0_129/hparams.json +1 -0
  9. layer_0/width_16k/average_l0_17/hparams.json +1 -0
  10. layer_0/width_16k/average_l0_35/hparams.json +1 -0
  11. layer_0/width_16k/average_l0_68/hparams.json +1 -0
  12. layer_1/width_131k/average_l0_13/hparams.json +1 -0
  13. layer_1/width_131k/average_l0_20/hparams.json +1 -0
  14. layer_1/width_131k/average_l0_33/hparams.json +1 -0
  15. layer_1/width_131k/average_l0_56/hparams.json +1 -0
  16. layer_1/width_131k/average_l0_6/hparams.json +1 -0
  17. layer_1/width_131k/average_l0_9/hparams.json +1 -0
  18. layer_1/width_16k/average_l0_15/hparams.json +1 -0
  19. layer_1/width_16k/average_l0_175/hparams.json +1 -0
  20. layer_1/width_16k/average_l0_31/hparams.json +1 -0
  21. layer_1/width_16k/average_l0_69/hparams.json +1 -0
  22. layer_1/width_16k/average_l0_9/hparams.json +1 -0
  23. layer_10/width_131k/average_l0_15/hparams.json +1 -0
  24. layer_10/width_131k/average_l0_151/hparams.json +1 -0
  25. layer_10/width_131k/average_l0_27/hparams.json +1 -0
  26. layer_10/width_131k/average_l0_47/hparams.json +1 -0
  27. layer_10/width_131k/average_l0_84/hparams.json +1 -0
  28. layer_10/width_131k/average_l0_9/hparams.json +1 -0
  29. layer_10/width_16k/average_l0_10/hparams.json +1 -0
  30. layer_10/width_16k/average_l0_113/hparams.json +1 -0
  31. layer_10/width_16k/average_l0_17/hparams.json +1 -0
  32. layer_10/width_16k/average_l0_243/hparams.json +1 -0
  33. layer_10/width_16k/average_l0_31/hparams.json +1 -0
  34. layer_10/width_16k/average_l0_57/hparams.json +1 -0
  35. layer_11/width_131k/average_l0_16/hparams.json +1 -0
  36. layer_11/width_131k/average_l0_162/hparams.json +1 -0
  37. layer_11/width_131k/average_l0_27/hparams.json +1 -0
  38. layer_11/width_131k/average_l0_49/hparams.json +1 -0
  39. layer_11/width_131k/average_l0_88/hparams.json +1 -0
  40. layer_11/width_131k/average_l0_9/hparams.json +1 -0
  41. layer_11/width_16k/average_l0_10/hparams.json +1 -0
  42. layer_11/width_16k/average_l0_118/hparams.json +1 -0
  43. layer_11/width_16k/average_l0_18/hparams.json +1 -0
  44. layer_11/width_16k/average_l0_255/hparams.json +1 -0
  45. layer_11/width_16k/average_l0_32/hparams.json +1 -0
  46. layer_11/width_16k/average_l0_60/hparams.json +1 -0
  47. layer_12/width_131k/average_l0_10/hparams.json +1 -0
  48. layer_12/width_131k/average_l0_17/hparams.json +1 -0
  49. layer_12/width_131k/average_l0_183/hparams.json +1 -0
  50. layer_12/width_131k/average_l0_29/hparams.json +1 -0
layer_0/width_131k/average_l0_11/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.002}
layer_0/width_131k/average_l0_15/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.001}
layer_0/width_131k/average_l0_21/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.0005}
layer_0/width_131k/average_l0_30/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.00025}
layer_0/width_131k/average_l0_41/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.000125}
layer_0/width_131k/average_l0_8/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.004}
layer_0/width_16k/average_l0_11/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.002}
layer_0/width_16k/average_l0_129/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.000125}
layer_0/width_16k/average_l0_17/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.001}
layer_0/width_16k/average_l0_35/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.0005}
layer_0/width_16k/average_l0_68/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.00025}
layer_1/width_131k/average_l0_13/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.001}
layer_1/width_131k/average_l0_20/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.0005}
layer_1/width_131k/average_l0_33/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.00025}
layer_1/width_131k/average_l0_56/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.000125}
layer_1/width_131k/average_l0_6/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.004}
layer_1/width_131k/average_l0_9/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.002}
layer_1/width_16k/average_l0_15/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.001}
layer_1/width_16k/average_l0_175/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.000125}
layer_1/width_16k/average_l0_31/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.0005}
layer_1/width_16k/average_l0_69/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.00025}
layer_1/width_16k/average_l0_9/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.002}
layer_10/width_131k/average_l0_15/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.002}
layer_10/width_131k/average_l0_151/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.000125}
layer_10/width_131k/average_l0_27/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.001}
layer_10/width_131k/average_l0_47/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.0005}
layer_10/width_131k/average_l0_84/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.00025}
layer_10/width_131k/average_l0_9/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.004}
layer_10/width_16k/average_l0_10/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.004}
layer_10/width_16k/average_l0_113/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.00025}
layer_10/width_16k/average_l0_17/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.002}
layer_10/width_16k/average_l0_243/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.000125}
layer_10/width_16k/average_l0_31/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.001}
layer_10/width_16k/average_l0_57/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.0005}
layer_11/width_131k/average_l0_16/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.002}
layer_11/width_131k/average_l0_162/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.000125}
layer_11/width_131k/average_l0_27/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.001}
layer_11/width_131k/average_l0_49/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.0005}
layer_11/width_131k/average_l0_88/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.00025}
layer_11/width_131k/average_l0_9/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.004}
layer_11/width_16k/average_l0_10/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.004}
layer_11/width_16k/average_l0_118/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.00025}
layer_11/width_16k/average_l0_18/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.002}
layer_11/width_16k/average_l0_255/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.000125}
layer_11/width_16k/average_l0_32/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.001}
layer_11/width_16k/average_l0_60/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.0005}
layer_12/width_131k/average_l0_10/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.004}
layer_12/width_131k/average_l0_17/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.002}
layer_12/width_131k/average_l0_183/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.000125}
layer_12/width_131k/average_l0_29/hparams.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"sparsity_lambda": 0.001}