Rename folders
Browse files
    	
        Llama3_1-8B-Base-L21R-32x/hyperparams.json
    ADDED
    
    | 
         @@ -0,0 +1,35 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {
         
     | 
| 2 | 
         
            +
                "device": "cuda:0",
         
     | 
| 3 | 
         
            +
                "seed": 42,
         
     | 
| 4 | 
         
            +
                "dtype": "torch.bfloat16",
         
     | 
| 5 | 
         
            +
                "hook_point_in": "blocks.21.hook_resid_post",
         
     | 
| 6 | 
         
            +
                "hook_point_out": "blocks.21.hook_resid_post",
         
     | 
| 7 | 
         
            +
                "use_decoder_bias": true,
         
     | 
| 8 | 
         
            +
                "apply_decoder_bias_to_pre_encoder": false,
         
     | 
| 9 | 
         
            +
                "expansion_factor": 32,
         
     | 
| 10 | 
         
            +
                "d_model": 4096,
         
     | 
| 11 | 
         
            +
                "d_sae": 131072,
         
     | 
| 12 | 
         
            +
                "bias_init_method": "all_zero",
         
     | 
| 13 | 
         
            +
                "act_fn": "jumprelu",
         
     | 
| 14 | 
         
            +
                "jump_relu_threshold": 0.60546875,
         
     | 
| 15 | 
         
            +
                "norm_activation": "dataset-wise",
         
     | 
| 16 | 
         
            +
                "dataset_average_activation_norm": {
         
     | 
| 17 | 
         
            +
                    "in": 21.5,
         
     | 
| 18 | 
         
            +
                    "out": 21.5
         
     | 
| 19 | 
         
            +
                },
         
     | 
| 20 | 
         
            +
                "decoder_exactly_fixed_norm": false,
         
     | 
| 21 | 
         
            +
                "sparsity_include_decoder_norm": true,
         
     | 
| 22 | 
         
            +
                "use_glu_encoder": false,
         
     | 
| 23 | 
         
            +
                "init_decoder_norm": 0.5,
         
     | 
| 24 | 
         
            +
                "init_encoder_norm": null,
         
     | 
| 25 | 
         
            +
                "init_encoder_with_decoder_transpose": true,
         
     | 
| 26 | 
         
            +
                "lp": 1,
         
     | 
| 27 | 
         
            +
                "l1_coefficient": 8e-05,
         
     | 
| 28 | 
         
            +
                "l1_coefficient_warmup_steps": 78125,
         
     | 
| 29 | 
         
            +
                "top_k": 50,
         
     | 
| 30 | 
         
            +
                "k_warmup_steps": 78125,
         
     | 
| 31 | 
         
            +
                "use_batch_norm_mse": true,
         
     | 
| 32 | 
         
            +
                "use_ghost_grads": false,
         
     | 
| 33 | 
         
            +
                "tp_size": 1,
         
     | 
| 34 | 
         
            +
                "ddp_size": 1
         
     | 
| 35 | 
         
            +
            }
         
     |