cybershiptrooper iarcuschin committed on
Commit
c0a69c0
1 Parent(s): 43c361b

Update metadata (#5)

Browse files

- Update metadata (2aba64721e770b874c805c30afb2437c05a17a42)


Co-authored-by: Ivan Arcuschin <[email protected]>

benchmark_cases_metadata.csv CHANGED
@@ -1,19 +1,19 @@
1
- case_id,url,task_description,max_seq_len,min_seq_len,transformer_cfg_file_url,training_args_file_url,weights_file_url,circuit_file_url,training_args.atol,training_args.lr,training_args.use_single_loss,training_args.iit_weight,training_args.behavior_weight,training_args.strict_weight,training_args.epochs,training_args.act_fn,training_args.clip_grad_norm,training_args.lr_scheduler,transformer_cfg.n_layers,transformer_cfg.d_model,transformer_cfg.n_ctx,transformer_cfg.d_head,transformer_cfg.model_name,transformer_cfg.n_heads,transformer_cfg.d_mlp,transformer_cfg.act_fn,transformer_cfg.d_vocab,transformer_cfg.eps,transformer_cfg.use_attn_result,transformer_cfg.use_attn_scale,transformer_cfg.use_split_qkv_input,transformer_cfg.use_hook_mlp_in,transformer_cfg.use_attn_in,transformer_cfg.use_local_attn,transformer_cfg.original_architecture,transformer_cfg.from_checkpoint,transformer_cfg.checkpoint_index,transformer_cfg.checkpoint_label_type,transformer_cfg.checkpoint_value,transformer_cfg.tokenizer_name,transformer_cfg.window_size,transformer_cfg.attn_types,transformer_cfg.init_mode,transformer_cfg.normalization_type,transformer_cfg.device,transformer_cfg.n_devices,transformer_cfg.attention_dir,transformer_cfg.attn_only,transformer_cfg.seed,transformer_cfg.initializer_range,transformer_cfg.init_weights,transformer_cfg.scale_attn_by_inverse_layer_idx,transformer_cfg.positional_embedding_type,transformer_cfg.final_rms,transformer_cfg.d_vocab_out,transformer_cfg.parallel_attn_mlp,transformer_cfg.rotary_dim,transformer_cfg.n_params,transformer_cfg.use_hook_tokens,transformer_cfg.gated_mlp,transformer_cfg.default_prepend_bos,transformer_cfg.dtype,transformer_cfg.tokenizer_prepends_bos,transformer_cfg.n_key_value_heads,transformer_cfg.post_embedding_ln,transformer_cfg.rotary_base,transformer_cfg.trust_remote_code,transformer_cfg.rotary_adjacent_pairs
2
- 11,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/11,Counts the number of words in a sequence based on their length.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,12.0,10.0,3.0,custom,4.0,48.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1460593486680443,True,False,standard,False,5.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False
3
- 13,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/13,"Analyzes the trend (increasing, decreasing, constant) of numeric tokens.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,bidirectional,False,0.0,0.1460593486680443,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
4
- 18,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/18,"Classify each token based on its frequency as 'rare', 'common', or 'frequent'.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,12.0,10.0,3.0,custom,4.0,48.0,gelu,7.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,bidirectional,False,0.0,0.12344267996967354,True,False,standard,False,3.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False
5
- 19,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/19,Removes consecutive duplicate tokens from a sequence.,15,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,32.0,15.0,8.0,custom,4.0,128.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.15689290811054724,True,False,standard,False,3.0,False,,24576.0,False,False,True,torch.float32,,,False,10000.0,False,False
6
- 20,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/20,Detect spam messages based on appearance of spam keywords.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg_1110.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta_1110.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_1110.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl,0.05,0.001,False,1.0,1.0,1.0,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,14.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cuda,1.0,causal,False,0.0,0.16,True,False,standard,False,2.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
7
- 21,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/21,Extract unique tokens from a string,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1885618083164127,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
8
- 24,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/24,Identifies the first occurrence of each token in a sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1885618083164127,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
9
- 3,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/3,Returns the fraction of 'x' in the input up to the i-th position for all i.,5,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg_10110.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta_10110.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_10110.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl,0.05,0.001,False,1.0,1.0,10.0,2000.0,gelu,0.1,,2.0,12.0,5.0,3.0,custom,4.0,48.0,gelu,6.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.22188007849009167,True,False,standard,False,1.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False
10
- 33,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/33,Checks if each token's length is odd or even.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.17457431218879393,True,False,standard,False,2.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
11
- 34,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/34,Calculate the ratio of vowels to consonants in each word.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.16329931618554522,True,False,standard,False,5.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
12
- 35,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/35,Alternates capitalization of each character in words.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,8.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
13
- 36,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/36,"Classifies each token as 'positive', 'negative', or 'neutral' based on emojis.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg_10110.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta_10110.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_10110.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl,0.05,0.001,False,1.0,1.0,10.0,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cuda,1.0,causal,False,0.0,0.19402850002906638,True,False,standard,False,3.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
14
- 37,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/37,Reverses each word in the sequence except for specified exclusions.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,8.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
15
- 38,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/38,Checks if tokens alternate between two types.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,2.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
16
- 4,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/4,Return fraction of previous open tokens minus the fraction of close tokens.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,7.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.17056057308448835,True,False,standard,False,1.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
17
- 8,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/8,Fills gaps between tokens with a specified filler.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.13333333333333333,True,False,standard,False,8.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
18
- ioi,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi,Indirect object identification,16,16,,,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_100_100_40.pth,,,,True,,,,,,,,,,,,,,,,,,True,True,True,True,True,True,,True,,,,,,,,,,,,True,,,True,True,,True,,True,,,True,True,True,,,,True,,True,True
19
- ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token,Indirect object identification,16,16,,,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_100_100_40.pth,,,,True,,,,,,,,,,,,,,,,,,True,True,True,True,True,True,,True,,,,,,,,,,,,True,,,True,True,,True,,True,,,True,True,True,,,,True,,True,True
 
1
+ case_id,url,task_description,max_seq_len,min_seq_len,transformer_cfg_file_url,training_args_file_url,weights_file_url,circuit_file_url,training_args.atol,training_args.lr,training_args.use_single_loss,training_args.iit_weight,training_args.behavior_weight,training_args.strict_weight,training_args.epochs,training_args.act_fn,training_args.clip_grad_norm,training_args.lr_scheduler,transformer_cfg.n_layers,transformer_cfg.d_model,transformer_cfg.n_ctx,transformer_cfg.d_head,transformer_cfg.model_name,transformer_cfg.n_heads,transformer_cfg.d_mlp,transformer_cfg.act_fn,transformer_cfg.d_vocab,transformer_cfg.eps,transformer_cfg.use_attn_result,transformer_cfg.use_attn_scale,transformer_cfg.use_split_qkv_input,transformer_cfg.use_hook_mlp_in,transformer_cfg.use_attn_in,transformer_cfg.use_local_attn,transformer_cfg.original_architecture,transformer_cfg.from_checkpoint,transformer_cfg.tokenizer_name,transformer_cfg.init_mode,transformer_cfg.normalization_type,transformer_cfg.n_devices,transformer_cfg.attention_dir,transformer_cfg.attn_only,transformer_cfg.seed,transformer_cfg.initializer_range,transformer_cfg.init_weights,transformer_cfg.scale_attn_by_inverse_layer_idx,transformer_cfg.positional_embedding_type,transformer_cfg.final_rms,transformer_cfg.d_vocab_out,transformer_cfg.parallel_attn_mlp,transformer_cfg.n_params,transformer_cfg.use_hook_tokens,transformer_cfg.gated_mlp,transformer_cfg.default_prepend_bos,transformer_cfg.dtype,transformer_cfg.tokenizer_prepends_bos,transformer_cfg.post_embedding_ln,transformer_cfg.rotary_base,transformer_cfg.trust_remote_code,transformer_cfg.rotary_adjacent_pairs,training_args.model_pair,training_args.next_token,training_args.non_ioi_thresh,training_args.use_per_token_check,training_args.batch_size,training_args.num_workers,training_args.early_stop,training_args.scheduler_val_metric,training_args.scheduler_mode
2
+ 11,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/11,Counts the number of words in a sequence based on their length.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2,12,10,3,custom,4,48,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1460593486680443,True,False,standard,False,5,False,3456,False,False,True,torch.float32,False,False,10000,False,False,,True,,True,,,True,,
3
+ 13,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/13,"Analyzes the trend (increasing, decreasing, constant) of numeric tokens.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2,20,10,5,custom,4,80,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.1460593486680443,True,False,standard,False,3,False,9600,False,False,True,torch.float32,False,False,10000,False,False,,True,,True,,,True,,
4
+ 18,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/18,"Classify each token based on its frequency as 'rare', 'common', or 'frequent'.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl,0.05,0.001,False,1.0,1.0,1.0,2000.0,gelu,0.1,,2,26,10,6,custom,4,104,gelu,7,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,bidirectional,False,0.0,0.12344267996967354,True,False,standard,False,3,False,15808,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
5
+ 19,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/19,Removes consecutive duplicate tokens from a sequence.,15,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2,32,15,8,custom,4,128,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.15689290811054724,True,False,standard,False,3,False,24576,False,False,True,torch.float32,False,False,10000,False,False,,True,,True,,,True,,
6
+ 20,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/20,Detect spam messages based on appearance of spam keywords.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2,13,10,3,custom,4,52,gelu,14,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16,True,False,standard,False,2,False,3952,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
7
+ 21,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/21,Extract unique tokens from a string,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl,0.05,0.0005,False,1.0,1.0,0.5,2000.0,gelu,0.1,,4,50,10,12,custom,4,200,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.09847319278346618,True,False,standard,False,3,False,118400,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
8
+ 26,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/26,Creates a cascading effect by repeating each token in sequence incrementally.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2,21,10,5,custom,4,84,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.12344267996967354,True,False,standard,False,27,False,10416,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
9
+ 29,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/29,Creates abbreviations for each token in the sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2,13,10,3,custom,4,52,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1539600717839002,True,False,standard,False,8,False,3952,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
10
+ 3,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/3,Returns the fraction of 'x' in the input up to the i-th position for all i.,5,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl,0.05,0.001,False,1.0,1.0,10.0,2000.0,gelu,0.1,,2,12,5,3,custom,4,48,gelu,6,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.22188007849009167,True,False,standard,False,1,False,3456,False,False,True,torch.float32,False,False,10000,False,False,,True,,True,,,True,,
11
+ 33,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/33,Checks if each token's length is odd or even.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2,4,10,1,custom,4,16,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17457431218879393,True,False,standard,False,2,False,384,False,False,True,torch.float32,False,False,10000,False,False,,True,,True,,,True,,
12
+ 34,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/34,Calculate the ratio of vowels to consonants in each word.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl,0.05,0.01,False,1.0,1.0,1.0,2000.0,gelu,0.1,,2,16,10,4,custom,4,64,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.16329931618554522,True,False,standard,False,5,False,6144,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
13
+ 35,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/35,Alternates capitalization of each character in words.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl,0.05,0.01,False,1.0,1.0,1.0,2000.0,gelu,0.1,,2,9,10,2,custom,4,36,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1539600717839002,True,False,standard,False,8,False,1872,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
14
+ 36,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/36,"Classifies each token as 'positive', 'negative', or 'neutral' based on emojis.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl,0.05,0.01,False,1.0,1.0,1.0,2000.0,gelu,0.1,,2,6,10,1,custom,4,24,gelu,5,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.19402850002906638,True,False,standard,False,3,False,768,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
15
+ 37,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/37,Reverses each word in the sequence except for specified exclusions.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl,0.05,0.01,False,1.0,1.0,1.0,2000.0,gelu,0.1,,2,12,10,3,custom,4,48,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.1539600717839002,True,False,standard,False,8,False,3456,False,False,True,torch.float32,False,False,10000,False,False,strict,True,,True,,,True,,
16
+ 4,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/4,Return fraction of previous open tokens minus the fraction of close tokens.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2,20,10,5,custom,4,80,gelu,7,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.17056057308448835,True,False,standard,False,1,False,9600,False,False,True,torch.float32,False,False,10000,False,False,,True,,True,,,True,,
17
+ 8,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/8,Fills gaps between tokens with a specified filler.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2,20,10,5,custom,4,80,gelu,10,1e-05,True,True,True,True,False,False,,False,,gpt2,,1,causal,False,0.0,0.13333333333333333,True,False,standard,False,8,False,9600,False,False,True,torch.float32,False,False,10000,False,False,,True,,True,,,True,,
18
+ ioi,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi,Indirect object identification,16,16,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,,,1.0,,6,64,1024,16,gpt2,4,3072,gelu_new,50257,1e-05,False,True,False,False,False,False,GPT2LMHeadModel,False,gpt2,gpt2,LNPre,1,causal,False,,0.02886751345948129,True,False,standard,False,50257,False,2457600,False,False,True,torch.float32,False,False,10000,False,False,,True,0.65,False,256.0,0.0,True,"val/accuracy,val/IIA",max
19
+ ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token,Indirect object identification,16,16,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_cfg.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/meta.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,,,1.0,,6,64,1024,16,gpt2,4,3072,gelu_new,50257,1e-05,False,True,False,False,False,False,GPT2LMHeadModel,False,gpt2,gpt2,LNPre,1,causal,False,,0.02886751345948129,True,False,standard,False,50257,False,2457600,False,False,True,torch.float32,False,False,10000,False,False,,True,0.65,False,256.0,0.0,True,"val/accuracy,val/IIA",max
benchmark_cases_metadata.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d42203edfeb52102b4df24aecf54b5a51c9c4f547b6cede024422c898564f69f
3
- size 56701
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:568194933b940c4c03457b1c64a8cb074943dfd075ff83f06e84a6376e3a8dcf
3
+ size 58286
benchmark_metadata.json CHANGED
@@ -27,16 +27,16 @@
27
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl"
28
  },
29
  {
30
- "file_name": "ll_model_510.pth",
31
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_510.pth"
32
  },
33
  {
34
- "file_name": "ll_model_cfg_510.pkl",
35
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg_510.pkl"
36
  },
37
  {
38
- "file_name": "meta_510.json",
39
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta_510.json"
40
  }
41
  ],
42
  "transformer_cfg": {
@@ -66,7 +66,6 @@
66
  "attn_types": null,
67
  "init_mode": "gpt2",
68
  "normalization_type": null,
69
- "device": "cpu",
70
  "n_devices": 1,
71
  "attention_dir": "causal",
72
  "attn_only": false,
@@ -91,7 +90,7 @@
91
  "trust_remote_code": false,
92
  "rotary_adjacent_pairs": false
93
  },
94
- "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg_510.pkl",
95
  "training_args": {
96
  "atol": 0.05,
97
  "lr": 0.01,
@@ -104,8 +103,8 @@
104
  "clip_grad_norm": 1.0,
105
  "lr_scheduler": ""
106
  },
107
- "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta_510.json",
108
- "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_510.pth",
109
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl"
110
  },
111
  {
@@ -125,16 +124,16 @@
125
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl"
126
  },
127
  {
128
- "file_name": "ll_model_510.pth",
129
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_510.pth"
130
  },
131
  {
132
- "file_name": "ll_model_cfg_510.pkl",
133
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg_510.pkl"
134
  },
135
  {
136
- "file_name": "meta_510.json",
137
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta_510.json"
138
  }
139
  ],
140
  "transformer_cfg": {
@@ -164,7 +163,6 @@
164
  "attn_types": null,
165
  "init_mode": "gpt2",
166
  "normalization_type": null,
167
- "device": "cpu",
168
  "n_devices": 1,
169
  "attention_dir": "bidirectional",
170
  "attn_only": false,
@@ -189,7 +187,7 @@
189
  "trust_remote_code": false,
190
  "rotary_adjacent_pairs": false
191
  },
192
- "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg_510.pkl",
193
  "training_args": {
194
  "atol": 0.05,
195
  "lr": 0.01,
@@ -202,8 +200,8 @@
202
  "clip_grad_norm": 1.0,
203
  "lr_scheduler": ""
204
  },
205
- "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta_510.json",
206
- "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_510.pth",
207
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl"
208
  },
209
  {
@@ -225,26 +223,26 @@
225
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl"
226
  },
227
  {
228
- "file_name": "ll_model_510.pth",
229
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_510.pth"
230
  },
231
  {
232
- "file_name": "ll_model_cfg_510.pkl",
233
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg_510.pkl"
234
  },
235
  {
236
- "file_name": "meta_510.json",
237
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta_510.json"
238
  }
239
  ],
240
  "transformer_cfg": {
241
  "n_layers": 2,
242
- "d_model": 12,
243
  "n_ctx": 10,
244
- "d_head": 3,
245
  "model_name": "custom",
246
  "n_heads": 4,
247
- "d_mlp": 48,
248
  "act_fn": "gelu",
249
  "d_vocab": 7,
250
  "eps": 1e-05,
@@ -264,7 +262,6 @@
264
  "attn_types": null,
265
  "init_mode": "gpt2",
266
  "normalization_type": null,
267
- "device": "cpu",
268
  "n_devices": 1,
269
  "attention_dir": "bidirectional",
270
  "attn_only": false,
@@ -277,7 +274,7 @@
277
  "d_vocab_out": 3,
278
  "parallel_attn_mlp": false,
279
  "rotary_dim": null,
280
- "n_params": 3456,
281
  "use_hook_tokens": false,
282
  "gated_mlp": false,
283
  "default_prepend_bos": true,
@@ -289,21 +286,22 @@
289
  "trust_remote_code": false,
290
  "rotary_adjacent_pairs": false
291
  },
292
- "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg_510.pkl",
293
  "training_args": {
294
  "atol": 0.05,
295
  "lr": 0.001,
296
  "use_single_loss": false,
297
  "iit_weight": 1.0,
298
  "behavior_weight": 1.0,
299
- "strict_weight": 0.4,
300
  "epochs": 2000,
301
  "act_fn": "gelu",
302
  "clip_grad_norm": 0.1,
303
- "lr_scheduler": ""
 
304
  },
305
- "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta_510.json",
306
- "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_510.pth",
307
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl"
308
  },
309
  {
@@ -323,16 +321,16 @@
323
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl"
324
  },
325
  {
326
- "file_name": "ll_model_510.pth",
327
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_510.pth"
328
  },
329
  {
330
- "file_name": "ll_model_cfg_510.pkl",
331
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg_510.pkl"
332
  },
333
  {
334
- "file_name": "meta_510.json",
335
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta_510.json"
336
  }
337
  ],
338
  "transformer_cfg": {
@@ -362,7 +360,6 @@
362
  "attn_types": null,
363
  "init_mode": "gpt2",
364
  "normalization_type": null,
365
- "device": "cpu",
366
  "n_devices": 1,
367
  "attention_dir": "causal",
368
  "attn_only": false,
@@ -387,7 +384,7 @@
387
  "trust_remote_code": false,
388
  "rotary_adjacent_pairs": false
389
  },
390
- "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg_510.pkl",
391
  "training_args": {
392
  "atol": 0.05,
393
  "lr": 0.001,
@@ -400,8 +397,8 @@
400
  "clip_grad_norm": 0.1,
401
  "lr_scheduler": ""
402
  },
403
- "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta_510.json",
404
- "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_510.pth",
405
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl"
406
  },
407
  {
@@ -430,26 +427,26 @@
430
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl"
431
  },
432
  {
433
- "file_name": "ll_model_1110.pth",
434
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_1110.pth"
435
  },
436
  {
437
- "file_name": "ll_model_cfg_1110.pkl",
438
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg_1110.pkl"
439
  },
440
  {
441
- "file_name": "meta_1110.json",
442
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta_1110.json"
443
  }
444
  ],
445
  "transformer_cfg": {
446
  "n_layers": 2,
447
- "d_model": 4,
448
  "n_ctx": 10,
449
- "d_head": 1,
450
  "model_name": "custom",
451
  "n_heads": 4,
452
- "d_mlp": 16,
453
  "act_fn": "gelu",
454
  "d_vocab": 14,
455
  "eps": 1e-05,
@@ -469,7 +466,6 @@
469
  "attn_types": null,
470
  "init_mode": "gpt2",
471
  "normalization_type": null,
472
- "device": "cuda",
473
  "n_devices": 1,
474
  "attention_dir": "causal",
475
  "attn_only": false,
@@ -482,7 +478,7 @@
482
  "d_vocab_out": 2,
483
  "parallel_attn_mlp": false,
484
  "rotary_dim": null,
485
- "n_params": 384,
486
  "use_hook_tokens": false,
487
  "gated_mlp": false,
488
  "default_prepend_bos": true,
@@ -494,21 +490,22 @@
494
  "trust_remote_code": false,
495
  "rotary_adjacent_pairs": false
496
  },
497
- "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg_1110.pkl",
498
  "training_args": {
499
  "atol": 0.05,
500
- "lr": 0.001,
501
  "use_single_loss": false,
502
  "iit_weight": 1.0,
503
  "behavior_weight": 1.0,
504
- "strict_weight": 1.0,
505
  "epochs": 2000,
506
  "act_fn": "gelu",
507
  "clip_grad_norm": 0.1,
508
- "lr_scheduler": ""
 
509
  },
510
- "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta_1110.json",
511
- "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_1110.pth",
512
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl"
513
  },
514
  {
@@ -528,26 +525,26 @@
528
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl"
529
  },
530
  {
531
- "file_name": "ll_model_510.pth",
532
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_510.pth"
533
  },
534
  {
535
- "file_name": "ll_model_cfg_510.pkl",
536
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg_510.pkl"
537
  },
538
  {
539
- "file_name": "meta_510.json",
540
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta_510.json"
541
  }
542
  ],
543
  "transformer_cfg": {
544
- "n_layers": 2,
545
- "d_model": 20,
546
  "n_ctx": 10,
547
- "d_head": 5,
548
  "model_name": "custom",
549
  "n_heads": 4,
550
- "d_mlp": 80,
551
  "act_fn": "gelu",
552
  "d_vocab": 5,
553
  "eps": 1e-05,
@@ -567,12 +564,11 @@
567
  "attn_types": null,
568
  "init_mode": "gpt2",
569
  "normalization_type": null,
570
- "device": "cpu",
571
  "n_devices": 1,
572
  "attention_dir": "causal",
573
  "attn_only": false,
574
  "seed": 0,
575
- "initializer_range": 0.1885618083164127,
576
  "init_weights": true,
577
  "scale_attn_by_inverse_layer_idx": false,
578
  "positional_embedding_type": "standard",
@@ -580,7 +576,7 @@
580
  "d_vocab_out": 3,
581
  "parallel_attn_mlp": false,
582
  "rotary_dim": null,
583
- "n_params": 9600,
584
  "use_hook_tokens": false,
585
  "gated_mlp": false,
586
  "default_prepend_bos": true,
@@ -592,27 +588,28 @@
592
  "trust_remote_code": false,
593
  "rotary_adjacent_pairs": false
594
  },
595
- "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg_510.pkl",
596
  "training_args": {
597
  "atol": 0.05,
598
- "lr": 0.01,
599
  "use_single_loss": false,
600
  "iit_weight": 1.0,
601
  "behavior_weight": 1.0,
602
- "strict_weight": 0.4,
603
- "epochs": 500,
604
  "act_fn": "gelu",
605
- "clip_grad_norm": 1.0,
606
- "lr_scheduler": ""
 
607
  },
608
- "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta_510.json",
609
- "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_510.pth",
610
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl"
611
  },
612
  {
613
- "case_id": "24",
614
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/24",
615
- "task_description": "Identifies the first occurrence of each token in a sequence.",
616
  "vocab": [
617
  "a",
618
  "b",
@@ -623,29 +620,29 @@
623
  "files": [
624
  {
625
  "file_name": "edges.pkl",
626
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl"
627
  },
628
  {
629
- "file_name": "ll_model_510.pth",
630
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_510.pth"
631
  },
632
  {
633
- "file_name": "ll_model_cfg_510.pkl",
634
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg_510.pkl"
635
  },
636
  {
637
- "file_name": "meta_510.json",
638
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta_510.json"
639
  }
640
  ],
641
  "transformer_cfg": {
642
  "n_layers": 2,
643
- "d_model": 20,
644
  "n_ctx": 10,
645
  "d_head": 5,
646
  "model_name": "custom",
647
  "n_heads": 4,
648
- "d_mlp": 80,
649
  "act_fn": "gelu",
650
  "d_vocab": 5,
651
  "eps": 1e-05,
@@ -665,20 +662,19 @@
665
  "attn_types": null,
666
  "init_mode": "gpt2",
667
  "normalization_type": null,
668
- "device": "cpu",
669
  "n_devices": 1,
670
  "attention_dir": "causal",
671
  "attn_only": false,
672
  "seed": 0,
673
- "initializer_range": 0.1885618083164127,
674
  "init_weights": true,
675
  "scale_attn_by_inverse_layer_idx": false,
676
  "positional_embedding_type": "standard",
677
  "final_rms": false,
678
- "d_vocab_out": 3,
679
  "parallel_attn_mlp": false,
680
  "rotary_dim": null,
681
- "n_params": 9600,
682
  "use_hook_tokens": false,
683
  "gated_mlp": false,
684
  "default_prepend_bos": true,
@@ -690,7 +686,7 @@
690
  "trust_remote_code": false,
691
  "rotary_adjacent_pairs": false
692
  },
693
- "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg_510.pkl",
694
  "training_args": {
695
  "atol": 0.05,
696
  "lr": 0.01,
@@ -698,14 +694,118 @@
698
  "iit_weight": 1.0,
699
  "behavior_weight": 1.0,
700
  "strict_weight": 0.4,
701
- "epochs": 500,
702
  "act_fn": "gelu",
703
- "clip_grad_norm": 1.0,
704
- "lr_scheduler": ""
 
705
  },
706
- "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta_510.json",
707
- "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_510.pth",
708
- "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
709
  },
710
  {
711
  "case_id": "3",
@@ -725,16 +825,16 @@
725
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl"
726
  },
727
  {
728
- "file_name": "ll_model_10110.pth",
729
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_10110.pth"
730
  },
731
  {
732
- "file_name": "ll_model_cfg_10110.pkl",
733
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg_10110.pkl"
734
  },
735
  {
736
- "file_name": "meta_10110.json",
737
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta_10110.json"
738
  }
739
  ],
740
  "transformer_cfg": {
@@ -764,7 +864,6 @@
764
  "attn_types": null,
765
  "init_mode": "gpt2",
766
  "normalization_type": null,
767
- "device": "cpu",
768
  "n_devices": 1,
769
  "attention_dir": "causal",
770
  "attn_only": false,
@@ -789,7 +888,7 @@
789
  "trust_remote_code": false,
790
  "rotary_adjacent_pairs": false
791
  },
792
- "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg_10110.pkl",
793
  "training_args": {
794
  "atol": 0.05,
795
  "lr": 0.001,
@@ -802,8 +901,8 @@
802
  "clip_grad_norm": 0.1,
803
  "lr_scheduler": ""
804
  },
805
- "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta_10110.json",
806
- "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_10110.pth",
807
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl"
808
  },
809
  {
@@ -828,16 +927,16 @@
828
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl"
829
  },
830
  {
831
- "file_name": "ll_model_510.pth",
832
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_510.pth"
833
  },
834
  {
835
- "file_name": "ll_model_cfg_510.pkl",
836
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg_510.pkl"
837
  },
838
  {
839
- "file_name": "meta_510.json",
840
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta_510.json"
841
  }
842
  ],
843
  "transformer_cfg": {
@@ -867,7 +966,6 @@
867
  "attn_types": null,
868
  "init_mode": "gpt2",
869
  "normalization_type": null,
870
- "device": "cpu",
871
  "n_devices": 1,
872
  "attention_dir": "causal",
873
  "attn_only": false,
@@ -892,7 +990,7 @@
892
  "trust_remote_code": false,
893
  "rotary_adjacent_pairs": false
894
  },
895
- "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg_510.pkl",
896
  "training_args": {
897
  "atol": 0.05,
898
  "lr": 0.001,
@@ -905,8 +1003,8 @@
905
  "clip_grad_norm": 0.1,
906
  "lr_scheduler": ""
907
  },
908
- "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta_510.json",
909
- "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_510.pth",
910
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl"
911
  },
912
  {
@@ -931,26 +1029,26 @@
931
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl"
932
  },
933
  {
934
- "file_name": "ll_model_510.pth",
935
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_510.pth"
936
  },
937
  {
938
- "file_name": "ll_model_cfg_510.pkl",
939
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg_510.pkl"
940
  },
941
  {
942
- "file_name": "meta_510.json",
943
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta_510.json"
944
  }
945
  ],
946
  "transformer_cfg": {
947
  "n_layers": 2,
948
- "d_model": 4,
949
  "n_ctx": 10,
950
- "d_head": 1,
951
  "model_name": "custom",
952
  "n_heads": 4,
953
- "d_mlp": 16,
954
  "act_fn": "gelu",
955
  "d_vocab": 10,
956
  "eps": 1e-05,
@@ -970,7 +1068,6 @@
970
  "attn_types": null,
971
  "init_mode": "gpt2",
972
  "normalization_type": null,
973
- "device": "cpu",
974
  "n_devices": 1,
975
  "attention_dir": "causal",
976
  "attn_only": false,
@@ -983,7 +1080,7 @@
983
  "d_vocab_out": 5,
984
  "parallel_attn_mlp": false,
985
  "rotary_dim": null,
986
- "n_params": 384,
987
  "use_hook_tokens": false,
988
  "gated_mlp": false,
989
  "default_prepend_bos": true,
@@ -995,21 +1092,22 @@
995
  "trust_remote_code": false,
996
  "rotary_adjacent_pairs": false
997
  },
998
- "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg_510.pkl",
999
  "training_args": {
1000
  "atol": 0.05,
1001
- "lr": 0.001,
1002
  "use_single_loss": false,
1003
  "iit_weight": 1.0,
1004
  "behavior_weight": 1.0,
1005
- "strict_weight": 0.4,
1006
  "epochs": 2000,
1007
  "act_fn": "gelu",
1008
  "clip_grad_norm": 0.1,
1009
- "lr_scheduler": ""
 
1010
  },
1011
- "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta_510.json",
1012
- "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_510.pth",
1013
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl"
1014
  },
1015
  {
@@ -1034,26 +1132,26 @@
1034
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl"
1035
  },
1036
  {
1037
- "file_name": "ll_model_510.pth",
1038
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_510.pth"
1039
  },
1040
  {
1041
- "file_name": "ll_model_cfg_510.pkl",
1042
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg_510.pkl"
1043
  },
1044
  {
1045
- "file_name": "meta_510.json",
1046
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta_510.json"
1047
  }
1048
  ],
1049
  "transformer_cfg": {
1050
  "n_layers": 2,
1051
- "d_model": 4,
1052
  "n_ctx": 10,
1053
- "d_head": 1,
1054
  "model_name": "custom",
1055
  "n_heads": 4,
1056
- "d_mlp": 16,
1057
  "act_fn": "gelu",
1058
  "d_vocab": 10,
1059
  "eps": 1e-05,
@@ -1073,7 +1171,6 @@
1073
  "attn_types": null,
1074
  "init_mode": "gpt2",
1075
  "normalization_type": null,
1076
- "device": "cpu",
1077
  "n_devices": 1,
1078
  "attention_dir": "causal",
1079
  "attn_only": false,
@@ -1086,7 +1183,7 @@
1086
  "d_vocab_out": 8,
1087
  "parallel_attn_mlp": false,
1088
  "rotary_dim": null,
1089
- "n_params": 384,
1090
  "use_hook_tokens": false,
1091
  "gated_mlp": false,
1092
  "default_prepend_bos": true,
@@ -1098,21 +1195,22 @@
1098
  "trust_remote_code": false,
1099
  "rotary_adjacent_pairs": false
1100
  },
1101
- "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg_510.pkl",
1102
  "training_args": {
1103
  "atol": 0.05,
1104
- "lr": 0.001,
1105
  "use_single_loss": false,
1106
  "iit_weight": 1.0,
1107
  "behavior_weight": 1.0,
1108
- "strict_weight": 0.4,
1109
  "epochs": 2000,
1110
  "act_fn": "gelu",
1111
  "clip_grad_norm": 0.1,
1112
- "lr_scheduler": ""
 
1113
  },
1114
- "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta_510.json",
1115
- "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_510.pth",
1116
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl"
1117
  },
1118
  {
@@ -1132,26 +1230,26 @@
1132
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl"
1133
  },
1134
  {
1135
- "file_name": "ll_model_10110.pth",
1136
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_10110.pth"
1137
  },
1138
  {
1139
- "file_name": "ll_model_cfg_10110.pkl",
1140
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg_10110.pkl"
1141
  },
1142
  {
1143
- "file_name": "meta_10110.json",
1144
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta_10110.json"
1145
  }
1146
  ],
1147
  "transformer_cfg": {
1148
  "n_layers": 2,
1149
- "d_model": 4,
1150
  "n_ctx": 10,
1151
  "d_head": 1,
1152
  "model_name": "custom",
1153
  "n_heads": 4,
1154
- "d_mlp": 16,
1155
  "act_fn": "gelu",
1156
  "d_vocab": 5,
1157
  "eps": 1e-05,
@@ -1171,7 +1269,6 @@
1171
  "attn_types": null,
1172
  "init_mode": "gpt2",
1173
  "normalization_type": null,
1174
- "device": "cuda",
1175
  "n_devices": 1,
1176
  "attention_dir": "causal",
1177
  "attn_only": false,
@@ -1184,7 +1281,7 @@
1184
  "d_vocab_out": 3,
1185
  "parallel_attn_mlp": false,
1186
  "rotary_dim": null,
1187
- "n_params": 384,
1188
  "use_hook_tokens": false,
1189
  "gated_mlp": false,
1190
  "default_prepend_bos": true,
@@ -1196,21 +1293,22 @@
1196
  "trust_remote_code": false,
1197
  "rotary_adjacent_pairs": false
1198
  },
1199
- "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg_10110.pkl",
1200
  "training_args": {
1201
  "atol": 0.05,
1202
- "lr": 0.001,
1203
  "use_single_loss": false,
1204
  "iit_weight": 1.0,
1205
  "behavior_weight": 1.0,
1206
- "strict_weight": 10.0,
1207
  "epochs": 2000,
1208
  "act_fn": "gelu",
1209
  "clip_grad_norm": 0.1,
1210
- "lr_scheduler": ""
 
1211
  },
1212
- "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta_10110.json",
1213
- "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_10110.pth",
1214
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl"
1215
  },
1216
  {
@@ -1235,26 +1333,26 @@
1235
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl"
1236
  },
1237
  {
1238
- "file_name": "ll_model_510.pth",
1239
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_510.pth"
1240
  },
1241
  {
1242
- "file_name": "ll_model_cfg_510.pkl",
1243
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg_510.pkl"
1244
  },
1245
  {
1246
- "file_name": "meta_510.json",
1247
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta_510.json"
1248
  }
1249
  ],
1250
  "transformer_cfg": {
1251
  "n_layers": 2,
1252
- "d_model": 4,
1253
  "n_ctx": 10,
1254
- "d_head": 1,
1255
  "model_name": "custom",
1256
  "n_heads": 4,
1257
- "d_mlp": 16,
1258
  "act_fn": "gelu",
1259
  "d_vocab": 10,
1260
  "eps": 1e-05,
@@ -1274,7 +1372,6 @@
1274
  "attn_types": null,
1275
  "init_mode": "gpt2",
1276
  "normalization_type": null,
1277
- "device": "cpu",
1278
  "n_devices": 1,
1279
  "attention_dir": "causal",
1280
  "attn_only": false,
@@ -1287,7 +1384,7 @@
1287
  "d_vocab_out": 8,
1288
  "parallel_attn_mlp": false,
1289
  "rotary_dim": null,
1290
- "n_params": 384,
1291
  "use_hook_tokens": false,
1292
  "gated_mlp": false,
1293
  "default_prepend_bos": true,
@@ -1299,28 +1396,31 @@
1299
  "trust_remote_code": false,
1300
  "rotary_adjacent_pairs": false
1301
  },
1302
- "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg_510.pkl",
1303
  "training_args": {
1304
  "atol": 0.05,
1305
- "lr": 0.001,
1306
  "use_single_loss": false,
1307
  "iit_weight": 1.0,
1308
  "behavior_weight": 1.0,
1309
- "strict_weight": 0.4,
1310
  "epochs": 2000,
1311
  "act_fn": "gelu",
1312
  "clip_grad_norm": 0.1,
1313
- "lr_scheduler": ""
 
1314
  },
1315
- "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta_510.json",
1316
- "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_510.pth",
1317
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl"
1318
  },
1319
  {
1320
- "case_id": "38",
1321
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/38",
1322
- "task_description": "Checks if tokens alternate between two types.",
1323
  "vocab": [
 
 
1324
  "a",
1325
  "b",
1326
  "c"
@@ -1330,19 +1430,19 @@
1330
  "files": [
1331
  {
1332
  "file_name": "edges.pkl",
1333
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/edges.pkl"
1334
  },
1335
  {
1336
- "file_name": "ll_model_510.pth",
1337
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/ll_model_510.pth"
1338
  },
1339
  {
1340
- "file_name": "ll_model_cfg_510.pkl",
1341
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/ll_model_cfg_510.pkl"
1342
  },
1343
  {
1344
- "file_name": "meta_510.json",
1345
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/meta_510.json"
1346
  }
1347
  ],
1348
  "transformer_cfg": {
@@ -1354,7 +1454,7 @@
1354
  "n_heads": 4,
1355
  "d_mlp": 80,
1356
  "act_fn": "gelu",
1357
- "d_vocab": 5,
1358
  "eps": 1e-05,
1359
  "use_attn_result": true,
1360
  "use_attn_scale": true,
@@ -1372,17 +1472,16 @@
1372
  "attn_types": null,
1373
  "init_mode": "gpt2",
1374
  "normalization_type": null,
1375
- "device": "cpu",
1376
  "n_devices": 1,
1377
  "attention_dir": "causal",
1378
  "attn_only": false,
1379
  "seed": 0,
1380
- "initializer_range": 0.1539600717839002,
1381
  "init_weights": true,
1382
  "scale_attn_by_inverse_layer_idx": false,
1383
  "positional_embedding_type": "standard",
1384
  "final_rms": false,
1385
- "d_vocab_out": 2,
1386
  "parallel_attn_mlp": false,
1387
  "rotary_dim": null,
1388
  "n_params": 9600,
@@ -1397,7 +1496,7 @@
1397
  "trust_remote_code": false,
1398
  "rotary_adjacent_pairs": false
1399
  },
1400
- "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/ll_model_cfg_510.pkl",
1401
  "training_args": {
1402
  "atol": 0.05,
1403
  "lr": 0.001,
@@ -1410,39 +1509,42 @@
1410
  "clip_grad_norm": 0.1,
1411
  "lr_scheduler": ""
1412
  },
1413
- "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/meta_510.json",
1414
- "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/ll_model_510.pth",
1415
- "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/edges.pkl"
1416
  },
1417
  {
1418
- "case_id": "4",
1419
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/4",
1420
- "task_description": "Return fraction of previous open tokens minus the fraction of close tokens.",
1421
  "vocab": [
1422
- "(",
1423
- ")",
1424
- "a",
 
1425
  "b",
1426
- "c"
 
 
1427
  ],
1428
  "max_seq_len": 10,
1429
  "min_seq_len": 4,
1430
  "files": [
1431
  {
1432
  "file_name": "edges.pkl",
1433
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl"
1434
  },
1435
  {
1436
- "file_name": "ll_model_510.pth",
1437
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_510.pth"
1438
  },
1439
  {
1440
- "file_name": "ll_model_cfg_510.pkl",
1441
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg_510.pkl"
1442
  },
1443
  {
1444
- "file_name": "meta_510.json",
1445
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta_510.json"
1446
  }
1447
  ],
1448
  "transformer_cfg": {
@@ -1454,7 +1556,7 @@
1454
  "n_heads": 4,
1455
  "d_mlp": 80,
1456
  "act_fn": "gelu",
1457
- "d_vocab": 7,
1458
  "eps": 1e-05,
1459
  "use_attn_result": true,
1460
  "use_attn_scale": true,
@@ -1472,17 +1574,16 @@
1472
  "attn_types": null,
1473
  "init_mode": "gpt2",
1474
  "normalization_type": null,
1475
- "device": "cpu",
1476
  "n_devices": 1,
1477
  "attention_dir": "causal",
1478
  "attn_only": false,
1479
  "seed": 0,
1480
- "initializer_range": 0.17056057308448835,
1481
  "init_weights": true,
1482
  "scale_attn_by_inverse_layer_idx": false,
1483
  "positional_embedding_type": "standard",
1484
  "final_rms": false,
1485
- "d_vocab_out": 1,
1486
  "parallel_attn_mlp": false,
1487
  "rotary_dim": null,
1488
  "n_params": 9600,
@@ -1497,143 +1598,123 @@
1497
  "trust_remote_code": false,
1498
  "rotary_adjacent_pairs": false
1499
  },
1500
- "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg_510.pkl",
1501
  "training_args": {
1502
  "atol": 0.05,
1503
- "lr": 0.001,
1504
  "use_single_loss": false,
1505
  "iit_weight": 1.0,
1506
  "behavior_weight": 1.0,
1507
  "strict_weight": 0.4,
1508
- "epochs": 2000,
1509
  "act_fn": "gelu",
1510
- "clip_grad_norm": 0.1,
1511
  "lr_scheduler": ""
1512
  },
1513
- "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta_510.json",
1514
- "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_510.pth",
1515
- "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl"
1516
  },
1517
  {
1518
- "case_id": "8",
1519
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/8",
1520
- "task_description": "Fills gaps between tokens with a specified filler.",
1521
- "vocab": [
1522
- "J",
1523
- "LB",
1524
- "TPSI",
1525
- "V",
1526
- "b",
1527
- "no",
1528
- "oCLrZaW",
1529
- "poiVg"
1530
- ],
1531
- "max_seq_len": 10,
1532
- "min_seq_len": 4,
1533
  "files": [
1534
  {
1535
  "file_name": "edges.pkl",
1536
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/edges.pkl"
1537
  },
1538
  {
1539
- "file_name": "ll_model_510.pth",
1540
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_510.pth"
1541
  },
1542
  {
1543
- "file_name": "ll_model_cfg_510.pkl",
1544
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_cfg_510.pkl"
1545
  },
1546
  {
1547
- "file_name": "meta_510.json",
1548
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/meta_510.json"
1549
  }
1550
  ],
1551
  "transformer_cfg": {
1552
- "n_layers": 2,
1553
- "d_model": 20,
1554
- "n_ctx": 10,
1555
- "d_head": 5,
1556
- "model_name": "custom",
1557
  "n_heads": 4,
1558
- "d_mlp": 80,
1559
- "act_fn": "gelu",
1560
- "d_vocab": 10,
1561
  "eps": 1e-05,
1562
- "use_attn_result": true,
1563
  "use_attn_scale": true,
1564
- "use_split_qkv_input": true,
1565
- "use_hook_mlp_in": true,
1566
  "use_attn_in": false,
1567
  "use_local_attn": false,
1568
- "original_architecture": null,
1569
  "from_checkpoint": false,
1570
  "checkpoint_index": null,
1571
  "checkpoint_label_type": null,
1572
  "checkpoint_value": null,
1573
- "tokenizer_name": null,
1574
  "window_size": null,
1575
  "attn_types": null,
1576
  "init_mode": "gpt2",
1577
- "normalization_type": null,
1578
- "device": "cpu",
1579
  "n_devices": 1,
1580
  "attention_dir": "causal",
1581
  "attn_only": false,
1582
- "seed": 0,
1583
- "initializer_range": 0.13333333333333333,
1584
  "init_weights": true,
1585
  "scale_attn_by_inverse_layer_idx": false,
1586
  "positional_embedding_type": "standard",
1587
  "final_rms": false,
1588
- "d_vocab_out": 8,
1589
  "parallel_attn_mlp": false,
1590
  "rotary_dim": null,
1591
- "n_params": 9600,
1592
  "use_hook_tokens": false,
1593
  "gated_mlp": false,
1594
  "default_prepend_bos": true,
1595
  "dtype": "torch.float32",
1596
- "tokenizer_prepends_bos": null,
1597
  "n_key_value_heads": null,
1598
  "post_embedding_ln": false,
1599
  "rotary_base": 10000,
1600
  "trust_remote_code": false,
1601
  "rotary_adjacent_pairs": false
1602
  },
1603
- "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_cfg_510.pkl",
1604
  "training_args": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1605
  "atol": 0.05,
1606
- "lr": 0.01,
1607
  "use_single_loss": false,
1608
  "iit_weight": 1.0,
1609
  "behavior_weight": 1.0,
1610
- "strict_weight": 0.4,
1611
- "epochs": 500,
1612
- "act_fn": "gelu",
1613
- "clip_grad_norm": 1.0,
1614
- "lr_scheduler": ""
1615
  },
1616
- "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/meta_510.json",
1617
- "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_510.pth",
1618
- "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/edges.pkl"
1619
- },
1620
- {
1621
- "case_id": "ioi",
1622
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi",
1623
- "task_description": "Indirect object identification",
1624
- "max_seq_len": 16,
1625
- "min_seq_len": 16,
1626
- "files": [
1627
- {
1628
- "file_name": "corr_100_100_40.json",
1629
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/corr_100_100_40.json"
1630
- },
1631
- {
1632
- "file_name": "ll_model_100_100_40.pth",
1633
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_100_100_40.pth"
1634
- }
1635
- ],
1636
- "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_100_100_40.pth"
1637
  },
1638
  {
1639
  "case_id": "ioi_next_token",
@@ -1643,19 +1724,98 @@
1643
  "min_seq_len": 16,
1644
  "files": [
1645
  {
1646
- "file_name": "corr_100_100_40.json",
1647
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/corr_100_100_40.json"
 
 
 
 
1648
  },
1649
  {
1650
- "file_name": "ll_model_100_100_40.pth",
1651
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_100_100_40.pth"
1652
  },
1653
  {
1654
- "file_name": "training_args.json",
1655
- "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/training_args.json"
1656
  }
1657
  ],
1658
- "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_100_100_40.pth"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1659
  }
1660
  ]
1661
  }
 
27
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl"
28
  },
29
  {
30
+ "file_name": "ll_model.pth",
31
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model.pth"
32
  },
33
  {
34
+ "file_name": "ll_model_cfg.pkl",
35
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg.pkl"
36
  },
37
  {
38
+ "file_name": "meta.json",
39
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta.json"
40
  }
41
  ],
42
  "transformer_cfg": {
 
66
  "attn_types": null,
67
  "init_mode": "gpt2",
68
  "normalization_type": null,
 
69
  "n_devices": 1,
70
  "attention_dir": "causal",
71
  "attn_only": false,
 
90
  "trust_remote_code": false,
91
  "rotary_adjacent_pairs": false
92
  },
93
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg.pkl",
94
  "training_args": {
95
  "atol": 0.05,
96
  "lr": 0.01,
 
103
  "clip_grad_norm": 1.0,
104
  "lr_scheduler": ""
105
  },
106
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta.json",
107
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model.pth",
108
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl"
109
  },
110
  {
 
124
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl"
125
  },
126
  {
127
+ "file_name": "ll_model.pth",
128
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model.pth"
129
  },
130
  {
131
+ "file_name": "ll_model_cfg.pkl",
132
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg.pkl"
133
  },
134
  {
135
+ "file_name": "meta.json",
136
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta.json"
137
  }
138
  ],
139
  "transformer_cfg": {
 
163
  "attn_types": null,
164
  "init_mode": "gpt2",
165
  "normalization_type": null,
 
166
  "n_devices": 1,
167
  "attention_dir": "bidirectional",
168
  "attn_only": false,
 
187
  "trust_remote_code": false,
188
  "rotary_adjacent_pairs": false
189
  },
190
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg.pkl",
191
  "training_args": {
192
  "atol": 0.05,
193
  "lr": 0.01,
 
200
  "clip_grad_norm": 1.0,
201
  "lr_scheduler": ""
202
  },
203
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta.json",
204
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model.pth",
205
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl"
206
  },
207
  {
 
223
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl"
224
  },
225
  {
226
+ "file_name": "ll_model.pth",
227
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model.pth"
228
  },
229
  {
230
+ "file_name": "ll_model_cfg.pkl",
231
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg.pkl"
232
  },
233
  {
234
+ "file_name": "meta.json",
235
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta.json"
236
  }
237
  ],
238
  "transformer_cfg": {
239
  "n_layers": 2,
240
+ "d_model": 26,
241
  "n_ctx": 10,
242
+ "d_head": 6,
243
  "model_name": "custom",
244
  "n_heads": 4,
245
+ "d_mlp": 104,
246
  "act_fn": "gelu",
247
  "d_vocab": 7,
248
  "eps": 1e-05,
 
262
  "attn_types": null,
263
  "init_mode": "gpt2",
264
  "normalization_type": null,
 
265
  "n_devices": 1,
266
  "attention_dir": "bidirectional",
267
  "attn_only": false,
 
274
  "d_vocab_out": 3,
275
  "parallel_attn_mlp": false,
276
  "rotary_dim": null,
277
+ "n_params": 15808,
278
  "use_hook_tokens": false,
279
  "gated_mlp": false,
280
  "default_prepend_bos": true,
 
286
  "trust_remote_code": false,
287
  "rotary_adjacent_pairs": false
288
  },
289
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg.pkl",
290
  "training_args": {
291
  "atol": 0.05,
292
  "lr": 0.001,
293
  "use_single_loss": false,
294
  "iit_weight": 1.0,
295
  "behavior_weight": 1.0,
296
+ "strict_weight": 1.0,
297
  "epochs": 2000,
298
  "act_fn": "gelu",
299
  "clip_grad_norm": 0.1,
300
+ "lr_scheduler": "",
301
+ "model_pair": "strict"
302
  },
303
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta.json",
304
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model.pth",
305
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl"
306
  },
307
  {
 
321
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl"
322
  },
323
  {
324
+ "file_name": "ll_model.pth",
325
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model.pth"
326
  },
327
  {
328
+ "file_name": "ll_model_cfg.pkl",
329
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg.pkl"
330
  },
331
  {
332
+ "file_name": "meta.json",
333
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta.json"
334
  }
335
  ],
336
  "transformer_cfg": {
 
360
  "attn_types": null,
361
  "init_mode": "gpt2",
362
  "normalization_type": null,
 
363
  "n_devices": 1,
364
  "attention_dir": "causal",
365
  "attn_only": false,
 
384
  "trust_remote_code": false,
385
  "rotary_adjacent_pairs": false
386
  },
387
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg.pkl",
388
  "training_args": {
389
  "atol": 0.05,
390
  "lr": 0.001,
 
397
  "clip_grad_norm": 0.1,
398
  "lr_scheduler": ""
399
  },
400
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta.json",
401
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model.pth",
402
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl"
403
  },
404
  {
 
427
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl"
428
  },
429
  {
430
+ "file_name": "ll_model.pth",
431
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model.pth"
432
  },
433
  {
434
+ "file_name": "ll_model_cfg.pkl",
435
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg.pkl"
436
  },
437
  {
438
+ "file_name": "meta.json",
439
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta.json"
440
  }
441
  ],
442
  "transformer_cfg": {
443
  "n_layers": 2,
444
+ "d_model": 13,
445
  "n_ctx": 10,
446
+ "d_head": 3,
447
  "model_name": "custom",
448
  "n_heads": 4,
449
+ "d_mlp": 52,
450
  "act_fn": "gelu",
451
  "d_vocab": 14,
452
  "eps": 1e-05,
 
466
  "attn_types": null,
467
  "init_mode": "gpt2",
468
  "normalization_type": null,
 
469
  "n_devices": 1,
470
  "attention_dir": "causal",
471
  "attn_only": false,
 
478
  "d_vocab_out": 2,
479
  "parallel_attn_mlp": false,
480
  "rotary_dim": null,
481
+ "n_params": 3952,
482
  "use_hook_tokens": false,
483
  "gated_mlp": false,
484
  "default_prepend_bos": true,
 
490
  "trust_remote_code": false,
491
  "rotary_adjacent_pairs": false
492
  },
493
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg.pkl",
494
  "training_args": {
495
  "atol": 0.05,
496
+ "lr": 0.01,
497
  "use_single_loss": false,
498
  "iit_weight": 1.0,
499
  "behavior_weight": 1.0,
500
+ "strict_weight": 0.4,
501
  "epochs": 2000,
502
  "act_fn": "gelu",
503
  "clip_grad_norm": 0.1,
504
+ "lr_scheduler": "",
505
+ "model_pair": "strict"
506
  },
507
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta.json",
508
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model.pth",
509
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl"
510
  },
511
  {
 
525
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl"
526
  },
527
  {
528
+ "file_name": "ll_model.pth",
529
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model.pth"
530
  },
531
  {
532
+ "file_name": "ll_model_cfg.pkl",
533
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg.pkl"
534
  },
535
  {
536
+ "file_name": "meta.json",
537
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta.json"
538
  }
539
  ],
540
  "transformer_cfg": {
541
+ "n_layers": 4,
542
+ "d_model": 50,
543
  "n_ctx": 10,
544
+ "d_head": 12,
545
  "model_name": "custom",
546
  "n_heads": 4,
547
+ "d_mlp": 200,
548
  "act_fn": "gelu",
549
  "d_vocab": 5,
550
  "eps": 1e-05,
 
564
  "attn_types": null,
565
  "init_mode": "gpt2",
566
  "normalization_type": null,
 
567
  "n_devices": 1,
568
  "attention_dir": "causal",
569
  "attn_only": false,
570
  "seed": 0,
571
+ "initializer_range": 0.09847319278346618,
572
  "init_weights": true,
573
  "scale_attn_by_inverse_layer_idx": false,
574
  "positional_embedding_type": "standard",
 
576
  "d_vocab_out": 3,
577
  "parallel_attn_mlp": false,
578
  "rotary_dim": null,
579
+ "n_params": 118400,
580
  "use_hook_tokens": false,
581
  "gated_mlp": false,
582
  "default_prepend_bos": true,
 
588
  "trust_remote_code": false,
589
  "rotary_adjacent_pairs": false
590
  },
591
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg.pkl",
592
  "training_args": {
593
  "atol": 0.05,
594
+ "lr": 0.0005,
595
  "use_single_loss": false,
596
  "iit_weight": 1.0,
597
  "behavior_weight": 1.0,
598
+ "strict_weight": 0.5,
599
+ "epochs": 2000,
600
  "act_fn": "gelu",
601
+ "clip_grad_norm": 0.1,
602
+ "lr_scheduler": "",
603
+ "model_pair": "strict"
604
  },
605
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta.json",
606
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model.pth",
607
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl"
608
  },
609
  {
610
+ "case_id": "26",
611
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/26",
612
+ "task_description": "Creates a cascading effect by repeating each token in sequence incrementally.",
613
  "vocab": [
614
  "a",
615
  "b",
 
620
  "files": [
621
  {
622
  "file_name": "edges.pkl",
623
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/edges.pkl"
624
  },
625
  {
626
+ "file_name": "ll_model.pth",
627
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model.pth"
628
  },
629
  {
630
+ "file_name": "ll_model_cfg.pkl",
631
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model_cfg.pkl"
632
  },
633
  {
634
+ "file_name": "meta.json",
635
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/meta.json"
636
  }
637
  ],
638
  "transformer_cfg": {
639
  "n_layers": 2,
640
+ "d_model": 21,
641
  "n_ctx": 10,
642
  "d_head": 5,
643
  "model_name": "custom",
644
  "n_heads": 4,
645
+ "d_mlp": 84,
646
  "act_fn": "gelu",
647
  "d_vocab": 5,
648
  "eps": 1e-05,
 
662
  "attn_types": null,
663
  "init_mode": "gpt2",
664
  "normalization_type": null,
 
665
  "n_devices": 1,
666
  "attention_dir": "causal",
667
  "attn_only": false,
668
  "seed": 0,
669
+ "initializer_range": 0.12344267996967354,
670
  "init_weights": true,
671
  "scale_attn_by_inverse_layer_idx": false,
672
  "positional_embedding_type": "standard",
673
  "final_rms": false,
674
+ "d_vocab_out": 27,
675
  "parallel_attn_mlp": false,
676
  "rotary_dim": null,
677
+ "n_params": 10416,
678
  "use_hook_tokens": false,
679
  "gated_mlp": false,
680
  "default_prepend_bos": true,
 
686
  "trust_remote_code": false,
687
  "rotary_adjacent_pairs": false
688
  },
689
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model_cfg.pkl",
690
  "training_args": {
691
  "atol": 0.05,
692
  "lr": 0.01,
 
694
  "iit_weight": 1.0,
695
  "behavior_weight": 1.0,
696
  "strict_weight": 0.4,
697
+ "epochs": 2000,
698
  "act_fn": "gelu",
699
+ "clip_grad_norm": 0.1,
700
+ "lr_scheduler": "",
701
+ "model_pair": "strict"
702
  },
703
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/meta.json",
704
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/ll_model.pth",
705
+ "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/26/edges.pkl"
706
+ },
707
+ {
708
+ "case_id": "29",
709
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/29",
710
+ "task_description": "Creates abbreviations for each token in the sequence.",
711
+ "vocab": [
712
+ "J",
713
+ "LB",
714
+ "TPSI",
715
+ "V",
716
+ "b",
717
+ "no",
718
+ "oCLrZaW",
719
+ "poiVg"
720
+ ],
721
+ "max_seq_len": 10,
722
+ "min_seq_len": 4,
723
+ "files": [
724
+ {
725
+ "file_name": "edges.pkl",
726
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/edges.pkl"
727
+ },
728
+ {
729
+ "file_name": "ll_model.pth",
730
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model.pth"
731
+ },
732
+ {
733
+ "file_name": "ll_model_cfg.pkl",
734
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model_cfg.pkl"
735
+ },
736
+ {
737
+ "file_name": "meta.json",
738
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/meta.json"
739
+ }
740
+ ],
741
+ "transformer_cfg": {
742
+ "n_layers": 2,
743
+ "d_model": 13,
744
+ "n_ctx": 10,
745
+ "d_head": 3,
746
+ "model_name": "custom",
747
+ "n_heads": 4,
748
+ "d_mlp": 52,
749
+ "act_fn": "gelu",
750
+ "d_vocab": 10,
751
+ "eps": 1e-05,
752
+ "use_attn_result": true,
753
+ "use_attn_scale": true,
754
+ "use_split_qkv_input": true,
755
+ "use_hook_mlp_in": true,
756
+ "use_attn_in": false,
757
+ "use_local_attn": false,
758
+ "original_architecture": null,
759
+ "from_checkpoint": false,
760
+ "checkpoint_index": null,
761
+ "checkpoint_label_type": null,
762
+ "checkpoint_value": null,
763
+ "tokenizer_name": null,
764
+ "window_size": null,
765
+ "attn_types": null,
766
+ "init_mode": "gpt2",
767
+ "normalization_type": null,
768
+ "n_devices": 1,
769
+ "attention_dir": "causal",
770
+ "attn_only": false,
771
+ "seed": 0,
772
+ "initializer_range": 0.1539600717839002,
773
+ "init_weights": true,
774
+ "scale_attn_by_inverse_layer_idx": false,
775
+ "positional_embedding_type": "standard",
776
+ "final_rms": false,
777
+ "d_vocab_out": 8,
778
+ "parallel_attn_mlp": false,
779
+ "rotary_dim": null,
780
+ "n_params": 3952,
781
+ "use_hook_tokens": false,
782
+ "gated_mlp": false,
783
+ "default_prepend_bos": true,
784
+ "dtype": "torch.float32",
785
+ "tokenizer_prepends_bos": null,
786
+ "n_key_value_heads": null,
787
+ "post_embedding_ln": false,
788
+ "rotary_base": 10000,
789
+ "trust_remote_code": false,
790
+ "rotary_adjacent_pairs": false
791
+ },
792
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model_cfg.pkl",
793
+ "training_args": {
794
+ "atol": 0.05,
795
+ "lr": 0.01,
796
+ "use_single_loss": false,
797
+ "iit_weight": 1.0,
798
+ "behavior_weight": 1.0,
799
+ "strict_weight": 0.4,
800
+ "epochs": 2000,
801
+ "act_fn": "gelu",
802
+ "clip_grad_norm": 0.1,
803
+ "lr_scheduler": "",
804
+ "model_pair": "strict"
805
+ },
806
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/meta.json",
807
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/ll_model.pth",
808
+ "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/29/edges.pkl"
809
  },
810
  {
811
  "case_id": "3",
 
825
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl"
826
  },
827
  {
828
+ "file_name": "ll_model.pth",
829
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model.pth"
830
  },
831
  {
832
+ "file_name": "ll_model_cfg.pkl",
833
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg.pkl"
834
  },
835
  {
836
+ "file_name": "meta.json",
837
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta.json"
838
  }
839
  ],
840
  "transformer_cfg": {
 
864
  "attn_types": null,
865
  "init_mode": "gpt2",
866
  "normalization_type": null,
 
867
  "n_devices": 1,
868
  "attention_dir": "causal",
869
  "attn_only": false,
 
888
  "trust_remote_code": false,
889
  "rotary_adjacent_pairs": false
890
  },
891
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg.pkl",
892
  "training_args": {
893
  "atol": 0.05,
894
  "lr": 0.001,
 
901
  "clip_grad_norm": 0.1,
902
  "lr_scheduler": ""
903
  },
904
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta.json",
905
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model.pth",
906
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl"
907
  },
908
  {
 
927
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl"
928
  },
929
  {
930
+ "file_name": "ll_model.pth",
931
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model.pth"
932
  },
933
  {
934
+ "file_name": "ll_model_cfg.pkl",
935
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg.pkl"
936
  },
937
  {
938
+ "file_name": "meta.json",
939
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta.json"
940
  }
941
  ],
942
  "transformer_cfg": {
 
966
  "attn_types": null,
967
  "init_mode": "gpt2",
968
  "normalization_type": null,
 
969
  "n_devices": 1,
970
  "attention_dir": "causal",
971
  "attn_only": false,
 
990
  "trust_remote_code": false,
991
  "rotary_adjacent_pairs": false
992
  },
993
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg.pkl",
994
  "training_args": {
995
  "atol": 0.05,
996
  "lr": 0.001,
 
1003
  "clip_grad_norm": 0.1,
1004
  "lr_scheduler": ""
1005
  },
1006
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta.json",
1007
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model.pth",
1008
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl"
1009
  },
1010
  {
 
1029
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl"
1030
  },
1031
  {
1032
+ "file_name": "ll_model.pth",
1033
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model.pth"
1034
  },
1035
  {
1036
+ "file_name": "ll_model_cfg.pkl",
1037
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg.pkl"
1038
  },
1039
  {
1040
+ "file_name": "meta.json",
1041
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta.json"
1042
  }
1043
  ],
1044
  "transformer_cfg": {
1045
  "n_layers": 2,
1046
+ "d_model": 16,
1047
  "n_ctx": 10,
1048
+ "d_head": 4,
1049
  "model_name": "custom",
1050
  "n_heads": 4,
1051
+ "d_mlp": 64,
1052
  "act_fn": "gelu",
1053
  "d_vocab": 10,
1054
  "eps": 1e-05,
 
1068
  "attn_types": null,
1069
  "init_mode": "gpt2",
1070
  "normalization_type": null,
 
1071
  "n_devices": 1,
1072
  "attention_dir": "causal",
1073
  "attn_only": false,
 
1080
  "d_vocab_out": 5,
1081
  "parallel_attn_mlp": false,
1082
  "rotary_dim": null,
1083
+ "n_params": 6144,
1084
  "use_hook_tokens": false,
1085
  "gated_mlp": false,
1086
  "default_prepend_bos": true,
 
1092
  "trust_remote_code": false,
1093
  "rotary_adjacent_pairs": false
1094
  },
1095
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg.pkl",
1096
  "training_args": {
1097
  "atol": 0.05,
1098
+ "lr": 0.01,
1099
  "use_single_loss": false,
1100
  "iit_weight": 1.0,
1101
  "behavior_weight": 1.0,
1102
+ "strict_weight": 1.0,
1103
  "epochs": 2000,
1104
  "act_fn": "gelu",
1105
  "clip_grad_norm": 0.1,
1106
+ "lr_scheduler": "",
1107
+ "model_pair": "strict"
1108
  },
1109
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta.json",
1110
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model.pth",
1111
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl"
1112
  },
1113
  {
 
1132
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl"
1133
  },
1134
  {
1135
+ "file_name": "ll_model.pth",
1136
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model.pth"
1137
  },
1138
  {
1139
+ "file_name": "ll_model_cfg.pkl",
1140
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg.pkl"
1141
  },
1142
  {
1143
+ "file_name": "meta.json",
1144
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta.json"
1145
  }
1146
  ],
1147
  "transformer_cfg": {
1148
  "n_layers": 2,
1149
+ "d_model": 9,
1150
  "n_ctx": 10,
1151
+ "d_head": 2,
1152
  "model_name": "custom",
1153
  "n_heads": 4,
1154
+ "d_mlp": 36,
1155
  "act_fn": "gelu",
1156
  "d_vocab": 10,
1157
  "eps": 1e-05,
 
1171
  "attn_types": null,
1172
  "init_mode": "gpt2",
1173
  "normalization_type": null,
 
1174
  "n_devices": 1,
1175
  "attention_dir": "causal",
1176
  "attn_only": false,
 
1183
  "d_vocab_out": 8,
1184
  "parallel_attn_mlp": false,
1185
  "rotary_dim": null,
1186
+ "n_params": 1872,
1187
  "use_hook_tokens": false,
1188
  "gated_mlp": false,
1189
  "default_prepend_bos": true,
 
1195
  "trust_remote_code": false,
1196
  "rotary_adjacent_pairs": false
1197
  },
1198
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg.pkl",
1199
  "training_args": {
1200
  "atol": 0.05,
1201
+ "lr": 0.01,
1202
  "use_single_loss": false,
1203
  "iit_weight": 1.0,
1204
  "behavior_weight": 1.0,
1205
+ "strict_weight": 1.0,
1206
  "epochs": 2000,
1207
  "act_fn": "gelu",
1208
  "clip_grad_norm": 0.1,
1209
+ "lr_scheduler": "",
1210
+ "model_pair": "strict"
1211
  },
1212
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta.json",
1213
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model.pth",
1214
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl"
1215
  },
1216
  {
 
1230
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl"
1231
  },
1232
  {
1233
+ "file_name": "ll_model.pth",
1234
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model.pth"
1235
  },
1236
  {
1237
+ "file_name": "ll_model_cfg.pkl",
1238
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg.pkl"
1239
  },
1240
  {
1241
+ "file_name": "meta.json",
1242
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta.json"
1243
  }
1244
  ],
1245
  "transformer_cfg": {
1246
  "n_layers": 2,
1247
+ "d_model": 6,
1248
  "n_ctx": 10,
1249
  "d_head": 1,
1250
  "model_name": "custom",
1251
  "n_heads": 4,
1252
+ "d_mlp": 24,
1253
  "act_fn": "gelu",
1254
  "d_vocab": 5,
1255
  "eps": 1e-05,
 
1269
  "attn_types": null,
1270
  "init_mode": "gpt2",
1271
  "normalization_type": null,
 
1272
  "n_devices": 1,
1273
  "attention_dir": "causal",
1274
  "attn_only": false,
 
1281
  "d_vocab_out": 3,
1282
  "parallel_attn_mlp": false,
1283
  "rotary_dim": null,
1284
+ "n_params": 768,
1285
  "use_hook_tokens": false,
1286
  "gated_mlp": false,
1287
  "default_prepend_bos": true,
 
1293
  "trust_remote_code": false,
1294
  "rotary_adjacent_pairs": false
1295
  },
1296
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg.pkl",
1297
  "training_args": {
1298
  "atol": 0.05,
1299
+ "lr": 0.01,
1300
  "use_single_loss": false,
1301
  "iit_weight": 1.0,
1302
  "behavior_weight": 1.0,
1303
+ "strict_weight": 1.0,
1304
  "epochs": 2000,
1305
  "act_fn": "gelu",
1306
  "clip_grad_norm": 0.1,
1307
+ "lr_scheduler": "",
1308
+ "model_pair": "strict"
1309
  },
1310
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta.json",
1311
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model.pth",
1312
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl"
1313
  },
1314
  {
 
1333
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl"
1334
  },
1335
  {
1336
+ "file_name": "ll_model.pth",
1337
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model.pth"
1338
  },
1339
  {
1340
+ "file_name": "ll_model_cfg.pkl",
1341
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg.pkl"
1342
  },
1343
  {
1344
+ "file_name": "meta.json",
1345
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta.json"
1346
  }
1347
  ],
1348
  "transformer_cfg": {
1349
  "n_layers": 2,
1350
+ "d_model": 12,
1351
  "n_ctx": 10,
1352
+ "d_head": 3,
1353
  "model_name": "custom",
1354
  "n_heads": 4,
1355
+ "d_mlp": 48,
1356
  "act_fn": "gelu",
1357
  "d_vocab": 10,
1358
  "eps": 1e-05,
 
1372
  "attn_types": null,
1373
  "init_mode": "gpt2",
1374
  "normalization_type": null,
 
1375
  "n_devices": 1,
1376
  "attention_dir": "causal",
1377
  "attn_only": false,
 
1384
  "d_vocab_out": 8,
1385
  "parallel_attn_mlp": false,
1386
  "rotary_dim": null,
1387
+ "n_params": 3456,
1388
  "use_hook_tokens": false,
1389
  "gated_mlp": false,
1390
  "default_prepend_bos": true,
 
1396
  "trust_remote_code": false,
1397
  "rotary_adjacent_pairs": false
1398
  },
1399
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg.pkl",
1400
  "training_args": {
1401
  "atol": 0.05,
1402
+ "lr": 0.01,
1403
  "use_single_loss": false,
1404
  "iit_weight": 1.0,
1405
  "behavior_weight": 1.0,
1406
+ "strict_weight": 1.0,
1407
  "epochs": 2000,
1408
  "act_fn": "gelu",
1409
  "clip_grad_norm": 0.1,
1410
+ "lr_scheduler": "",
1411
+ "model_pair": "strict"
1412
  },
1413
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta.json",
1414
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model.pth",
1415
  "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl"
1416
  },
1417
  {
1418
+ "case_id": "4",
1419
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/4",
1420
+ "task_description": "Return fraction of previous open tokens minus the fraction of close tokens.",
1421
  "vocab": [
1422
+ "(",
1423
+ ")",
1424
  "a",
1425
  "b",
1426
  "c"
 
1430
  "files": [
1431
  {
1432
  "file_name": "edges.pkl",
1433
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl"
1434
  },
1435
  {
1436
+ "file_name": "ll_model.pth",
1437
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model.pth"
1438
  },
1439
  {
1440
+ "file_name": "ll_model_cfg.pkl",
1441
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg.pkl"
1442
  },
1443
  {
1444
+ "file_name": "meta.json",
1445
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta.json"
1446
  }
1447
  ],
1448
  "transformer_cfg": {
 
1454
  "n_heads": 4,
1455
  "d_mlp": 80,
1456
  "act_fn": "gelu",
1457
+ "d_vocab": 7,
1458
  "eps": 1e-05,
1459
  "use_attn_result": true,
1460
  "use_attn_scale": true,
 
1472
  "attn_types": null,
1473
  "init_mode": "gpt2",
1474
  "normalization_type": null,
 
1475
  "n_devices": 1,
1476
  "attention_dir": "causal",
1477
  "attn_only": false,
1478
  "seed": 0,
1479
+ "initializer_range": 0.17056057308448835,
1480
  "init_weights": true,
1481
  "scale_attn_by_inverse_layer_idx": false,
1482
  "positional_embedding_type": "standard",
1483
  "final_rms": false,
1484
+ "d_vocab_out": 1,
1485
  "parallel_attn_mlp": false,
1486
  "rotary_dim": null,
1487
  "n_params": 9600,
 
1496
  "trust_remote_code": false,
1497
  "rotary_adjacent_pairs": false
1498
  },
1499
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg.pkl",
1500
  "training_args": {
1501
  "atol": 0.05,
1502
  "lr": 0.001,
 
1509
  "clip_grad_norm": 0.1,
1510
  "lr_scheduler": ""
1511
  },
1512
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta.json",
1513
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model.pth",
1514
+ "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl"
1515
  },
1516
  {
1517
+ "case_id": "8",
1518
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/8",
1519
+ "task_description": "Fills gaps between tokens with a specified filler.",
1520
  "vocab": [
1521
+ "J",
1522
+ "LB",
1523
+ "TPSI",
1524
+ "V",
1525
  "b",
1526
+ "no",
1527
+ "oCLrZaW",
1528
+ "poiVg"
1529
  ],
1530
  "max_seq_len": 10,
1531
  "min_seq_len": 4,
1532
  "files": [
1533
  {
1534
  "file_name": "edges.pkl",
1535
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/edges.pkl"
1536
  },
1537
  {
1538
+ "file_name": "ll_model.pth",
1539
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model.pth"
1540
  },
1541
  {
1542
+ "file_name": "ll_model_cfg.pkl",
1543
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_cfg.pkl"
1544
  },
1545
  {
1546
+ "file_name": "meta.json",
1547
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/meta.json"
1548
  }
1549
  ],
1550
  "transformer_cfg": {
 
1556
  "n_heads": 4,
1557
  "d_mlp": 80,
1558
  "act_fn": "gelu",
1559
+ "d_vocab": 10,
1560
  "eps": 1e-05,
1561
  "use_attn_result": true,
1562
  "use_attn_scale": true,
 
1574
  "attn_types": null,
1575
  "init_mode": "gpt2",
1576
  "normalization_type": null,
 
1577
  "n_devices": 1,
1578
  "attention_dir": "causal",
1579
  "attn_only": false,
1580
  "seed": 0,
1581
+ "initializer_range": 0.13333333333333333,
1582
  "init_weights": true,
1583
  "scale_attn_by_inverse_layer_idx": false,
1584
  "positional_embedding_type": "standard",
1585
  "final_rms": false,
1586
+ "d_vocab_out": 8,
1587
  "parallel_attn_mlp": false,
1588
  "rotary_dim": null,
1589
  "n_params": 9600,
 
1598
  "trust_remote_code": false,
1599
  "rotary_adjacent_pairs": false
1600
  },
1601
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_cfg.pkl",
1602
  "training_args": {
1603
  "atol": 0.05,
1604
+ "lr": 0.01,
1605
  "use_single_loss": false,
1606
  "iit_weight": 1.0,
1607
  "behavior_weight": 1.0,
1608
  "strict_weight": 0.4,
1609
+ "epochs": 500,
1610
  "act_fn": "gelu",
1611
+ "clip_grad_norm": 1.0,
1612
  "lr_scheduler": ""
1613
  },
1614
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/meta.json",
1615
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model.pth",
1616
+ "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/edges.pkl"
1617
  },
1618
  {
1619
+ "case_id": "ioi",
1620
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi",
1621
+ "task_description": "Indirect object identification",
1622
+ "max_seq_len": 16,
1623
+ "min_seq_len": 16,
 
 
 
 
 
 
 
 
 
 
1624
  "files": [
1625
  {
1626
  "file_name": "edges.pkl",
1627
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/edges.pkl"
1628
  },
1629
  {
1630
+ "file_name": "ll_model.pth",
1631
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model.pth"
1632
  },
1633
  {
1634
+ "file_name": "ll_model_cfg.pkl",
1635
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_cfg.pkl"
1636
  },
1637
  {
1638
+ "file_name": "meta.json",
1639
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/meta.json"
1640
  }
1641
  ],
1642
  "transformer_cfg": {
1643
+ "n_layers": 6,
1644
+ "d_model": 64,
1645
+ "n_ctx": 1024,
1646
+ "d_head": 16,
1647
+ "model_name": "gpt2",
1648
  "n_heads": 4,
1649
+ "d_mlp": 3072,
1650
+ "act_fn": "gelu_new",
1651
+ "d_vocab": 50257,
1652
  "eps": 1e-05,
1653
+ "use_attn_result": false,
1654
  "use_attn_scale": true,
1655
+ "use_split_qkv_input": false,
1656
+ "use_hook_mlp_in": false,
1657
  "use_attn_in": false,
1658
  "use_local_attn": false,
1659
+ "original_architecture": "GPT2LMHeadModel",
1660
  "from_checkpoint": false,
1661
  "checkpoint_index": null,
1662
  "checkpoint_label_type": null,
1663
  "checkpoint_value": null,
1664
+ "tokenizer_name": "gpt2",
1665
  "window_size": null,
1666
  "attn_types": null,
1667
  "init_mode": "gpt2",
1668
+ "normalization_type": "LNPre",
 
1669
  "n_devices": 1,
1670
  "attention_dir": "causal",
1671
  "attn_only": false,
1672
+ "seed": null,
1673
+ "initializer_range": 0.02886751345948129,
1674
  "init_weights": true,
1675
  "scale_attn_by_inverse_layer_idx": false,
1676
  "positional_embedding_type": "standard",
1677
  "final_rms": false,
1678
+ "d_vocab_out": 50257,
1679
  "parallel_attn_mlp": false,
1680
  "rotary_dim": null,
1681
+ "n_params": 2457600,
1682
  "use_hook_tokens": false,
1683
  "gated_mlp": false,
1684
  "default_prepend_bos": true,
1685
  "dtype": "torch.float32",
1686
+ "tokenizer_prepends_bos": false,
1687
  "n_key_value_heads": null,
1688
  "post_embedding_ln": false,
1689
  "rotary_base": 10000,
1690
  "trust_remote_code": false,
1691
  "rotary_adjacent_pairs": false
1692
  },
1693
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_cfg.pkl",
1694
  "training_args": {
1695
+ "next_token": true,
1696
+ "non_ioi_thresh": 0.65,
1697
+ "use_per_token_check": false,
1698
+ "batch_size": 256,
1699
+ "lr": 0.001,
1700
+ "num_workers": 0,
1701
+ "early_stop": true,
1702
+ "lr_scheduler": null,
1703
+ "scheduler_val_metric": [
1704
+ "val/accuracy",
1705
+ "val/IIA"
1706
+ ],
1707
+ "scheduler_mode": "max",
1708
+ "clip_grad_norm": 1.0,
1709
  "atol": 0.05,
 
1710
  "use_single_loss": false,
1711
  "iit_weight": 1.0,
1712
  "behavior_weight": 1.0,
1713
+ "strict_weight": 0.4
 
 
 
 
1714
  },
1715
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/meta.json",
1716
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model.pth",
1717
+ "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/edges.pkl"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1718
  },
1719
  {
1720
  "case_id": "ioi_next_token",
 
1724
  "min_seq_len": 16,
1725
  "files": [
1726
  {
1727
+ "file_name": "edges.pkl",
1728
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/edges.pkl"
1729
+ },
1730
+ {
1731
+ "file_name": "ll_model.pth",
1732
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model.pth"
1733
  },
1734
  {
1735
+ "file_name": "ll_model_cfg.pkl",
1736
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_cfg.pkl"
1737
  },
1738
  {
1739
+ "file_name": "meta.json",
1740
+ "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/meta.json"
1741
  }
1742
  ],
1743
+ "transformer_cfg": {
1744
+ "n_layers": 6,
1745
+ "d_model": 64,
1746
+ "n_ctx": 1024,
1747
+ "d_head": 16,
1748
+ "model_name": "gpt2",
1749
+ "n_heads": 4,
1750
+ "d_mlp": 3072,
1751
+ "act_fn": "gelu_new",
1752
+ "d_vocab": 50257,
1753
+ "eps": 1e-05,
1754
+ "use_attn_result": false,
1755
+ "use_attn_scale": true,
1756
+ "use_split_qkv_input": false,
1757
+ "use_hook_mlp_in": false,
1758
+ "use_attn_in": false,
1759
+ "use_local_attn": false,
1760
+ "original_architecture": "GPT2LMHeadModel",
1761
+ "from_checkpoint": false,
1762
+ "checkpoint_index": null,
1763
+ "checkpoint_label_type": null,
1764
+ "checkpoint_value": null,
1765
+ "tokenizer_name": "gpt2",
1766
+ "window_size": null,
1767
+ "attn_types": null,
1768
+ "init_mode": "gpt2",
1769
+ "normalization_type": "LNPre",
1770
+ "n_devices": 1,
1771
+ "attention_dir": "causal",
1772
+ "attn_only": false,
1773
+ "seed": null,
1774
+ "initializer_range": 0.02886751345948129,
1775
+ "init_weights": true,
1776
+ "scale_attn_by_inverse_layer_idx": false,
1777
+ "positional_embedding_type": "standard",
1778
+ "final_rms": false,
1779
+ "d_vocab_out": 50257,
1780
+ "parallel_attn_mlp": false,
1781
+ "rotary_dim": null,
1782
+ "n_params": 2457600,
1783
+ "use_hook_tokens": false,
1784
+ "gated_mlp": false,
1785
+ "default_prepend_bos": true,
1786
+ "dtype": "torch.float32",
1787
+ "tokenizer_prepends_bos": false,
1788
+ "n_key_value_heads": null,
1789
+ "post_embedding_ln": false,
1790
+ "rotary_base": 10000,
1791
+ "trust_remote_code": false,
1792
+ "rotary_adjacent_pairs": false
1793
+ },
1794
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_cfg.pkl",
1795
+ "training_args": {
1796
+ "next_token": true,
1797
+ "non_ioi_thresh": 0.65,
1798
+ "use_per_token_check": false,
1799
+ "batch_size": 256,
1800
+ "lr": 0.001,
1801
+ "num_workers": 0,
1802
+ "early_stop": true,
1803
+ "lr_scheduler": null,
1804
+ "scheduler_val_metric": [
1805
+ "val/accuracy",
1806
+ "val/IIA"
1807
+ ],
1808
+ "scheduler_mode": "max",
1809
+ "clip_grad_norm": 1.0,
1810
+ "atol": 0.05,
1811
+ "use_single_loss": false,
1812
+ "iit_weight": 1.0,
1813
+ "behavior_weight": 1.0,
1814
+ "strict_weight": 0.4
1815
+ },
1816
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/meta.json",
1817
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model.pth",
1818
+ "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/edges.pkl"
1819
  }
1820
  ]
1821
  }
benchmark_metadata_croissant.json CHANGED
@@ -412,7 +412,7 @@
412
  "@id": "transformer_cfg.n_layers",
413
  "name": "transformer_cfg.n_layers",
414
  "description": "Column 'transformer_cfg.n_layers' from the parquet file describing all the cases in the benchmark.",
415
- "dataType": "sc:Float",
416
  "source": {
417
  "fileSet": {
418
  "@id": "benchmark-cases-parquet"
@@ -427,7 +427,7 @@
427
  "@id": "transformer_cfg.d_model",
428
  "name": "transformer_cfg.d_model",
429
  "description": "Column 'transformer_cfg.d_model' from the parquet file describing all the cases in the benchmark.",
430
- "dataType": "sc:Float",
431
  "source": {
432
  "fileSet": {
433
  "@id": "benchmark-cases-parquet"
@@ -442,7 +442,7 @@
442
  "@id": "transformer_cfg.n_ctx",
443
  "name": "transformer_cfg.n_ctx",
444
  "description": "Column 'transformer_cfg.n_ctx' from the parquet file describing all the cases in the benchmark.",
445
- "dataType": "sc:Float",
446
  "source": {
447
  "fileSet": {
448
  "@id": "benchmark-cases-parquet"
@@ -457,7 +457,7 @@
457
  "@id": "transformer_cfg.d_head",
458
  "name": "transformer_cfg.d_head",
459
  "description": "Column 'transformer_cfg.d_head' from the parquet file describing all the cases in the benchmark.",
460
- "dataType": "sc:Float",
461
  "source": {
462
  "fileSet": {
463
  "@id": "benchmark-cases-parquet"
@@ -487,7 +487,7 @@
487
  "@id": "transformer_cfg.n_heads",
488
  "name": "transformer_cfg.n_heads",
489
  "description": "Column 'transformer_cfg.n_heads' from the parquet file describing all the cases in the benchmark.",
490
- "dataType": "sc:Float",
491
  "source": {
492
  "fileSet": {
493
  "@id": "benchmark-cases-parquet"
@@ -502,7 +502,7 @@
502
  "@id": "transformer_cfg.d_mlp",
503
  "name": "transformer_cfg.d_mlp",
504
  "description": "Column 'transformer_cfg.d_mlp' from the parquet file describing all the cases in the benchmark.",
505
- "dataType": "sc:Float",
506
  "source": {
507
  "fileSet": {
508
  "@id": "benchmark-cases-parquet"
@@ -532,7 +532,7 @@
532
  "@id": "transformer_cfg.d_vocab",
533
  "name": "transformer_cfg.d_vocab",
534
  "description": "Column 'transformer_cfg.d_vocab' from the parquet file describing all the cases in the benchmark.",
535
- "dataType": "sc:Float",
536
  "source": {
537
  "fileSet": {
538
  "@id": "benchmark-cases-parquet"
@@ -652,7 +652,7 @@
652
  "@id": "transformer_cfg.original_architecture",
653
  "name": "transformer_cfg.original_architecture",
654
  "description": "Column 'transformer_cfg.original_architecture' from the parquet file describing all the cases in the benchmark.",
655
- "dataType": "sc:Float",
656
  "source": {
657
  "fileSet": {
658
  "@id": "benchmark-cases-parquet"
@@ -677,57 +677,12 @@
677
  }
678
  }
679
  },
680
- {
681
- "@type": "cr:Field",
682
- "@id": "transformer_cfg.checkpoint_index",
683
- "name": "transformer_cfg.checkpoint_index",
684
- "description": "Column 'transformer_cfg.checkpoint_index' from the parquet file describing all the cases in the benchmark.",
685
- "dataType": "sc:Float",
686
- "source": {
687
- "fileSet": {
688
- "@id": "benchmark-cases-parquet"
689
- },
690
- "extract": {
691
- "column": "transformer_cfg.checkpoint_index"
692
- }
693
- }
694
- },
695
- {
696
- "@type": "cr:Field",
697
- "@id": "transformer_cfg.checkpoint_label_type",
698
- "name": "transformer_cfg.checkpoint_label_type",
699
- "description": "Column 'transformer_cfg.checkpoint_label_type' from the parquet file describing all the cases in the benchmark.",
700
- "dataType": "sc:Float",
701
- "source": {
702
- "fileSet": {
703
- "@id": "benchmark-cases-parquet"
704
- },
705
- "extract": {
706
- "column": "transformer_cfg.checkpoint_label_type"
707
- }
708
- }
709
- },
710
- {
711
- "@type": "cr:Field",
712
- "@id": "transformer_cfg.checkpoint_value",
713
- "name": "transformer_cfg.checkpoint_value",
714
- "description": "Column 'transformer_cfg.checkpoint_value' from the parquet file describing all the cases in the benchmark.",
715
- "dataType": "sc:Float",
716
- "source": {
717
- "fileSet": {
718
- "@id": "benchmark-cases-parquet"
719
- },
720
- "extract": {
721
- "column": "transformer_cfg.checkpoint_value"
722
- }
723
- }
724
- },
725
  {
726
  "@type": "cr:Field",
727
  "@id": "transformer_cfg.tokenizer_name",
728
  "name": "transformer_cfg.tokenizer_name",
729
  "description": "Column 'transformer_cfg.tokenizer_name' from the parquet file describing all the cases in the benchmark.",
730
- "dataType": "sc:Float",
731
  "source": {
732
  "fileSet": {
733
  "@id": "benchmark-cases-parquet"
@@ -737,36 +692,6 @@
737
  }
738
  }
739
  },
740
- {
741
- "@type": "cr:Field",
742
- "@id": "transformer_cfg.window_size",
743
- "name": "transformer_cfg.window_size",
744
- "description": "Column 'transformer_cfg.window_size' from the parquet file describing all the cases in the benchmark.",
745
- "dataType": "sc:Float",
746
- "source": {
747
- "fileSet": {
748
- "@id": "benchmark-cases-parquet"
749
- },
750
- "extract": {
751
- "column": "transformer_cfg.window_size"
752
- }
753
- }
754
- },
755
- {
756
- "@type": "cr:Field",
757
- "@id": "transformer_cfg.attn_types",
758
- "name": "transformer_cfg.attn_types",
759
- "description": "Column 'transformer_cfg.attn_types' from the parquet file describing all the cases in the benchmark.",
760
- "dataType": "sc:Float",
761
- "source": {
762
- "fileSet": {
763
- "@id": "benchmark-cases-parquet"
764
- },
765
- "extract": {
766
- "column": "transformer_cfg.attn_types"
767
- }
768
- }
769
- },
770
  {
771
  "@type": "cr:Field",
772
  "@id": "transformer_cfg.init_mode",
@@ -787,28 +712,13 @@
787
  "@id": "transformer_cfg.normalization_type",
788
  "name": "transformer_cfg.normalization_type",
789
  "description": "Column 'transformer_cfg.normalization_type' from the parquet file describing all the cases in the benchmark.",
790
- "dataType": "sc:Float",
791
- "source": {
792
- "fileSet": {
793
- "@id": "benchmark-cases-parquet"
794
- },
795
- "extract": {
796
- "column": "transformer_cfg.normalization_type"
797
- }
798
- }
799
- },
800
- {
801
- "@type": "cr:Field",
802
- "@id": "transformer_cfg.device",
803
- "name": "transformer_cfg.device",
804
- "description": "Column 'transformer_cfg.device' from the parquet file describing all the cases in the benchmark.",
805
  "dataType": "sc:Text",
806
  "source": {
807
  "fileSet": {
808
  "@id": "benchmark-cases-parquet"
809
  },
810
  "extract": {
811
- "column": "transformer_cfg.device"
812
  }
813
  }
814
  },
@@ -817,7 +727,7 @@
817
  "@id": "transformer_cfg.n_devices",
818
  "name": "transformer_cfg.n_devices",
819
  "description": "Column 'transformer_cfg.n_devices' from the parquet file describing all the cases in the benchmark.",
820
- "dataType": "sc:Float",
821
  "source": {
822
  "fileSet": {
823
  "@id": "benchmark-cases-parquet"
@@ -952,7 +862,7 @@
952
  "@id": "transformer_cfg.d_vocab_out",
953
  "name": "transformer_cfg.d_vocab_out",
954
  "description": "Column 'transformer_cfg.d_vocab_out' from the parquet file describing all the cases in the benchmark.",
955
- "dataType": "sc:Float",
956
  "source": {
957
  "fileSet": {
958
  "@id": "benchmark-cases-parquet"
@@ -977,27 +887,12 @@
977
  }
978
  }
979
  },
980
- {
981
- "@type": "cr:Field",
982
- "@id": "transformer_cfg.rotary_dim",
983
- "name": "transformer_cfg.rotary_dim",
984
- "description": "Column 'transformer_cfg.rotary_dim' from the parquet file describing all the cases in the benchmark.",
985
- "dataType": "sc:Float",
986
- "source": {
987
- "fileSet": {
988
- "@id": "benchmark-cases-parquet"
989
- },
990
- "extract": {
991
- "column": "transformer_cfg.rotary_dim"
992
- }
993
- }
994
- },
995
  {
996
  "@type": "cr:Field",
997
  "@id": "transformer_cfg.n_params",
998
  "name": "transformer_cfg.n_params",
999
  "description": "Column 'transformer_cfg.n_params' from the parquet file describing all the cases in the benchmark.",
1000
- "dataType": "sc:Float",
1001
  "source": {
1002
  "fileSet": {
1003
  "@id": "benchmark-cases-parquet"
@@ -1072,7 +967,7 @@
1072
  "@id": "transformer_cfg.tokenizer_prepends_bos",
1073
  "name": "transformer_cfg.tokenizer_prepends_bos",
1074
  "description": "Column 'transformer_cfg.tokenizer_prepends_bos' from the parquet file describing all the cases in the benchmark.",
1075
- "dataType": "sc:Float",
1076
  "source": {
1077
  "fileSet": {
1078
  "@id": "benchmark-cases-parquet"
@@ -1082,21 +977,6 @@
1082
  }
1083
  }
1084
  },
1085
- {
1086
- "@type": "cr:Field",
1087
- "@id": "transformer_cfg.n_key_value_heads",
1088
- "name": "transformer_cfg.n_key_value_heads",
1089
- "description": "Column 'transformer_cfg.n_key_value_heads' from the parquet file describing all the cases in the benchmark.",
1090
- "dataType": "sc:Float",
1091
- "source": {
1092
- "fileSet": {
1093
- "@id": "benchmark-cases-parquet"
1094
- },
1095
- "extract": {
1096
- "column": "transformer_cfg.n_key_value_heads"
1097
- }
1098
- }
1099
- },
1100
  {
1101
  "@type": "cr:Field",
1102
  "@id": "transformer_cfg.post_embedding_ln",
@@ -1117,7 +997,7 @@
1117
  "@id": "transformer_cfg.rotary_base",
1118
  "name": "transformer_cfg.rotary_base",
1119
  "description": "Column 'transformer_cfg.rotary_base' from the parquet file describing all the cases in the benchmark.",
1120
- "dataType": "sc:Float",
1121
  "source": {
1122
  "fileSet": {
1123
  "@id": "benchmark-cases-parquet"
@@ -1156,6 +1036,141 @@
1156
  "column": "transformer_cfg.rotary_adjacent_pairs"
1157
  }
1158
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1159
  }
1160
  ]
1161
  }
 
412
  "@id": "transformer_cfg.n_layers",
413
  "name": "transformer_cfg.n_layers",
414
  "description": "Column 'transformer_cfg.n_layers' from the parquet file describing all the cases in the benchmark.",
415
+ "dataType": "sc:Integer",
416
  "source": {
417
  "fileSet": {
418
  "@id": "benchmark-cases-parquet"
 
427
  "@id": "transformer_cfg.d_model",
428
  "name": "transformer_cfg.d_model",
429
  "description": "Column 'transformer_cfg.d_model' from the parquet file describing all the cases in the benchmark.",
430
+ "dataType": "sc:Integer",
431
  "source": {
432
  "fileSet": {
433
  "@id": "benchmark-cases-parquet"
 
442
  "@id": "transformer_cfg.n_ctx",
443
  "name": "transformer_cfg.n_ctx",
444
  "description": "Column 'transformer_cfg.n_ctx' from the parquet file describing all the cases in the benchmark.",
445
+ "dataType": "sc:Integer",
446
  "source": {
447
  "fileSet": {
448
  "@id": "benchmark-cases-parquet"
 
457
  "@id": "transformer_cfg.d_head",
458
  "name": "transformer_cfg.d_head",
459
  "description": "Column 'transformer_cfg.d_head' from the parquet file describing all the cases in the benchmark.",
460
+ "dataType": "sc:Integer",
461
  "source": {
462
  "fileSet": {
463
  "@id": "benchmark-cases-parquet"
 
487
  "@id": "transformer_cfg.n_heads",
488
  "name": "transformer_cfg.n_heads",
489
  "description": "Column 'transformer_cfg.n_heads' from the parquet file describing all the cases in the benchmark.",
490
+ "dataType": "sc:Integer",
491
  "source": {
492
  "fileSet": {
493
  "@id": "benchmark-cases-parquet"
 
502
  "@id": "transformer_cfg.d_mlp",
503
  "name": "transformer_cfg.d_mlp",
504
  "description": "Column 'transformer_cfg.d_mlp' from the parquet file describing all the cases in the benchmark.",
505
+ "dataType": "sc:Integer",
506
  "source": {
507
  "fileSet": {
508
  "@id": "benchmark-cases-parquet"
 
532
  "@id": "transformer_cfg.d_vocab",
533
  "name": "transformer_cfg.d_vocab",
534
  "description": "Column 'transformer_cfg.d_vocab' from the parquet file describing all the cases in the benchmark.",
535
+ "dataType": "sc:Integer",
536
  "source": {
537
  "fileSet": {
538
  "@id": "benchmark-cases-parquet"
 
652
  "@id": "transformer_cfg.original_architecture",
653
  "name": "transformer_cfg.original_architecture",
654
  "description": "Column 'transformer_cfg.original_architecture' from the parquet file describing all the cases in the benchmark.",
655
+ "dataType": "sc:Text",
656
  "source": {
657
  "fileSet": {
658
  "@id": "benchmark-cases-parquet"
 
677
  }
678
  }
679
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
680
  {
681
  "@type": "cr:Field",
682
  "@id": "transformer_cfg.tokenizer_name",
683
  "name": "transformer_cfg.tokenizer_name",
684
  "description": "Column 'transformer_cfg.tokenizer_name' from the parquet file describing all the cases in the benchmark.",
685
+ "dataType": "sc:Text",
686
  "source": {
687
  "fileSet": {
688
  "@id": "benchmark-cases-parquet"
 
692
  }
693
  }
694
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
695
  {
696
  "@type": "cr:Field",
697
  "@id": "transformer_cfg.init_mode",
 
712
  "@id": "transformer_cfg.normalization_type",
713
  "name": "transformer_cfg.normalization_type",
714
  "description": "Column 'transformer_cfg.normalization_type' from the parquet file describing all the cases in the benchmark.",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
715
  "dataType": "sc:Text",
716
  "source": {
717
  "fileSet": {
718
  "@id": "benchmark-cases-parquet"
719
  },
720
  "extract": {
721
+ "column": "transformer_cfg.normalization_type"
722
  }
723
  }
724
  },
 
727
  "@id": "transformer_cfg.n_devices",
728
  "name": "transformer_cfg.n_devices",
729
  "description": "Column 'transformer_cfg.n_devices' from the parquet file describing all the cases in the benchmark.",
730
+ "dataType": "sc:Integer",
731
  "source": {
732
  "fileSet": {
733
  "@id": "benchmark-cases-parquet"
 
862
  "@id": "transformer_cfg.d_vocab_out",
863
  "name": "transformer_cfg.d_vocab_out",
864
  "description": "Column 'transformer_cfg.d_vocab_out' from the parquet file describing all the cases in the benchmark.",
865
+ "dataType": "sc:Integer",
866
  "source": {
867
  "fileSet": {
868
  "@id": "benchmark-cases-parquet"
 
887
  }
888
  }
889
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
890
  {
891
  "@type": "cr:Field",
892
  "@id": "transformer_cfg.n_params",
893
  "name": "transformer_cfg.n_params",
894
  "description": "Column 'transformer_cfg.n_params' from the parquet file describing all the cases in the benchmark.",
895
+ "dataType": "sc:Integer",
896
  "source": {
897
  "fileSet": {
898
  "@id": "benchmark-cases-parquet"
 
967
  "@id": "transformer_cfg.tokenizer_prepends_bos",
968
  "name": "transformer_cfg.tokenizer_prepends_bos",
969
  "description": "Column 'transformer_cfg.tokenizer_prepends_bos' from the parquet file describing all the cases in the benchmark.",
970
+ "dataType": "sc:Boolean",
971
  "source": {
972
  "fileSet": {
973
  "@id": "benchmark-cases-parquet"
 
977
  }
978
  }
979
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
980
  {
981
  "@type": "cr:Field",
982
  "@id": "transformer_cfg.post_embedding_ln",
 
997
  "@id": "transformer_cfg.rotary_base",
998
  "name": "transformer_cfg.rotary_base",
999
  "description": "Column 'transformer_cfg.rotary_base' from the parquet file describing all the cases in the benchmark.",
1000
+ "dataType": "sc:Integer",
1001
  "source": {
1002
  "fileSet": {
1003
  "@id": "benchmark-cases-parquet"
 
1036
  "column": "transformer_cfg.rotary_adjacent_pairs"
1037
  }
1038
  }
1039
+ },
1040
+ {
1041
+ "@type": "cr:Field",
1042
+ "@id": "training_args.model_pair",
1043
+ "name": "training_args.model_pair",
1044
+ "description": "Column 'training_args.model_pair' from the parquet file describing all the cases in the benchmark.",
1045
+ "dataType": "sc:Text",
1046
+ "source": {
1047
+ "fileSet": {
1048
+ "@id": "benchmark-cases-parquet"
1049
+ },
1050
+ "extract": {
1051
+ "column": "training_args.model_pair"
1052
+ }
1053
+ }
1054
+ },
1055
+ {
1056
+ "@type": "cr:Field",
1057
+ "@id": "training_args.next_token",
1058
+ "name": "training_args.next_token",
1059
+ "description": "Column 'training_args.next_token' from the parquet file describing all the cases in the benchmark.",
1060
+ "dataType": "sc:Boolean",
1061
+ "source": {
1062
+ "fileSet": {
1063
+ "@id": "benchmark-cases-parquet"
1064
+ },
1065
+ "extract": {
1066
+ "column": "training_args.next_token"
1067
+ }
1068
+ }
1069
+ },
1070
+ {
1071
+ "@type": "cr:Field",
1072
+ "@id": "training_args.non_ioi_thresh",
1073
+ "name": "training_args.non_ioi_thresh",
1074
+ "description": "Column 'training_args.non_ioi_thresh' from the parquet file describing all the cases in the benchmark.",
1075
+ "dataType": "sc:Float",
1076
+ "source": {
1077
+ "fileSet": {
1078
+ "@id": "benchmark-cases-parquet"
1079
+ },
1080
+ "extract": {
1081
+ "column": "training_args.non_ioi_thresh"
1082
+ }
1083
+ }
1084
+ },
1085
+ {
1086
+ "@type": "cr:Field",
1087
+ "@id": "training_args.use_per_token_check",
1088
+ "name": "training_args.use_per_token_check",
1089
+ "description": "Column 'training_args.use_per_token_check' from the parquet file describing all the cases in the benchmark.",
1090
+ "dataType": "sc:Boolean",
1091
+ "source": {
1092
+ "fileSet": {
1093
+ "@id": "benchmark-cases-parquet"
1094
+ },
1095
+ "extract": {
1096
+ "column": "training_args.use_per_token_check"
1097
+ }
1098
+ }
1099
+ },
1100
+ {
1101
+ "@type": "cr:Field",
1102
+ "@id": "training_args.batch_size",
1103
+ "name": "training_args.batch_size",
1104
+ "description": "Column 'training_args.batch_size' from the parquet file describing all the cases in the benchmark.",
1105
+ "dataType": "sc:Float",
1106
+ "source": {
1107
+ "fileSet": {
1108
+ "@id": "benchmark-cases-parquet"
1109
+ },
1110
+ "extract": {
1111
+ "column": "training_args.batch_size"
1112
+ }
1113
+ }
1114
+ },
1115
+ {
1116
+ "@type": "cr:Field",
1117
+ "@id": "training_args.num_workers",
1118
+ "name": "training_args.num_workers",
1119
+ "description": "Column 'training_args.num_workers' from the parquet file describing all the cases in the benchmark.",
1120
+ "dataType": "sc:Float",
1121
+ "source": {
1122
+ "fileSet": {
1123
+ "@id": "benchmark-cases-parquet"
1124
+ },
1125
+ "extract": {
1126
+ "column": "training_args.num_workers"
1127
+ }
1128
+ }
1129
+ },
1130
+ {
1131
+ "@type": "cr:Field",
1132
+ "@id": "training_args.early_stop",
1133
+ "name": "training_args.early_stop",
1134
+ "description": "Column 'training_args.early_stop' from the parquet file describing all the cases in the benchmark.",
1135
+ "dataType": "sc:Boolean",
1136
+ "source": {
1137
+ "fileSet": {
1138
+ "@id": "benchmark-cases-parquet"
1139
+ },
1140
+ "extract": {
1141
+ "column": "training_args.early_stop"
1142
+ }
1143
+ }
1144
+ },
1145
+ {
1146
+ "@type": "cr:Field",
1147
+ "@id": "training_args.scheduler_val_metric",
1148
+ "name": "training_args.scheduler_val_metric",
1149
+ "description": "Column 'training_args.scheduler_val_metric' from the parquet file describing all the cases in the benchmark.",
1150
+ "dataType": "sc:Text",
1151
+ "source": {
1152
+ "fileSet": {
1153
+ "@id": "benchmark-cases-parquet"
1154
+ },
1155
+ "extract": {
1156
+ "column": "training_args.scheduler_val_metric"
1157
+ }
1158
+ }
1159
+ },
1160
+ {
1161
+ "@type": "cr:Field",
1162
+ "@id": "training_args.scheduler_mode",
1163
+ "name": "training_args.scheduler_mode",
1164
+ "description": "Column 'training_args.scheduler_mode' from the parquet file describing all the cases in the benchmark.",
1165
+ "dataType": "sc:Text",
1166
+ "source": {
1167
+ "fileSet": {
1168
+ "@id": "benchmark-cases-parquet"
1169
+ },
1170
+ "extract": {
1171
+ "column": "training_args.scheduler_mode"
1172
+ }
1173
+ }
1174
  }
1175
  ]
1176
  }