cybershiptrooper iarcuschin committed on
Commit
3064d6d
1 Parent(s): 9db87a4

Update metadata (#4)

Browse files

- Update metadata (5525660b40b58af34144774473149ed4b21abeef)


Co-authored-by: Ivan Arcuschin <[email protected]>

benchmark_cases_metadata.csv CHANGED
@@ -1,19 +1,19 @@
1
- case_id,url,task_description,max_seq_len,min_seq_len,training_args.atol,training_args.lr,training_args.use_single_loss,training_args.iit_weight,training_args.behavior_weight,training_args.strict_weight,training_args.epochs,training_args.act_fn,training_args.clip_grad_norm,training_args.lr_scheduler,transformer_cfg.n_layers,transformer_cfg.d_model,transformer_cfg.n_ctx,transformer_cfg.d_head,transformer_cfg.model_name,transformer_cfg.n_heads,transformer_cfg.d_mlp,transformer_cfg.act_fn,transformer_cfg.d_vocab,transformer_cfg.eps,transformer_cfg.use_attn_result,transformer_cfg.use_attn_scale,transformer_cfg.use_split_qkv_input,transformer_cfg.use_hook_mlp_in,transformer_cfg.use_attn_in,transformer_cfg.use_local_attn,transformer_cfg.original_architecture,transformer_cfg.from_checkpoint,transformer_cfg.checkpoint_index,transformer_cfg.checkpoint_label_type,transformer_cfg.checkpoint_value,transformer_cfg.tokenizer_name,transformer_cfg.window_size,transformer_cfg.attn_types,transformer_cfg.init_mode,transformer_cfg.normalization_type,transformer_cfg.device,transformer_cfg.n_devices,transformer_cfg.attention_dir,transformer_cfg.attn_only,transformer_cfg.seed,transformer_cfg.initializer_range,transformer_cfg.init_weights,transformer_cfg.scale_attn_by_inverse_layer_idx,transformer_cfg.positional_embedding_type,transformer_cfg.final_rms,transformer_cfg.d_vocab_out,transformer_cfg.parallel_attn_mlp,transformer_cfg.rotary_dim,transformer_cfg.n_params,transformer_cfg.use_hook_tokens,transformer_cfg.gated_mlp,transformer_cfg.default_prepend_bos,transformer_cfg.dtype,transformer_cfg.tokenizer_prepends_bos,transformer_cfg.n_key_value_heads,transformer_cfg.post_embedding_ln,transformer_cfg.rotary_base,transformer_cfg.trust_remote_code,transformer_cfg.rotary_adjacent_pairs
2
- 11,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/11,Counts the number of words in a sequence based on their length.,10,4,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,12.0,10.0,3.0,custom,4.0,48.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1460593486680443,True,False,standard,False,5.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False
3
- 13,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/13,"Analyzes the trend (increasing, decreasing, constant) of numeric tokens.",10,4,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,bidirectional,False,0.0,0.1460593486680443,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
4
- 18,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/18,"Classify each token based on its frequency as 'rare', 'common', or 'frequent'.",10,4,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,12.0,10.0,3.0,custom,4.0,48.0,gelu,7.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,bidirectional,False,0.0,0.12344267996967354,True,False,standard,False,3.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False
5
- 19,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/19,Removes consecutive duplicate tokens from a sequence.,15,4,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,32.0,15.0,8.0,custom,4.0,128.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.15689290811054724,True,False,standard,False,3.0,False,,24576.0,False,False,True,torch.float32,,,False,10000.0,False,False
6
- 20,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/20,Detect spam messages based on appearance of spam keywords.,10,4,0.05,0.001,False,1.0,1.0,1.0,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,14.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cuda,1.0,causal,False,0.0,0.16,True,False,standard,False,2.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
7
- 21,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/21,Extract unique tokens from a string,10,4,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1885618083164127,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
8
- 24,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/24,Identifies the first occurrence of each token in a sequence.,10,4,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1885618083164127,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
9
- 3,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/3,Returns the fraction of 'x' in the input up to the i-th position for all i.,5,4,0.05,0.001,False,1.0,1.0,10.0,2000.0,gelu,0.1,,2.0,12.0,5.0,3.0,custom,4.0,48.0,gelu,6.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.22188007849009167,True,False,standard,False,1.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False
10
- 33,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/33,Checks if each token's length is odd or even.,10,4,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.17457431218879393,True,False,standard,False,2.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
11
- 34,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/34,Calculate the ratio of vowels to consonants in each word.,10,4,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.16329931618554522,True,False,standard,False,5.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
12
- 35,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/35,Alternates capitalization of each character in words.,10,4,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,8.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
13
- 36,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/36,"Classifies each token as 'positive', 'negative', or 'neutral' based on emojis.",10,4,0.05,0.001,False,1.0,1.0,10.0,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cuda,1.0,causal,False,0.0,0.19402850002906638,True,False,standard,False,3.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
14
- 37,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/37,Reverses each word in the sequence except for specified exclusions.,10,4,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,8.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
15
- 38,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/38,Checks if tokens alternate between two types.,10,4,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,2.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
16
- 4,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/4,Return fraction of previous open tokens minus the fraction of close tokens.,10,4,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,7.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.17056057308448835,True,False,standard,False,1.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
17
- 8,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/8,Fills gaps between tokens with a specified filler.,10,4,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.13333333333333333,True,False,standard,False,8.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
18
- ioi,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi,Indirect object identification,16,16,,,True,,,,,,,,,,,,,,,,,,True,True,True,True,True,True,,True,,,,,,,,,,,,True,,,True,True,,True,,True,,,True,True,True,,,,True,,True,True
19
- ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token,Indirect object identification,16,16,,,True,,,,,,,,,,,,,,,,,,True,True,True,True,True,True,,True,,,,,,,,,,,,True,,,True,True,,True,,True,,,True,True,True,,,,True,,True,True
 
1
+ case_id,url,task_description,max_seq_len,min_seq_len,transformer_cfg_file_url,training_args_file_url,weights_file_url,circuit_file_url,training_args.atol,training_args.lr,training_args.use_single_loss,training_args.iit_weight,training_args.behavior_weight,training_args.strict_weight,training_args.epochs,training_args.act_fn,training_args.clip_grad_norm,training_args.lr_scheduler,transformer_cfg.n_layers,transformer_cfg.d_model,transformer_cfg.n_ctx,transformer_cfg.d_head,transformer_cfg.model_name,transformer_cfg.n_heads,transformer_cfg.d_mlp,transformer_cfg.act_fn,transformer_cfg.d_vocab,transformer_cfg.eps,transformer_cfg.use_attn_result,transformer_cfg.use_attn_scale,transformer_cfg.use_split_qkv_input,transformer_cfg.use_hook_mlp_in,transformer_cfg.use_attn_in,transformer_cfg.use_local_attn,transformer_cfg.original_architecture,transformer_cfg.from_checkpoint,transformer_cfg.checkpoint_index,transformer_cfg.checkpoint_label_type,transformer_cfg.checkpoint_value,transformer_cfg.tokenizer_name,transformer_cfg.window_size,transformer_cfg.attn_types,transformer_cfg.init_mode,transformer_cfg.normalization_type,transformer_cfg.device,transformer_cfg.n_devices,transformer_cfg.attention_dir,transformer_cfg.attn_only,transformer_cfg.seed,transformer_cfg.initializer_range,transformer_cfg.init_weights,transformer_cfg.scale_attn_by_inverse_layer_idx,transformer_cfg.positional_embedding_type,transformer_cfg.final_rms,transformer_cfg.d_vocab_out,transformer_cfg.parallel_attn_mlp,transformer_cfg.rotary_dim,transformer_cfg.n_params,transformer_cfg.use_hook_tokens,transformer_cfg.gated_mlp,transformer_cfg.default_prepend_bos,transformer_cfg.dtype,transformer_cfg.tokenizer_prepends_bos,transformer_cfg.n_key_value_heads,transformer_cfg.post_embedding_ln,transformer_cfg.rotary_base,transformer_cfg.trust_remote_code,transformer_cfg.rotary_adjacent_pairs
2
+ 11,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/11,Counts the number of words in a sequence based on their length.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,12.0,10.0,3.0,custom,4.0,48.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1460593486680443,True,False,standard,False,5.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False
3
+ 13,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/13,"Analyzes the trend (increasing, decreasing, constant) of numeric tokens.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,bidirectional,False,0.0,0.1460593486680443,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
4
+ 18,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/18,"Classify each token based on its frequency as 'rare', 'common', or 'frequent'.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,12.0,10.0,3.0,custom,4.0,48.0,gelu,7.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,bidirectional,False,0.0,0.12344267996967354,True,False,standard,False,3.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False
5
+ 19,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/19,Removes consecutive duplicate tokens from a sequence.,15,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,32.0,15.0,8.0,custom,4.0,128.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.15689290811054724,True,False,standard,False,3.0,False,,24576.0,False,False,True,torch.float32,,,False,10000.0,False,False
6
+ 20,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/20,Detect spam messages based on appearance of spam keywords.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg_1110.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta_1110.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_1110.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl,0.05,0.001,False,1.0,1.0,1.0,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,14.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cuda,1.0,causal,False,0.0,0.16,True,False,standard,False,2.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
7
+ 21,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/21,Extract unique tokens from a string,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1885618083164127,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
8
+ 24,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/24,Identifies the first occurrence of each token in a sequence.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1885618083164127,True,False,standard,False,3.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
9
+ 3,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/3,Returns the fraction of 'x' in the input up to the i-th position for all i.,5,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg_10110.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta_10110.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_10110.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl,0.05,0.001,False,1.0,1.0,10.0,2000.0,gelu,0.1,,2.0,12.0,5.0,3.0,custom,4.0,48.0,gelu,6.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.22188007849009167,True,False,standard,False,1.0,False,,3456.0,False,False,True,torch.float32,,,False,10000.0,False,False
10
+ 33,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/33,Checks if each token's length is odd or even.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.17457431218879393,True,False,standard,False,2.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
11
+ 34,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/34,Calculate the ratio of vowels to consonants in each word.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.16329931618554522,True,False,standard,False,5.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
12
+ 35,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/35,Alternates capitalization of each character in words.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,8.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
13
+ 36,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/36,"Classifies each token as 'positive', 'negative', or 'neutral' based on emojis.",10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg_10110.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta_10110.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_10110.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl,0.05,0.001,False,1.0,1.0,10.0,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cuda,1.0,causal,False,0.0,0.19402850002906638,True,False,standard,False,3.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
14
+ 37,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/37,Reverses each word in the sequence except for specified exclusions.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,4.0,10.0,1.0,custom,4.0,16.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,8.0,False,,384.0,False,False,True,torch.float32,,,False,10000.0,False,False
15
+ 38,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/38,Checks if tokens alternate between two types.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,5.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.1539600717839002,True,False,standard,False,2.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
16
+ 4,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/4,Return fraction of previous open tokens minus the fraction of close tokens.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl,0.05,0.001,False,1.0,1.0,0.4,2000.0,gelu,0.1,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,7.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.17056057308448835,True,False,standard,False,1.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
17
+ 8,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/8,Fills gaps between tokens with a specified filler.,10,4,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_cfg_510.pkl,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/meta_510.json,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_510.pth,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/edges.pkl,0.05,0.01,False,1.0,1.0,0.4,500.0,gelu,1.0,,2.0,20.0,10.0,5.0,custom,4.0,80.0,gelu,10.0,1e-05,True,True,True,True,False,False,,False,,,,,,,gpt2,,cpu,1.0,causal,False,0.0,0.13333333333333333,True,False,standard,False,8.0,False,,9600.0,False,False,True,torch.float32,,,False,10000.0,False,False
18
+ ioi,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi,Indirect object identification,16,16,,,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_100_100_40.pth,,,,True,,,,,,,,,,,,,,,,,,True,True,True,True,True,True,,True,,,,,,,,,,,,True,,,True,True,,True,,True,,,True,True,True,,,,True,,True,True
19
+ ioi_next_token,https://huggingface.co/cybershiptrooper/InterpBench/tree/main/ioi_next_token,Indirect object identification,16,16,,,https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_100_100_40.pth,,,,True,,,,,,,,,,,,,,,,,,True,True,True,True,True,True,,True,,,,,,,,,,,,True,,,True,True,,True,,True,,,True,True,True,,,,True,,True,True
benchmark_cases_metadata.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12652b82dbded2521f44e1219ade14c88e6bd787db5a2141803db257fb375e87
3
- size 51034
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d42203edfeb52102b4df24aecf54b5a51c9c4f547b6cede024422c898564f69f
3
+ size 56701
benchmark_metadata.json CHANGED
@@ -91,6 +91,7 @@
91
  "trust_remote_code": false,
92
  "rotary_adjacent_pairs": false
93
  },
 
94
  "training_args": {
95
  "atol": 0.05,
96
  "lr": 0.01,
@@ -102,7 +103,10 @@
102
  "act_fn": "gelu",
103
  "clip_grad_norm": 1.0,
104
  "lr_scheduler": ""
105
- }
 
 
 
106
  },
107
  {
108
  "case_id": "13",
@@ -185,6 +189,7 @@
185
  "trust_remote_code": false,
186
  "rotary_adjacent_pairs": false
187
  },
 
188
  "training_args": {
189
  "atol": 0.05,
190
  "lr": 0.01,
@@ -196,7 +201,10 @@
196
  "act_fn": "gelu",
197
  "clip_grad_norm": 1.0,
198
  "lr_scheduler": ""
199
- }
 
 
 
200
  },
201
  {
202
  "case_id": "18",
@@ -281,6 +289,7 @@
281
  "trust_remote_code": false,
282
  "rotary_adjacent_pairs": false
283
  },
 
284
  "training_args": {
285
  "atol": 0.05,
286
  "lr": 0.001,
@@ -292,7 +301,10 @@
292
  "act_fn": "gelu",
293
  "clip_grad_norm": 0.1,
294
  "lr_scheduler": ""
295
- }
 
 
 
296
  },
297
  {
298
  "case_id": "19",
@@ -375,6 +387,7 @@
375
  "trust_remote_code": false,
376
  "rotary_adjacent_pairs": false
377
  },
 
378
  "training_args": {
379
  "atol": 0.05,
380
  "lr": 0.001,
@@ -386,7 +399,10 @@
386
  "act_fn": "gelu",
387
  "clip_grad_norm": 0.1,
388
  "lr_scheduler": ""
389
- }
 
 
 
390
  },
391
  {
392
  "case_id": "20",
@@ -478,6 +494,7 @@
478
  "trust_remote_code": false,
479
  "rotary_adjacent_pairs": false
480
  },
 
481
  "training_args": {
482
  "atol": 0.05,
483
  "lr": 0.001,
@@ -489,7 +506,10 @@
489
  "act_fn": "gelu",
490
  "clip_grad_norm": 0.1,
491
  "lr_scheduler": ""
492
- }
 
 
 
493
  },
494
  {
495
  "case_id": "21",
@@ -572,6 +592,7 @@
572
  "trust_remote_code": false,
573
  "rotary_adjacent_pairs": false
574
  },
 
575
  "training_args": {
576
  "atol": 0.05,
577
  "lr": 0.01,
@@ -583,7 +604,10 @@
583
  "act_fn": "gelu",
584
  "clip_grad_norm": 1.0,
585
  "lr_scheduler": ""
586
- }
 
 
 
587
  },
588
  {
589
  "case_id": "24",
@@ -666,6 +690,7 @@
666
  "trust_remote_code": false,
667
  "rotary_adjacent_pairs": false
668
  },
 
669
  "training_args": {
670
  "atol": 0.05,
671
  "lr": 0.01,
@@ -677,7 +702,10 @@
677
  "act_fn": "gelu",
678
  "clip_grad_norm": 1.0,
679
  "lr_scheduler": ""
680
- }
 
 
 
681
  },
682
  {
683
  "case_id": "3",
@@ -761,6 +789,7 @@
761
  "trust_remote_code": false,
762
  "rotary_adjacent_pairs": false
763
  },
 
764
  "training_args": {
765
  "atol": 0.05,
766
  "lr": 0.001,
@@ -772,7 +801,10 @@
772
  "act_fn": "gelu",
773
  "clip_grad_norm": 0.1,
774
  "lr_scheduler": ""
775
- }
 
 
 
776
  },
777
  {
778
  "case_id": "33",
@@ -860,6 +892,7 @@
860
  "trust_remote_code": false,
861
  "rotary_adjacent_pairs": false
862
  },
 
863
  "training_args": {
864
  "atol": 0.05,
865
  "lr": 0.001,
@@ -871,7 +904,10 @@
871
  "act_fn": "gelu",
872
  "clip_grad_norm": 0.1,
873
  "lr_scheduler": ""
874
- }
 
 
 
875
  },
876
  {
877
  "case_id": "34",
@@ -959,6 +995,7 @@
959
  "trust_remote_code": false,
960
  "rotary_adjacent_pairs": false
961
  },
 
962
  "training_args": {
963
  "atol": 0.05,
964
  "lr": 0.001,
@@ -970,7 +1007,10 @@
970
  "act_fn": "gelu",
971
  "clip_grad_norm": 0.1,
972
  "lr_scheduler": ""
973
- }
 
 
 
974
  },
975
  {
976
  "case_id": "35",
@@ -1058,6 +1098,7 @@
1058
  "trust_remote_code": false,
1059
  "rotary_adjacent_pairs": false
1060
  },
 
1061
  "training_args": {
1062
  "atol": 0.05,
1063
  "lr": 0.001,
@@ -1069,7 +1110,10 @@
1069
  "act_fn": "gelu",
1070
  "clip_grad_norm": 0.1,
1071
  "lr_scheduler": ""
1072
- }
 
 
 
1073
  },
1074
  {
1075
  "case_id": "36",
@@ -1152,6 +1196,7 @@
1152
  "trust_remote_code": false,
1153
  "rotary_adjacent_pairs": false
1154
  },
 
1155
  "training_args": {
1156
  "atol": 0.05,
1157
  "lr": 0.001,
@@ -1163,7 +1208,10 @@
1163
  "act_fn": "gelu",
1164
  "clip_grad_norm": 0.1,
1165
  "lr_scheduler": ""
1166
- }
 
 
 
1167
  },
1168
  {
1169
  "case_id": "37",
@@ -1251,6 +1299,7 @@
1251
  "trust_remote_code": false,
1252
  "rotary_adjacent_pairs": false
1253
  },
 
1254
  "training_args": {
1255
  "atol": 0.05,
1256
  "lr": 0.001,
@@ -1262,7 +1311,10 @@
1262
  "act_fn": "gelu",
1263
  "clip_grad_norm": 0.1,
1264
  "lr_scheduler": ""
1265
- }
 
 
 
1266
  },
1267
  {
1268
  "case_id": "38",
@@ -1345,6 +1397,7 @@
1345
  "trust_remote_code": false,
1346
  "rotary_adjacent_pairs": false
1347
  },
 
1348
  "training_args": {
1349
  "atol": 0.05,
1350
  "lr": 0.001,
@@ -1356,7 +1409,10 @@
1356
  "act_fn": "gelu",
1357
  "clip_grad_norm": 0.1,
1358
  "lr_scheduler": ""
1359
- }
 
 
 
1360
  },
1361
  {
1362
  "case_id": "4",
@@ -1441,6 +1497,7 @@
1441
  "trust_remote_code": false,
1442
  "rotary_adjacent_pairs": false
1443
  },
 
1444
  "training_args": {
1445
  "atol": 0.05,
1446
  "lr": 0.001,
@@ -1452,7 +1509,10 @@
1452
  "act_fn": "gelu",
1453
  "clip_grad_norm": 0.1,
1454
  "lr_scheduler": ""
1455
- }
 
 
 
1456
  },
1457
  {
1458
  "case_id": "8",
@@ -1540,6 +1600,7 @@
1540
  "trust_remote_code": false,
1541
  "rotary_adjacent_pairs": false
1542
  },
 
1543
  "training_args": {
1544
  "atol": 0.05,
1545
  "lr": 0.01,
@@ -1551,7 +1612,10 @@
1551
  "act_fn": "gelu",
1552
  "clip_grad_norm": 1.0,
1553
  "lr_scheduler": ""
1554
- }
 
 
 
1555
  },
1556
  {
1557
  "case_id": "ioi",
@@ -1568,7 +1632,8 @@
1568
  "file_name": "ll_model_100_100_40.pth",
1569
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_100_100_40.pth"
1570
  }
1571
- ]
 
1572
  },
1573
  {
1574
  "case_id": "ioi_next_token",
@@ -1589,7 +1654,8 @@
1589
  "file_name": "training_args.json",
1590
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/training_args.json"
1591
  }
1592
- ]
 
1593
  }
1594
  ]
1595
  }
 
91
  "trust_remote_code": false,
92
  "rotary_adjacent_pairs": false
93
  },
94
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_cfg_510.pkl",
95
  "training_args": {
96
  "atol": 0.05,
97
  "lr": 0.01,
 
103
  "act_fn": "gelu",
104
  "clip_grad_norm": 1.0,
105
  "lr_scheduler": ""
106
+ },
107
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/meta_510.json",
108
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/ll_model_510.pth",
109
+ "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/11/edges.pkl"
110
  },
111
  {
112
  "case_id": "13",
 
189
  "trust_remote_code": false,
190
  "rotary_adjacent_pairs": false
191
  },
192
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_cfg_510.pkl",
193
  "training_args": {
194
  "atol": 0.05,
195
  "lr": 0.01,
 
201
  "act_fn": "gelu",
202
  "clip_grad_norm": 1.0,
203
  "lr_scheduler": ""
204
+ },
205
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/meta_510.json",
206
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/ll_model_510.pth",
207
+ "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/13/edges.pkl"
208
  },
209
  {
210
  "case_id": "18",
 
289
  "trust_remote_code": false,
290
  "rotary_adjacent_pairs": false
291
  },
292
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_cfg_510.pkl",
293
  "training_args": {
294
  "atol": 0.05,
295
  "lr": 0.001,
 
301
  "act_fn": "gelu",
302
  "clip_grad_norm": 0.1,
303
  "lr_scheduler": ""
304
+ },
305
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/meta_510.json",
306
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/ll_model_510.pth",
307
+ "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/18/edges.pkl"
308
  },
309
  {
310
  "case_id": "19",
 
387
  "trust_remote_code": false,
388
  "rotary_adjacent_pairs": false
389
  },
390
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_cfg_510.pkl",
391
  "training_args": {
392
  "atol": 0.05,
393
  "lr": 0.001,
 
399
  "act_fn": "gelu",
400
  "clip_grad_norm": 0.1,
401
  "lr_scheduler": ""
402
+ },
403
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/meta_510.json",
404
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/ll_model_510.pth",
405
+ "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/19/edges.pkl"
406
  },
407
  {
408
  "case_id": "20",
 
494
  "trust_remote_code": false,
495
  "rotary_adjacent_pairs": false
496
  },
497
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_cfg_1110.pkl",
498
  "training_args": {
499
  "atol": 0.05,
500
  "lr": 0.001,
 
506
  "act_fn": "gelu",
507
  "clip_grad_norm": 0.1,
508
  "lr_scheduler": ""
509
+ },
510
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/meta_1110.json",
511
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/ll_model_1110.pth",
512
+ "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/20/edges.pkl"
513
  },
514
  {
515
  "case_id": "21",
 
592
  "trust_remote_code": false,
593
  "rotary_adjacent_pairs": false
594
  },
595
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_cfg_510.pkl",
596
  "training_args": {
597
  "atol": 0.05,
598
  "lr": 0.01,
 
604
  "act_fn": "gelu",
605
  "clip_grad_norm": 1.0,
606
  "lr_scheduler": ""
607
+ },
608
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/meta_510.json",
609
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/ll_model_510.pth",
610
+ "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/21/edges.pkl"
611
  },
612
  {
613
  "case_id": "24",
 
690
  "trust_remote_code": false,
691
  "rotary_adjacent_pairs": false
692
  },
693
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_cfg_510.pkl",
694
  "training_args": {
695
  "atol": 0.05,
696
  "lr": 0.01,
 
702
  "act_fn": "gelu",
703
  "clip_grad_norm": 1.0,
704
  "lr_scheduler": ""
705
+ },
706
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/meta_510.json",
707
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/ll_model_510.pth",
708
+ "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/24/edges.pkl"
709
  },
710
  {
711
  "case_id": "3",
 
789
  "trust_remote_code": false,
790
  "rotary_adjacent_pairs": false
791
  },
792
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_cfg_10110.pkl",
793
  "training_args": {
794
  "atol": 0.05,
795
  "lr": 0.001,
 
801
  "act_fn": "gelu",
802
  "clip_grad_norm": 0.1,
803
  "lr_scheduler": ""
804
+ },
805
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/meta_10110.json",
806
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/ll_model_10110.pth",
807
+ "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/3/edges.pkl"
808
  },
809
  {
810
  "case_id": "33",
 
892
  "trust_remote_code": false,
893
  "rotary_adjacent_pairs": false
894
  },
895
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_cfg_510.pkl",
896
  "training_args": {
897
  "atol": 0.05,
898
  "lr": 0.001,
 
904
  "act_fn": "gelu",
905
  "clip_grad_norm": 0.1,
906
  "lr_scheduler": ""
907
+ },
908
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/meta_510.json",
909
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/ll_model_510.pth",
910
+ "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/33/edges.pkl"
911
  },
912
  {
913
  "case_id": "34",
 
995
  "trust_remote_code": false,
996
  "rotary_adjacent_pairs": false
997
  },
998
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_cfg_510.pkl",
999
  "training_args": {
1000
  "atol": 0.05,
1001
  "lr": 0.001,
 
1007
  "act_fn": "gelu",
1008
  "clip_grad_norm": 0.1,
1009
  "lr_scheduler": ""
1010
+ },
1011
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/meta_510.json",
1012
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/ll_model_510.pth",
1013
+ "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/34/edges.pkl"
1014
  },
1015
  {
1016
  "case_id": "35",
 
1098
  "trust_remote_code": false,
1099
  "rotary_adjacent_pairs": false
1100
  },
1101
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_cfg_510.pkl",
1102
  "training_args": {
1103
  "atol": 0.05,
1104
  "lr": 0.001,
 
1110
  "act_fn": "gelu",
1111
  "clip_grad_norm": 0.1,
1112
  "lr_scheduler": ""
1113
+ },
1114
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/meta_510.json",
1115
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/ll_model_510.pth",
1116
+ "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/35/edges.pkl"
1117
  },
1118
  {
1119
  "case_id": "36",
 
1196
  "trust_remote_code": false,
1197
  "rotary_adjacent_pairs": false
1198
  },
1199
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_cfg_10110.pkl",
1200
  "training_args": {
1201
  "atol": 0.05,
1202
  "lr": 0.001,
 
1208
  "act_fn": "gelu",
1209
  "clip_grad_norm": 0.1,
1210
  "lr_scheduler": ""
1211
+ },
1212
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/meta_10110.json",
1213
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/ll_model_10110.pth",
1214
+ "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/36/edges.pkl"
1215
  },
1216
  {
1217
  "case_id": "37",
 
1299
  "trust_remote_code": false,
1300
  "rotary_adjacent_pairs": false
1301
  },
1302
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_cfg_510.pkl",
1303
  "training_args": {
1304
  "atol": 0.05,
1305
  "lr": 0.001,
 
1311
  "act_fn": "gelu",
1312
  "clip_grad_norm": 0.1,
1313
  "lr_scheduler": ""
1314
+ },
1315
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/meta_510.json",
1316
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/ll_model_510.pth",
1317
+ "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/37/edges.pkl"
1318
  },
1319
  {
1320
  "case_id": "38",
 
1397
  "trust_remote_code": false,
1398
  "rotary_adjacent_pairs": false
1399
  },
1400
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/ll_model_cfg_510.pkl",
1401
  "training_args": {
1402
  "atol": 0.05,
1403
  "lr": 0.001,
 
1409
  "act_fn": "gelu",
1410
  "clip_grad_norm": 0.1,
1411
  "lr_scheduler": ""
1412
+ },
1413
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/meta_510.json",
1414
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/ll_model_510.pth",
1415
+ "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/38/edges.pkl"
1416
  },
1417
  {
1418
  "case_id": "4",
 
1497
  "trust_remote_code": false,
1498
  "rotary_adjacent_pairs": false
1499
  },
1500
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_cfg_510.pkl",
1501
  "training_args": {
1502
  "atol": 0.05,
1503
  "lr": 0.001,
 
1509
  "act_fn": "gelu",
1510
  "clip_grad_norm": 0.1,
1511
  "lr_scheduler": ""
1512
+ },
1513
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/meta_510.json",
1514
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/ll_model_510.pth",
1515
+ "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/4/edges.pkl"
1516
  },
1517
  {
1518
  "case_id": "8",
 
1600
  "trust_remote_code": false,
1601
  "rotary_adjacent_pairs": false
1602
  },
1603
+ "transformer_cfg_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_cfg_510.pkl",
1604
  "training_args": {
1605
  "atol": 0.05,
1606
  "lr": 0.01,
 
1612
  "act_fn": "gelu",
1613
  "clip_grad_norm": 1.0,
1614
  "lr_scheduler": ""
1615
+ },
1616
+ "training_args_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/meta_510.json",
1617
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/ll_model_510.pth",
1618
+ "circuit_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/8/edges.pkl"
1619
  },
1620
  {
1621
  "case_id": "ioi",
 
1632
  "file_name": "ll_model_100_100_40.pth",
1633
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_100_100_40.pth"
1634
  }
1635
+ ],
1636
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi/ll_model_100_100_40.pth"
1637
  },
1638
  {
1639
  "case_id": "ioi_next_token",
 
1654
  "file_name": "training_args.json",
1655
  "url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/training_args.json"
1656
  }
1657
+ ],
1658
+ "weights_file_url": "https://huggingface.co/cybershiptrooper/InterpBench/blob/main/ioi_next_token/ll_model_100_100_40.pth"
1659
  }
1660
  ]
1661
  }
benchmark_metadata_croissant.json CHANGED
@@ -197,6 +197,66 @@
197
  }
198
  }
199
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  {
201
  "@type": "cr:Field",
202
  "@id": "training_args.atol",
 
197
  }
198
  }
199
  },
200
+ {
201
+ "@type": "cr:Field",
202
+ "@id": "transformer_cfg_file_url",
203
+ "name": "transformer_cfg_file_url",
204
+ "description": "Column 'transformer_cfg_file_url' from the parquet file describing all the cases in the benchmark.",
205
+ "dataType": "sc:Text",
206
+ "source": {
207
+ "fileSet": {
208
+ "@id": "benchmark-cases-parquet"
209
+ },
210
+ "extract": {
211
+ "column": "transformer_cfg_file_url"
212
+ }
213
+ }
214
+ },
215
+ {
216
+ "@type": "cr:Field",
217
+ "@id": "training_args_file_url",
218
+ "name": "training_args_file_url",
219
+ "description": "Column 'training_args_file_url' from the parquet file describing all the cases in the benchmark.",
220
+ "dataType": "sc:Text",
221
+ "source": {
222
+ "fileSet": {
223
+ "@id": "benchmark-cases-parquet"
224
+ },
225
+ "extract": {
226
+ "column": "training_args_file_url"
227
+ }
228
+ }
229
+ },
230
+ {
231
+ "@type": "cr:Field",
232
+ "@id": "weights_file_url",
233
+ "name": "weights_file_url",
234
+ "description": "Column 'weights_file_url' from the parquet file describing all the cases in the benchmark.",
235
+ "dataType": "sc:Text",
236
+ "source": {
237
+ "fileSet": {
238
+ "@id": "benchmark-cases-parquet"
239
+ },
240
+ "extract": {
241
+ "column": "weights_file_url"
242
+ }
243
+ }
244
+ },
245
+ {
246
+ "@type": "cr:Field",
247
+ "@id": "circuit_file_url",
248
+ "name": "circuit_file_url",
249
+ "description": "Column 'circuit_file_url' from the parquet file describing all the cases in the benchmark.",
250
+ "dataType": "sc:Text",
251
+ "source": {
252
+ "fileSet": {
253
+ "@id": "benchmark-cases-parquet"
254
+ },
255
+ "extract": {
256
+ "column": "circuit_file_url"
257
+ }
258
+ }
259
+ },
260
  {
261
  "@type": "cr:Field",
262
  "@id": "training_args.atol",