iarcuschin commited on
Commit
82a8da7
1 Parent(s): 16ebc0b

Add new models

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
101/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
3
+ size 113
101/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8fd5916d51408c7bd9536f5f7384aee78c453e6b3ea6c16714b9b92d21c7386
3
+ size 14890
101/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cd0768a4f665f6e168d8f8dcd31ee2ef833847442d11558cb1fdb65b71fab66
3
+ size 1093
101/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.4, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train", "wandb_name": "case-101-seed-67-s-0.4-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
103/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4df9e5eb129553e810673c7ea9f138df364f9461861df969e00972518748db4b
3
+ size 1256
103/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2deec61e135c0e16e0f0c1884c91141e8486825d8c697045e32c422cc2862bb
3
+ size 106310
103/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7628439ed419829404ac8619bc94b8424d1eb5f6e00ce2384a591ef1771ab98
3
+ size 1100
103/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.4, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train", "wandb_name": "case-103-seed-67-s-0.4-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
110/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce0c25059854bda5408738429c5404c6ae0931901591143f73367b3f0ae29f00
3
+ size 598
110/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2aab75c41c36e2d937b1707d84aec6d2fadf74696ed059c6d3667b6e7e868eaf
3
+ size 54890
110/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20f14b516d1a8a2e436ddabe3bf7a1b3d5ef10df0bcf62228affc607ec26c56e
3
+ size 1100
110/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.4, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train", "wandb_name": "case-110-seed-67-s-0.4-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
111/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f9324e9a3dab349582cbae63c998b4251b92b875e49caa79826b450eadc9dcb
3
+ size 1251
111/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c512e7e3b1fca95ffd88f36b14824ddd5b8127c76ea6819ca21a4e16273ed36e
3
+ size 106310
111/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c689fcff94029e6296340b4c2a5909a422b8d3fc7b26005911b3d760538c56e
3
+ size 1100
111/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.4, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train", "wandb_name": "case-111-seed-67-s-0.4-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
113/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1aaf72d92f0df42e61bc20fd40672c902ce2e227f0df638b4b38111a09c06c93
3
+ size 2847
113/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea7ec0d5b5e4ba6da236fedcb698feeef7c6987a179c5f31f8fd78a367ccf4a7
3
+ size 2688376
113/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5d64e11240c419a545b94fea7aabfae5ed77782f4f4de712846bc7ba453c7c9
3
+ size 1103
113/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.4, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train", "wandb_name": "case-113-seed-67-s-0.4-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
114/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
3
+ size 113
114/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:941f9c19ab892cf7546c8535cd3728477616d467c294886c82425a87d1b9bd93
3
+ size 14698
114/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d5209f9d9e7a92d79c658406442cc48849fe4256a1186472614bf141344c7d5
3
+ size 1093
114/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.4, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train", "wandb_name": "case-114-seed-67-s-0.4-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
122/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a2cb02e0518a50c2b53be59ea3c3788ec80c0d3c6c1e70070bd7115abdfae77
3
+ size 113
122/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c428334ca09901df8c1a557fb8ff91f94faad58da12c8cec5cc54aba42ee5cd
3
+ size 14890
122/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cd0768a4f665f6e168d8f8dcd31ee2ef833847442d11558cb1fdb65b71fab66
3
+ size 1093
122/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.4, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train", "wandb_name": "case-122-seed-67-s-0.4-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
124/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94f304942c0a1b31c525866b45e6cb08898c68c8babe262aee1720e0d7ae703f
3
+ size 1110
124/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3204efb7c2c92f7a8807743e44c1e6c9c65d6828a9aaa22606b0874ccbe3293
3
+ size 105478
124/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d9b99b585bf898a743c98e402a9c1ef1600b3d43d8e79eeadd11b86b7d2c626
3
+ size 1100
124/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.4, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train", "wandb_name": "case-124-seed-67-s-0.4-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
129/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b01d4b9b1e5740fd2934079065cb57785bb111918826a8e8ad7b5b4af00c7bcc
3
+ size 677
129/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f497c70d4f781598a2d86469fc884a7aee35013d2ca2851e03a40077224fc6ab
3
+ size 21190
129/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4c611ecd0f8c57d44f555697540a82578e20f4dbe88bb1bd25132bd06244fa2
3
+ size 1100
129/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.4, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train", "wandb_name": "case-129-seed-67-s-0.4-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
14/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3156b2e517b8c45e76ea28f8c2ccbcda226f636cd44a809c908e167871d2cdab
3
+ size 546
14/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83ce6aabb3fdb734d8e602755d05c15332bd422572385cbcb33cd0e41191c637
3
+ size 19754
14/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27866dc79bdac527552f8e0d39e80579033fc10ab193f1bdedef3660205608dd
3
+ size 1100
14/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.4, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train", "wandb_name": "case-14-seed-67-s-0.4-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
2/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd8ee919e31a1590d5e02e7d6aa4625e2d9e3002e3bbb74402f5a49e7d0be36b
3
+ size 1251
2/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b2ef568269ef820d7d6975fed90fabf517f044f26f9b10a8a3724a7da3c4f83
3
+ size 647458
2/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:274196ba87c5a5cc25f5e3ed93a4c7dace5a7924905804c2ee6d2112c460f534
3
+ size 1102
2/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.4, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train", "wandb_name": "case-2-seed-67-s-0.4-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
24/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9626f34a27e68c1fd617876205908e46e8e9ca7ebce03801219894dfb4994d42
3
+ size 537
24/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd7c40f800ce7c3fc207b7693033c2fa25ff89e591c74ef8e89ae07a22bbc9b0
3
+ size 141930
24/ll_model_cfg.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3887932bc979926a0304b1ca63f6b7e05f0dae97b836c1922ce2f12445a0c383
3
+ size 1093
24/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"output_dir": "/circuits-benchmark/results", "atol": 0.05, "lr": 0.001, "use_single_loss": true, "iit_weight": 1.0, "behavior_weight": 0.4, "strict_weight": 0.4, "epochs": 1000, "early_stop_accuracy_threshold": 99.9, "act_fn": "gelu", "use_wandb": true, "wandb_project": "iit-train", "wandb_name": "case-24-seed-67-s-0.4-b-0.4-iit-1", "save_model_to_wandb": true, "device": "cuda", "clip_grad_norm": 0.1, "lr_scheduler": "linear", "model_pair": "strict", "same_size": false, "seed": 67, "batch_size": 256, "include_mlp": false, "detach_while_caching": true, "scheduler_val_metric": ["val/accuracy", "val/IIA", "val/strict_accuracy"], "siit_sampling": "sample_all", "val_iia_sampling": "all"}
25/edges.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd358562dc56469dbf03912dac0c5d3d5489b0f2dadcb85a7cd7c24f9582a74a
3
+ size 1260
25/ll_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e72ecd925276352d1240da0b40019b38ef254cbfdad61b234d96672fc4ae2e2
3
+ size 398954