jpata committed
Commit 4c74277 · 1 Parent(s): 0c10fab

added May 2024 model

cms/2024_05_16_attn_model21M/checkpoint-25-17.631161.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8d8a593d279ca3fd58fe3836d49145f634238fc4a7cf808c5df580c4a0bdefb5
+ size 255895346
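Note: the .pth entry above is a Git LFS pointer; the ~256 MB checkpoint itself lives in LFS storage. A minimal sketch of fetching and inspecting it, assuming a local clone with git-lfs available and that the file is a standard torch.save artifact (its key layout is not documented in this commit):

```python
# Fetch the real file behind the LFS pointer (run inside the repo clone):
#   git lfs pull --include "cms/2024_05_16_attn_model21M/checkpoint-25-17.631161.pth"

import torch

# Load on CPU; the checkpoint is assumed to be a torch.save() object,
# but its exact contents (e.g. a state_dict) are not specified here.
ckpt = torch.load(
    "cms/2024_05_16_attn_model21M/checkpoint-25-17.631161.pth",
    map_location="cpu",
)
print(type(ckpt), list(ckpt.keys()) if isinstance(ckpt, dict) else None)
```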
cms/2024_05_16_attn_model21M/model_fp32.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fddd236376ab0a119f2db393b19e5fd20269f03ab60525b961ec5157d2a94ce5
+ size 85479699
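Note: the fp32 ONNX export can be inspected without the training code. A minimal sketch using onnxruntime, assuming the file has been pulled from LFS; input and output names/shapes are not listed in this commit, so they are read from the session rather than hard-coded:

```python
import onnxruntime as ort

# Open a CPU inference session and list the graph inputs/outputs.
sess = ort.InferenceSession(
    "cms/2024_05_16_attn_model21M/model_fp32.onnx",
    providers=["CPUExecutionProvider"],
)
for inp in sess.get_inputs():
    print("input:", inp.name, inp.shape, inp.type)
for out in sess.get_outputs():
    print("output:", out.name, out.shape, out.type)
```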
cms/2024_05_16_attn_model21M/overridden_config.yaml ADDED
@@ -0,0 +1,123 @@
+ backend: pytorch
+ checkpoint_freq: 1
+ comet: true
+ comet_name: particleflow-pt
+ comet_offline: false
+ comet_step_freq: 10
+ config: parameters/pytorch/pyg-cms.yaml
+ conv_type: attention
+ data_dir: /scratch/persistent/joosep/tensorflow_datasets
+ dataset: cms
+ dtype: bfloat16
+ gpu_batch_multiplier: 40
+ gpus: 1
+ load: experiments/pyg-cms_20240430_094836_751206/checkpoints/checkpoint-12-17.861217.pth
+ lr: 5.0e-05
+ lr_schedule: cosinedecay
+ lr_schedule_config:
+   onecycle:
+     pct_start: 0.3
+ model:
+   attention:
+     activation: relu
+     attention_type: flash
+     conv_type: attention
+     dropout_conv_id_ff: 0.0
+     dropout_conv_id_mha: 0.0
+     dropout_conv_reg_ff: 0.0
+     dropout_conv_reg_mha: 0.0
+     dropout_ff: 0.0
+     head_dim: 16
+     num_convs: 6
+     num_heads: 32
+   cos_phi_mode: linear
+   energy_mode: linear
+   eta_mode: linear
+   gnn_lsh:
+     activation: elu
+     bin_size: 640
+     conv_type: gnn_lsh
+     distance_dim: 128
+     dropout_ff: 0.0
+     embedding_dim: 512
+     ffn_dist_hidden_dim: 128
+     ffn_dist_num_layers: 2
+     layernorm: true
+     max_num_bins: 200
+     num_convs: 3
+     num_node_messages: 2
+     width: 512
+   input_encoding: joint
+   learned_representation_mode: last
+   mamba:
+     activation: elu
+     conv_type: mamba
+     d_conv: 4
+     d_state: 32
+     dropout_ff: 0.0
+     embedding_dim: 1024
+     expand: 2
+     num_convs: 4
+     width: 1024
+   pt_mode: linear
+   sin_phi_mode: linear
+   trainable: all
+ ntest: null
+ ntrain: null
+ num_epochs: 100
+ num_workers: 4
+ nvalid: null
+ patience: 20
+ prefetch_factor: 50
+ ray_train: false
+ raytune:
+   asha:
+     brackets: 1
+     grace_period: 10
+     max_t: 200
+     reduction_factor: 4
+   default_metric: val_loss
+   default_mode: min
+   hyperband:
+     max_t: 200
+     reduction_factor: 4
+   hyperopt:
+     n_random_steps: 10
+   local_dir: null
+   nevergrad:
+     n_random_steps: 10
+   sched: asha
+   search_alg: hyperopt
+ sort_data: true
+ test_dataset:
+   cms_pf_qcd:
+     version: 1.7.1
+   cms_pf_ttbar:
+     version: 1.7.1
+   cms_pf_ztt:
+     version: 1.7.1
+ test_datasets: []
+ train: true
+ train_dataset:
+   cms:
+     physical:
+       batch_size: 1
+       samples:
+         cms_pf_qcd:
+           version: 1.7.1
+         cms_pf_ttbar:
+           version: 1.7.1
+         cms_pf_ztt:
+           version: 1.7.1
+ val_freq: null
+ valid_dataset:
+   cms:
+     physical:
+       batch_size: 1
+       samples:
+         cms_pf_qcd:
+           version: 1.7.1
+         cms_pf_ttbar:
+           version: 1.7.1
+         cms_pf_ztt:
+           version: 1.7.1
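Note: the overridden config is plain YAML, so it can be read back directly with PyYAML. A minimal sketch pulling out a few of the attention-model settings added above; the file path and key names follow this commit, and the nesting shown in the diff is assumed:

```python
import yaml

with open("cms/2024_05_16_attn_model21M/overridden_config.yaml") as f:
    cfg = yaml.safe_load(f)

# Values taken directly from the diff above.
attn = cfg["model"]["attention"]
print(cfg["conv_type"])                                        # attention
print(attn["num_convs"], attn["num_heads"], attn["head_dim"])  # 6 32 16
print(cfg["dtype"], cfg["lr"], cfg["lr_schedule"])             # bfloat16 5e-05 cosinedecay
```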