File size: 2,582 Bytes
1742c16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# Top-level run settings. Keys throughout this file appear in alphabetical
# order, which suggests a machine-written dump of a resolved configuration
# rather than a hand-maintained file (NOTE(review): confirm before editing).
backend: pytorch
# No explicit checkpoint frequency is set (null).
checkpoint_freq: null
# Comet options. comet is false, so presumably the name / offline /
# step-frequency values below are not used in this run - TODO confirm.
comet: false
comet_name: particleflow-pt
comet_offline: false
comet_step_freq: 100
# Relative path of the parameter file this configuration refers to.
config: parameters/pytorch/pyg-clic.yaml
# Selects "attention"; a sub-section with the same name exists under model.
conv_type: attention
# Absolute, user-specific path; it will need adjusting on another machine.
data_dir: /scratch/persistent/joosep/tensorflow_datasets
dataset: clic
dtype: bfloat16
gpu_batch_multiplier: 100
gpus: 1
# Checkpoint file inside a timestamped experiment directory; presumably the
# weights loaded at start-up - verify against the consumer.
load: experiments/pyg-clic_20241001_215132_345408/checkpoints/checkpoint-26-2.004527.pth
lr: 0.0001
# NOTE(review): the selected schedule is "cosinedecay", but lr_schedule_config
# only carries settings for "onecycle"; presumably those settings are ignored
# for this run - confirm.
lr_schedule: cosinedecay
lr_schedule_config:
  onecycle:
    pct_start: 0.3
make_plots: true
# Model definition. Three alternative sub-sections are present (attention,
# gnn_lsh, mamba), each carrying a conv_type equal to its own key. The
# top-level conv_type of this file is "attention", so presumably only that
# sub-section is active in this run (TODO confirm against the consumer).
model:
  attention:
    activation: gelu
    attention_type: math
    conv_type: attention
    # Dropout is 0.0 for the two "id" entries and 0.1 for the two "reg"
    # entries and for dropout_ff.
    dropout_conv_id_ff: 0.0
    dropout_conv_id_mha: 0.0
    dropout_conv_reg_ff: 0.1
    dropout_conv_reg_mha: 0.1
    dropout_ff: 0.1
    # NOTE(review): head_dim * num_heads = 32 * 32 = 1024; presumably the
    # total attention width - confirm.
    head_dim: 32
    num_convs: 4
    num_heads: 32
    use_pre_layernorm: true
  # Per-output modes: cos_phi, eta and sin_phi are set to "linear"; energy and
  # pt are set to "direct-elemtype-split". These keys are interleaved with the
  # sub-sections because of the alphabetical key ordering.
  cos_phi_mode: linear
  energy_mode: direct-elemtype-split
  eta_mode: linear
  # Settings for the gnn_lsh variant (not the conv_type selected at top level).
  gnn_lsh:
    activation: elu
    bin_size: 32
    conv_type: gnn_lsh
    distance_dim: 128
    embedding_dim: 512
    ffn_dist_hidden_dim: 128
    ffn_dist_num_layers: 2
    layernorm: true
    max_num_bins: 200
    num_convs: 8
    num_node_messages: 2
    width: 512
  input_encoding: split
  learned_representation_mode: last
  # Settings for the mamba variant (not the conv_type selected at top level).
  mamba:
    activation: elu
    conv_type: mamba
    d_conv: 4
    d_state: 16
    dropout: 0.0
    embedding_dim: 128
    expand: 2
    num_convs: 2
    num_heads: 2
    width: 128
  pt_mode: direct-elemtype-split
  sin_phi_mode: linear
  trainable: all
# Sample-count limits and loop / data-loader settings. ntest, ntrain and
# nvalid are all null; presumably that means "no limit" - TODO confirm.
ntest: null
ntrain: null
num_epochs: 100
num_workers: 8
nvalid: null
# patience (20) is smaller than num_epochs (100); presumably an early-stopping
# window measured in epochs - verify against the consumer.
patience: 20
prefetch_factor: 10
ray_train: false
# Hyperparameter-tuning settings (NOTE(review): purpose inferred from the key
# name). Both sched and search_alg are null, so presumably none of the
# scheduler / search-algorithm sub-sections (asha, hyperband, hyperopt,
# nevergrad) is selected for this run - confirm.
raytune:
  # NOTE(review): max_t is 200 here and under hyperband, while the top-level
  # num_epochs of this file is 100.
  asha:
    brackets: 1
    grace_period: 10
    max_t: 200
    reduction_factor: 4
  # Metric name and direction: val_loss with mode "min".
  default_metric: val_loss
  default_mode: min
  hyperband:
    max_t: 200
    reduction_factor: 4
  hyperopt:
    n_random_steps: 10
  local_dir: null
  nevergrad:
    n_random_steps: 10
  sched: null
  search_alg: null
# Boolean switches for this run: save_attention and test are true,
# sort_data is false.
save_attention: true
sort_data: false
test: true
# Samples listed for testing: three entries, each pinned to a version.
# Versions are quoted so they stay strings regardless of the parser's
# implicit-typing rules (a two-component version such as 2.2 would otherwise
# load as a float).
test_dataset:
  clic_edm_qq_pf:
    version: '2.2.0'
  clic_edm_ttbar_pf:
    version: '2.2.0'
  clic_edm_ww_fullhad_pf:
    version: '2.2.0'
# NOTE(review): test_datasets (plural, empty list) sits next to test_dataset
# (singular, populated mapping); confirm which key the consumer reads.
test_datasets: []
# Training samples, nested as <dataset> -> <group> -> samples. The same three
# samples and versions are also listed under valid_dataset; presumably the
# train/validation split is made by the consumer - TODO confirm.
train_dataset:
  clic:
    physical:
      # NOTE(review): the top-level gpu_batch_multiplier is 100; presumably it
      # scales this batch_size - verify against the consumer.
      batch_size: 1
      # Versions are quoted so they stay strings regardless of the parser's
      # implicit-typing rules.
      samples:
        clic_edm_qq_pf:
          version: '2.2.0'
        clic_edm_ttbar_pf:
          version: '2.2.0'
        clic_edm_ww_fullhad_pf:
          version: '2.2.0'
# No explicit validation frequency is set (null).
val_freq: null
# Validation samples, nested as <dataset> -> <group> -> samples. The same
# three samples and versions are also listed under train_dataset; presumably
# the train/validation split is made by the consumer - TODO confirm.
valid_dataset:
  clic:
    physical:
      batch_size: 1
      # Versions are quoted so they stay strings regardless of the parser's
      # implicit-typing rules.
      samples:
        clic_edm_qq_pf:
          version: '2.2.0'
        clic_edm_ttbar_pf:
          version: '2.2.0'
        clic_edm_ww_fullhad_pf:
          version: '2.2.0'