# File size: 2,582 bytes
# Revision: 1742c16 (short id as shown by the file viewer)
# The viewer's line-number gutter (1-127) is not part of the document and is omitted.
# Run configuration; keys appear in alphabetical order at every level.
# NOTE(review): the nesting below was rebuilt from that ordering because the
# indentation had been lost (flat, the file contains duplicate keys such as
# `conv_type`, `activation` and `version`). Confirm against the file named
# in `config:` before relying on it.

# --- General run settings ---------------------------------------------------
backend: pytorch
checkpoint_freq: null
comet: false
comet_name: particleflow-pt
comet_offline: false
comet_step_freq: 100
config: parameters/pytorch/pyg-clic.yaml
# Matches the `model.attention` sub-section below.
conv_type: attention
data_dir: /scratch/persistent/joosep/tensorflow_datasets
dataset: clic
dtype: bfloat16
gpu_batch_multiplier: 100
gpus: 1
load: experiments/pyg-clic_20241001_215132_345408/checkpoints/checkpoint-26-2.004527.pth
lr: 0.0001
lr_schedule: cosinedecay
# Only a `onecycle` entry is present here although `lr_schedule` is
# `cosinedecay` - presumably per-schedule options; verify with the consumer.
lr_schedule_config:
  onecycle:
    pct_start: 0.3
make_plots: true

# --- Model --------------------------------------------------------------------
# Shared mode keys plus one sub-section per architecture (attention, gnn_lsh,
# mamba); each sub-section carries its own `conv_type` equal to its name.
model:
  attention:
    activation: gelu
    attention_type: math
    conv_type: attention
    dropout_conv_id_ff: 0.0
    dropout_conv_id_mha: 0.0
    dropout_conv_reg_ff: 0.1
    dropout_conv_reg_mha: 0.1
    dropout_ff: 0.1
    head_dim: 32
    num_convs: 4
    num_heads: 32
    use_pre_layernorm: true
  cos_phi_mode: linear
  energy_mode: direct-elemtype-split
  eta_mode: linear
  gnn_lsh:
    activation: elu
    bin_size: 32
    conv_type: gnn_lsh
    distance_dim: 128
    embedding_dim: 512
    ffn_dist_hidden_dim: 128
    ffn_dist_num_layers: 2
    layernorm: true
    max_num_bins: 200
    num_convs: 8
    num_node_messages: 2
    width: 512
  input_encoding: split
  learned_representation_mode: last
  mamba:
    activation: elu
    conv_type: mamba
    d_conv: 4
    d_state: 16
    dropout: 0.0
    embedding_dim: 128
    expand: 2
    num_convs: 2
    num_heads: 2
    width: 128
  pt_mode: direct-elemtype-split
  sin_phi_mode: linear
  trainable: all

# --- Data-loading and training-loop limits ------------------------------------
ntest: null
ntrain: null
num_epochs: 100
num_workers: 8
nvalid: null
patience: 20
prefetch_factor: 10
ray_train: false

# --- Ray Tune -----------------------------------------------------------------
# One sub-section per scheduler / search algorithm.
# NOTE(review): `sched` and `search_alg` are placed at the `raytune` level;
# key order alone would also allow them under `nevergrad` - confirm.
raytune:
  asha:
    brackets: 1
    grace_period: 10
    max_t: 200
    reduction_factor: 4
  default_metric: val_loss
  default_mode: min
  hyperband:
    max_t: 200
    reduction_factor: 4
  hyperopt:
    n_random_steps: 10
  local_dir: null
  nevergrad:
    n_random_steps: 10
  sched: null
  search_alg: null

save_attention: true
sort_data: false
test: true

# --- Datasets -----------------------------------------------------------------
# `test_dataset` maps sample names directly to versions, whereas the train and
# valid sections nest the same three samples under `clic.physical.samples`.
test_dataset:
  clic_edm_qq_pf:
    version: 2.2.0
  clic_edm_ttbar_pf:
    version: 2.2.0
  clic_edm_ww_fullhad_pf:
    version: 2.2.0
# NOTE(review): empty list next to the populated `test_dataset` mapping above -
# confirm which of the two keys the consumer actually reads.
test_datasets: []
train_dataset:
  clic:
    physical:
      batch_size: 1
      samples:
        clic_edm_qq_pf:
          version: 2.2.0
        clic_edm_ttbar_pf:
          version: 2.2.0
        clic_edm_ww_fullhad_pf:
          version: 2.2.0
val_freq: null
valid_dataset:
  clic:
    physical:
      batch_size: 1
      samples:
        clic_edm_qq_pf:
          version: 2.2.0
        clic_edm_ttbar_pf:
          version: 2.2.0
        clic_edm_ww_fullhad_pf:
          version: 2.2.0
# End of configuration (stray table delimiter from the file viewer removed).