# File size: 834 Bytes
# dcf87c3
# Run-level settings (top-level scalars).
# NOTE(review): 'pt' presumably selects pre-training — confirm against the
# consuming code.
mode: 'pt'
device: 'gpu'
precision: 'bf16'
# Both flags are off, so neither an eval-only nor a predict-only run is
# requested.
eval_only: false
predict_only: false
# NOTE(review): presumably seeds the RNGs for reproducibility — confirm.
seed: 2137
# Model settings. The children are nested under `model` so that `model` is a
# mapping (not null) and its `name` key does not collide with the optimizer's
# `name` key elsewhere in this file.
model:
  klass: hf_t5
  # Absolute local directory rather than a hub identifier; the path is
  # machine-specific and must exist on the host running the job.
  name: /home/jovyan/bert-train/nanot5/templates/base_slavic_120k_sub
  # NOTE(review): presumably these values override fields of the loaded
  # model config — confirm against the consumer.
  overwrite:
    dropout_rate: 0.1
  # NOTE(review): presumably extra fields added to the model config — confirm.
  add_config:
    is_bf16: true
  # Empty string: no checkpoint path is supplied.
  checkpoint_path: ''
  random_init: true
  compile: false
# Data settings, nested under `data` so the section is a mapping rather than
# a null key followed by stray top-level keys.
data:
  input_length: 512
  # NOTE(review): these two look like span-corruption masking parameters
  # (masking rate and mean masked-span length) — confirm against the consumer.
  mlm_probability: 0.15
  mean_noise_span_length: 3.0
  num_workers: 8
# Optimizer and schedule settings, nested under `optim` so that `name` here
# does not duplicate the model's `name` key.
optim:
  name: adafactor
  base_lr: 0.02
  batch_size: 128
  total_steps: 120000
  # NOTE(review): -1 presumably means "no epoch limit, use total_steps" —
  # confirm against the consumer.
  epochs: -1
  warmup_steps: 10000
  lr_scheduler: legacy
  weight_decay: 0.0
  grad_clip: 1.0
  # NOTE(review): how grad_acc interacts with batch_size (divides it vs.
  # multiplies it) is not visible here — verify before changing either.
  grad_acc: 8
  # NOTE(review): presumably only read by a cosine schedule; lr_scheduler is
  # `legacy` above, so confirm whether this value is used at all.
  final_cosine: 1.0e-05
# Evaluation settings, nested under `eval` so that `every_steps` here is
# distinct from the `every_steps` keys of the checkpoint and logging sections.
eval:
  every_steps: 5000
  # NOTE(review): presumably the number of evaluation batches per run —
  # confirm against the consumer.
  steps: 500
# Checkpoint settings, nested under `checkpoint` so that `every_steps` here is
# distinct from the `every_steps` keys of the eval and logging sections.
checkpoint:
  every_steps: 10000
# Logging settings, nested under `logging` so that `every_steps` here is
# distinct from the `every_steps` keys of the eval and checkpoint sections.
logging:
  neptune: false
  # Credentials are left unset (explicit nulls / empty string); `neptune` is
  # false above.
  # NOTE(review): `tags` is placed inside `neptune_creds` following the
  # nanoT5 reference layout — confirm against the consumer.
  neptune_creds:
    project: null
    api_token: null
    tags: ''
  every_steps: 100
  # NOTE(review): presumably toggle logging of gradient and weight L2 norms
  # — confirm.
  grad_l2: true
  weights_l2: true