mode: pt | |
device: gpu | |
precision: bf16 | |
eval_only: false | |
predict_only: false | |
seed: 2137 | |
model: | |
klass: hf_t5 | |
name: /home/jovyan/bert-train/nanot5/templates/base_slavic_120k_sub | |
overwrite: | |
dropout_rate: 0.1 | |
add_config: | |
is_bf16: true | |
checkpoint_path: '' | |
random_init: true | |
compile: false | |
data: | |
input_length: 512 | |
mlm_probability: 0.15 | |
mean_noise_span_length: 3.0 | |
num_workers: 8 | |
optim: | |
name: adafactor | |
base_lr: 0.02 | |
batch_size: 128 | |
total_steps: 120000 | |
epochs: -1 | |
warmup_steps: 10000 | |
lr_scheduler: legacy | |
weight_decay: 0.0 | |
grad_clip: 1.0 | |
grad_acc: 8 | |
final_cosine: 1.0e-05 | |
eval: | |
every_steps: 5000 | |
steps: 500 | |
checkpoint: | |
every_steps: 10000 | |
logging: | |
neptune: false | |
neptune_creds: | |
project: null | |
api_token: null | |
tags: '' | |
every_steps: 100 | |
grad_l2: true | |
weights_l2: true | |