# lama/configs/training/trainer/any_gpu_large_ssim_ddp_final.yaml
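# PyTorch Lightning Trainer settings for the final large LaMa training runs
# (DDP across all available GPUs, checkpoints selected by an SSIM/FID-based
# validation metric), consumed via Hydra.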
# @package _group_
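# The `@package _group_` directive above is Hydra 1.0 syntax: it places this
# file's contents under its config group (here `trainer`), so the keys below
# resolve as `trainer.kwargs.*` — which is why `val_check_interval` can
# interpolate `${trainer.kwargs.limit_train_batches}`.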
kwargs:
  gpus: -1  # use all available GPUs
  accelerator: ddp
  max_epochs: 40
  gradient_clip_val: 1
  log_gpu_memory: null  # set to min_max or all to debug GPU memory usage (YAML null, not the string "None")
  limit_train_batches: 25000
  val_check_interval: ${trainer.kwargs.limit_train_batches}  # validate once per (truncated) epoch
  # fast_dev_run: True  # uncomment for a faster debug run
  # track_grad_norm: 2  # uncomment to track the L2 norm of gradients
  log_every_n_steps: 250
  precision: 32
  # precision: 16
  # amp_backend: native
  # amp_level: O1
  # resume_from_checkpoint: path  # override via command line: trainer.resume_from_checkpoint=path_to_checkpoint
  terminate_on_nan: False
  # auto_scale_batch_size: True  # uncomment to find the largest batch size that fits in memory
  check_val_every_n_epoch: 1
  num_sanity_val_steps: 8  # run 8 validation batches before training as a sanity check
  # limit_val_batches: 1000000
  replace_sampler_ddp: False  # do not let Lightning replace the data loaders' samplers with DistributedSampler
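# A minimal usage sketch (assuming the repo's Hydra-based entrypoint,
# bin/train.py): any key above can be overridden from the command line, e.g.
#   python bin/train.py trainer.kwargs.max_epochs=100 trainer.kwargs.precision=16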
checkpoint_kwargs:
  verbose: True
  save_top_k: 5
  save_last: True
  period: 1
  monitor: val_ssim_fid100_f1_total_mean
  mode: max
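# The keys under checkpoint_kwargs match the constructor arguments of
# pytorch_lightning.callbacks.ModelCheckpoint (the exact wiring lives in the
# training code): with `monitor: val_ssim_fid100_f1_total_mean` and
# `mode: max`, the five highest-scoring checkpoints are kept
# (`save_top_k: 5`) plus the most recent one (`save_last: True`), with the
# metric checked once per epoch (`period: 1`).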