---
{}
---

# TRAINING ARGS:

{'per_device_train_batch_size': 16, 'warmup_ratio': 0.1, 'num_train_epochs': 1}

# OPTIMIZER ARGS:

{'lr': 0.001, 'eps': (1e-30, 0.001), 'clip_threshold': 1.0, 'decay_rate': 0.0, 'beta1': None, 'weight_decay': 0.0, 'scale_parameter': False, 'relative_step': False, 'warmup_init': False, 'differentiable': False}

# DEVICE:

NVIDIA A100 80GB PCIe