{"train/loss": 0.0645, "train/grad_norm": 2.141892433166504, "train/learning_rate": 0.0, "train/epoch": 20.0, "train/global_step": 220, "_timestamp": 1717452024.9213681, "_runtime": 14735.882298231125, "_step": 50, "train_runtime": 4192.5433, "train_samples_per_second": 0.196, "train_steps_per_second": 0.052, "total_flos": 4.4695391805696e+16, "train_loss": 0.23985157879916105} |