{"train/loss": 15.5728, "train/grad_norm": 719.929443359375, "train/learning_rate": 0.0, "train/epoch": 1.0, "train/global_step": 10, "_timestamp": 1721444757.2328217, "_runtime": 77.77619981765747, "_step": 21, "train_runtime": 59.3281, "train_samples_per_second": 0.169, "train_steps_per_second": 0.169, "total_flos": 0.0, "train_loss": 15.215684986114502}