{"train/loss": 12.6508, "train/grad_norm": 331.41705322265625, "train/learning_rate": 0.0, "train/epoch": 1.0, "train/global_step": 10, "_timestamp": 1721466687.523135, "_runtime": 70.82246494293213, "_step": 21, "train_runtime": 51.2027, "train_samples_per_second": 0.195, "train_steps_per_second": 0.195, "total_flos": 0.0, "train_loss": 13.596752548217774}