[ { "step": 30, "loss": 1.4062, "type": "train", "grad_norm": 8.482284545898438, "learning_rate": 1.8347107438016528e-06, "epoch": 0.2475502836513667 }, { "step": 30, "loss": 1.1563361883163452, "type": "eval", "eval_runtime": 238.6436, "eval_samples_per_second": 2.891, "eval_steps_per_second": 2.891, "epoch": 0.2475502836513667 }, { "step": 60, "loss": 1.1662, "type": "train", "grad_norm": 6.751504898071289, "learning_rate": 1.6694214876033058e-06, "epoch": 0.4951005673027334 }, { "step": 60, "loss": 1.0914983749389648, "type": "eval", "eval_runtime": 248.6513, "eval_samples_per_second": 2.775, "eval_steps_per_second": 2.775, "epoch": 0.4951005673027334 }, { "step": 90, "loss": 1.1109, "type": "train", "grad_norm": 4.135236740112305, "learning_rate": 1.5041322314049587e-06, "epoch": 0.7426508509541001 }, { "step": 90, "loss": 1.0496803522109985, "type": "eval", "eval_runtime": 239.5109, "eval_samples_per_second": 2.881, "eval_steps_per_second": 2.881, "epoch": 0.7426508509541001 }, { "step": 120, "loss": 1.0639, "type": "train", "grad_norm": 5.695450782775879, "learning_rate": 1.3388429752066116e-06, "epoch": 0.9902011346054668 }, { "step": 120, "loss": 1.0101323127746582, "type": "eval", "eval_runtime": 234.2087, "eval_samples_per_second": 2.946, "eval_steps_per_second": 2.946, "epoch": 0.9902011346054668 }, { "step": 150, "loss": 0.9416, "type": "train", "grad_norm": 7.571918487548828, "learning_rate": 1.1735537190082645e-06, "epoch": 1.2377514182568334 }, { "step": 150, "loss": 1.0040597915649414, "type": "eval", "eval_runtime": 247.1746, "eval_samples_per_second": 2.792, "eval_steps_per_second": 2.792, "epoch": 1.2377514182568334 }, { "step": 180, "loss": 0.925, "type": "train", "grad_norm": 4.53548002243042, "learning_rate": 1.0082644628099172e-06, "epoch": 1.4853017019082002 }, { "step": 180, "loss": 0.9938931465148926, "type": "eval", "eval_runtime": 234.8252, "eval_samples_per_second": 2.938, "eval_steps_per_second": 2.938, "epoch": 1.4853017019082002 }, { "step": 210, "loss": 0.9033, "type": "train", "grad_norm": 5.361794948577881, "learning_rate": 8.429752066115701e-07, "epoch": 1.7328519855595668 }, { "step": 210, "loss": 0.9868502020835876, "type": "eval", "eval_runtime": 234.8772, "eval_samples_per_second": 2.938, "eval_steps_per_second": 2.938, "epoch": 1.7328519855595668 }, { "step": 240, "loss": 0.9087, "type": "train", "grad_norm": 4.525313377380371, "learning_rate": 6.776859504132231e-07, "epoch": 1.9804022692109333 }, { "step": 240, "loss": 0.9824326634407043, "type": "eval", "eval_runtime": 234.8918, "eval_samples_per_second": 2.938, "eval_steps_per_second": 2.938, "epoch": 1.9804022692109333 }, { "step": 270, "loss": 0.8166, "type": "train", "grad_norm": 4.654973030090332, "learning_rate": 5.12396694214876e-07, "epoch": 2.2279525528623 }, { "step": 270, "loss": 0.9874295592308044, "type": "eval", "eval_runtime": 243.7953, "eval_samples_per_second": 2.83, "eval_steps_per_second": 2.83, "epoch": 2.2279525528623 }, { "step": 300, "loss": 0.8226, "type": "train", "grad_norm": 5.9346442222595215, "learning_rate": 3.471074380165289e-07, "epoch": 2.475502836513667 }, { "step": 300, "loss": 0.9854046106338501, "type": "eval", "eval_runtime": 243.3847, "eval_samples_per_second": 2.835, "eval_steps_per_second": 2.835, "epoch": 2.475502836513667 }, { "step": 330, "loss": 0.8289, "type": "train", "grad_norm": 4.637845516204834, "learning_rate": 1.818181818181818e-07, "epoch": 2.7230531201650336 }, { "step": 330, "loss": 0.9841367602348328, "type": "eval", "eval_runtime": 
242.6797, "eval_samples_per_second": 2.843, "eval_steps_per_second": 2.843, "epoch": 2.7230531201650336 }, { "step": 360, "loss": 0.8137, "type": "train", "grad_norm": 4.132049560546875, "learning_rate": 1.652892561983471e-08, "epoch": 2.9706034038164004 }, { "step": 360, "loss": 0.9834251403808594, "type": "eval", "eval_runtime": 242.7445, "eval_samples_per_second": 2.842, "eval_steps_per_second": 2.842, "epoch": 2.9706034038164004 }, { "step": 363, "train_runtime": 22924.4691, "train_samples_per_second": 0.507, "train_steps_per_second": 0.016, "total_flos": 7877706776576.0, "train_loss": 0.9745982139892158, "epoch": 2.9953584321815367 } ]