[
  {
    "step": 30,
    "loss": 1.4062,
    "type": "train",
    "grad_norm": 8.482284545898438,
    "learning_rate": 1.8347107438016528e-06,
    "epoch": 0.2475502836513667
  },
  {
    "step": 30,
    "loss": 1.1563361883163452,
    "type": "eval",
    "eval_runtime": 238.6436,
    "eval_samples_per_second": 2.891,
    "eval_steps_per_second": 2.891,
    "epoch": 0.2475502836513667
  },
  {
    "step": 60,
    "loss": 1.1662,
    "type": "train",
    "grad_norm": 6.751504898071289,
    "learning_rate": 1.6694214876033058e-06,
    "epoch": 0.4951005673027334
  },
  {
    "step": 60,
    "loss": 1.0914983749389648,
    "type": "eval",
    "eval_runtime": 248.6513,
    "eval_samples_per_second": 2.775,
    "eval_steps_per_second": 2.775,
    "epoch": 0.4951005673027334
  },
  {
    "step": 90,
    "loss": 1.1109,
    "type": "train",
    "grad_norm": 4.135236740112305,
    "learning_rate": 1.5041322314049587e-06,
    "epoch": 0.7426508509541001
  },
  {
    "step": 90,
    "loss": 1.0496803522109985,
    "type": "eval",
    "eval_runtime": 239.5109,
    "eval_samples_per_second": 2.881,
    "eval_steps_per_second": 2.881,
    "epoch": 0.7426508509541001
  },
  {
    "step": 120,
    "loss": 1.0639,
    "type": "train",
    "grad_norm": 5.695450782775879,
    "learning_rate": 1.3388429752066116e-06,
    "epoch": 0.9902011346054668
  },
  {
    "step": 120,
    "loss": 1.0101323127746582,
    "type": "eval",
    "eval_runtime": 234.2087,
    "eval_samples_per_second": 2.946,
    "eval_steps_per_second": 2.946,
    "epoch": 0.9902011346054668
  },
  {
    "step": 150,
    "loss": 0.9416,
    "type": "train",
    "grad_norm": 7.571918487548828,
    "learning_rate": 1.1735537190082645e-06,
    "epoch": 1.2377514182568334
  },
  {
    "step": 150,
    "loss": 1.0040597915649414,
    "type": "eval",
    "eval_runtime": 247.1746,
    "eval_samples_per_second": 2.792,
    "eval_steps_per_second": 2.792,
    "epoch": 1.2377514182568334
  },
  {
    "step": 180,
    "loss": 0.925,
    "type": "train",
    "grad_norm": 4.53548002243042,
    "learning_rate": 1.0082644628099172e-06,
    "epoch": 1.4853017019082002
  },
  {
    "step": 180,
    "loss": 0.9938931465148926,
    "type": "eval",
    "eval_runtime": 234.8252,
    "eval_samples_per_second": 2.938,
    "eval_steps_per_second": 2.938,
    "epoch": 1.4853017019082002
  },
  {
    "step": 210,
    "loss": 0.9033,
    "type": "train",
    "grad_norm": 5.361794948577881,
    "learning_rate": 8.429752066115701e-07,
    "epoch": 1.7328519855595668
  },
  {
    "step": 210,
    "loss": 0.9868502020835876,
    "type": "eval",
    "eval_runtime": 234.8772,
    "eval_samples_per_second": 2.938,
    "eval_steps_per_second": 2.938,
    "epoch": 1.7328519855595668
  },
  {
    "step": 240,
    "loss": 0.9087,
    "type": "train",
    "grad_norm": 4.525313377380371,
    "learning_rate": 6.776859504132231e-07,
    "epoch": 1.9804022692109333
  },
  {
    "step": 240,
    "loss": 0.9824326634407043,
    "type": "eval",
    "eval_runtime": 234.8918,
    "eval_samples_per_second": 2.938,
    "eval_steps_per_second": 2.938,
    "epoch": 1.9804022692109333
  },
  {
    "step": 270,
    "loss": 0.8166,
    "type": "train",
    "grad_norm": 4.654973030090332,
    "learning_rate": 5.12396694214876e-07,
    "epoch": 2.2279525528623
  },
  {
    "step": 270,
    "loss": 0.9874295592308044,
    "type": "eval",
    "eval_runtime": 243.7953,
    "eval_samples_per_second": 2.83,
    "eval_steps_per_second": 2.83,
    "epoch": 2.2279525528623
  },
  {
    "step": 300,
    "loss": 0.8226,
    "type": "train",
    "grad_norm": 5.9346442222595215,
    "learning_rate": 3.471074380165289e-07,
    "epoch": 2.475502836513667
  },
  {
    "step": 300,
    "loss": 0.9854046106338501,
    "type": "eval",
    "eval_runtime": 243.3847,
    "eval_samples_per_second": 2.835,
    "eval_steps_per_second": 2.835,
    "epoch": 2.475502836513667
  },
  {
    "step": 330,
    "loss": 0.8289,
    "type": "train",
    "grad_norm": 4.637845516204834,
    "learning_rate": 1.818181818181818e-07,
    "epoch": 2.7230531201650336
  },
  {
    "step": 330,
    "loss": 0.9841367602348328,
    "type": "eval",
    "eval_runtime": 242.6797,
    "eval_samples_per_second": 2.843,
    "eval_steps_per_second": 2.843,
    "epoch": 2.7230531201650336
  },
  {
    "step": 360,
    "loss": 0.8137,
    "type": "train",
    "grad_norm": 4.132049560546875,
    "learning_rate": 1.652892561983471e-08,
    "epoch": 2.9706034038164004
  },
  {
    "step": 360,
    "loss": 0.9834251403808594,
    "type": "eval",
    "eval_runtime": 242.7445,
    "eval_samples_per_second": 2.842,
    "eval_steps_per_second": 2.842,
    "epoch": 2.9706034038164004
  },
  {
    "step": 363,
    "train_runtime": 22924.4691,
    "train_samples_per_second": 0.507,
    "train_steps_per_second": 0.016,
    "total_flos": 7877706776576.0,
    "train_loss": 0.9745982139892158,
    "epoch": 2.9953584321815367
  }
]