|
[ |
|
{ |
|
"loss": 1.8184, |
|
"grad_norm": 0.774284303188324, |
|
"learning_rate": 8.923190911336132e-05, |
|
"epoch": 0.2218976306523778, |
|
"step": 451 |
|
}, |
|
{ |
|
"eval_loss": 1.749639630317688, |
|
"eval_runtime": 1083.3199, |
|
"eval_samples_per_second": 26.684, |
|
"eval_steps_per_second": 6.671, |
|
"epoch": 0.2218976306523778, |
|
"step": 451 |
|
}, |
|
{ |
|
"loss": 1.7064, |
|
"grad_norm": 0.7226008772850037, |
|
"learning_rate": 7.809335638429242e-05, |
|
"epoch": 0.4437952613047556, |
|
"step": 902 |
|
}, |
|
{ |
|
"eval_loss": 1.6820720434188843, |
|
"eval_runtime": 1087.8354, |
|
"eval_samples_per_second": 26.573, |
|
"eval_steps_per_second": 6.643, |
|
"epoch": 0.4437952613047556, |
|
"step": 902 |
|
}, |
|
{ |
|
"loss": 1.6623, |
|
"grad_norm": 0.7210889458656311, |
|
"learning_rate": 6.695480365522352e-05, |
|
"epoch": 0.6656928919571334, |
|
"step": 1353 |
|
}, |
|
{ |
|
"eval_loss": 1.6484241485595703, |
|
"eval_runtime": 1084.0066, |
|
"eval_samples_per_second": 26.667, |
|
"eval_steps_per_second": 6.667, |
|
"epoch": 0.6656928919571334, |
|
"step": 1353 |
|
}, |
|
{ |
|
"loss": 1.6362, |
|
"grad_norm": 0.7425829172134399, |
|
"learning_rate": 5.581625092615461e-05, |
|
"epoch": 0.8875905226095112, |
|
"step": 1804 |
|
}, |
|
{ |
|
"eval_loss": 1.6261749267578125, |
|
"eval_runtime": 1085.6726, |
|
"eval_samples_per_second": 26.626, |
|
"eval_steps_per_second": 6.657, |
|
"epoch": 0.8875905226095112, |
|
"step": 1804 |
|
}, |
|
{ |
|
"loss": 1.5914, |
|
"grad_norm": 0.7476176023483276, |
|
"learning_rate": 4.4677698197085704e-05, |
|
"epoch": 1.109488153261889, |
|
"step": 2255 |
|
}, |
|
{ |
|
"eval_loss": 1.6124544143676758, |
|
"eval_runtime": 1084.6953, |
|
"eval_samples_per_second": 26.65, |
|
"eval_steps_per_second": 6.663, |
|
"epoch": 1.109488153261889, |
|
"step": 2255 |
|
}, |
|
{ |
|
"loss": 1.5557, |
|
"grad_norm": 0.7473255395889282, |
|
"learning_rate": 3.3539145468016795e-05, |
|
"epoch": 1.3313857839142669, |
|
"step": 2706 |
|
}, |
|
{ |
|
"eval_loss": 1.6011990308761597, |
|
"eval_runtime": 1084.5737, |
|
"eval_samples_per_second": 26.653, |
|
"eval_steps_per_second": 6.663, |
|
"epoch": 1.3313857839142669, |
|
"step": 2706 |
|
}, |
|
{ |
|
"loss": 1.5468, |
|
"grad_norm": 0.750347375869751, |
|
"learning_rate": 2.240059273894789e-05, |
|
"epoch": 1.5532834145666445, |
|
"step": 3157 |
|
}, |
|
{ |
|
"eval_loss": 1.592125415802002, |
|
"eval_runtime": 1086.1382, |
|
"eval_samples_per_second": 26.614, |
|
"eval_steps_per_second": 6.654, |
|
"epoch": 1.5532834145666445, |
|
"step": 3157 |
|
}, |
|
{ |
|
"loss": 1.539, |
|
"grad_norm": 0.7452530860900879, |
|
"learning_rate": 1.1262040009878982e-05, |
|
"epoch": 1.7751810452190224, |
|
"step": 3608 |
|
}, |
|
{ |
|
"eval_loss": 1.5855711698532104, |
|
"eval_runtime": 1084.064, |
|
"eval_samples_per_second": 26.665, |
|
"eval_steps_per_second": 6.667, |
|
"epoch": 1.7751810452190224, |
|
"step": 3608 |
|
}, |
|
{ |
|
"loss": 1.5373, |
|
"grad_norm": 0.7762174606323242, |
|
"learning_rate": 1.2348728081007656e-07, |
|
"epoch": 1.9970786758714003, |
|
"step": 4059 |
|
}, |
|
{ |
|
"eval_loss": 1.5827687978744507, |
|
"eval_runtime": 1086.4478, |
|
"eval_samples_per_second": 26.607, |
|
"eval_steps_per_second": 6.652, |
|
"epoch": 1.9970786758714003, |
|
"step": 4059 |
|
}, |
|
{ |
|
"train_runtime": 69830.4547, |
|
"train_samples_per_second": 7.451, |
|
"train_steps_per_second": 0.058, |
|
"total_flos": 9.47865607059515e+18, |
|
"train_loss": 1.5162640926171476, |
|
"epoch": 1.9995387382954841, |
|
"step": 4064 |
|
} |
|
] |