{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.995722277064824,
  "global_step": 569,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 0.0,
      "loss": 1.6786,
      "step": 1
    },
    {
      "epoch": 0.05,
      "learning_rate": 9.998088142969587e-06,
      "loss": 1.64,
      "step": 10
    },
    {
      "epoch": 0.11,
      "learning_rate": 9.985017573980262e-06,
      "loss": 1.06,
      "step": 20
    },
    {
      "epoch": 0.16,
      "learning_rate": 9.956012654497073e-06,
      "loss": 0.486,
      "step": 30
    },
    {
      "epoch": 0.21,
      "learning_rate": 9.911850333228427e-06,
      "loss": 0.2374,
      "step": 40
    },
    {
      "epoch": 0.26,
      "learning_rate": 9.865696363132769e-06,
      "loss": 0.1837,
      "step": 50
    },
    {
      "epoch": 0.32,
      "learning_rate": 9.7946219754852e-06,
      "loss": 0.1587,
      "step": 60
    },
    {
      "epoch": 0.37,
      "learning_rate": 9.708883781112711e-06,
      "loss": 0.1517,
      "step": 70
    },
    {
      "epoch": 0.42,
      "learning_rate": 9.629910009876223e-06,
      "loss": 0.1553,
      "step": 80
    },
    {
      "epoch": 0.47,
      "learning_rate": 9.51846738818602e-06,
      "loss": 0.1469,
      "step": 90
    },
    {
      "epoch": 0.53,
      "learning_rate": 9.39320554720971e-06,
      "loss": 0.145,
      "step": 100
    },
    {
      "epoch": 0.53,
      "eval_loss": 0.05455470830202103,
      "eval_runtime": 85.5684,
      "eval_samples_per_second": 11.383,
      "eval_steps_per_second": 1.426,
      "step": 100
    },
    {
      "epoch": 0.58,
      "learning_rate": 9.297503309182422e-06,
      "loss": 0.1549,
      "step": 110
    },
    {
      "epoch": 0.63,
      "learning_rate": 9.149650063920841e-06,
      "loss": 0.1435,
      "step": 120
    },
    {
      "epoch": 0.68,
      "learning_rate": 8.989105585268073e-06,
      "loss": 0.1414,
      "step": 130
    },
    {
      "epoch": 0.74,
      "learning_rate": 8.816360880276967e-06,
      "loss": 0.1421,
      "step": 140
    },
    {
      "epoch": 0.79,
      "learning_rate": 8.631944269006895e-06,
      "loss": 0.1391,
      "step": 150
    },
    {
      "epoch": 0.84,
      "learning_rate": 8.436419768716853e-06,
      "loss": 0.1386,
      "step": 160
    },
    {
      "epoch": 0.9,
      "learning_rate": 8.230385368882732e-06,
      "loss": 0.1382,
      "step": 170
    },
    {
      "epoch": 0.95,
      "learning_rate": 8.014471202314443e-06,
      "loss": 0.1382,
      "step": 180
    },
    {
      "epoch": 1.0,
      "learning_rate": 7.789337617966275e-06,
      "loss": 0.1357,
      "step": 190
    },
    {
      "epoch": 1.05,
      "learning_rate": 7.5794033237905e-06,
      "loss": 0.1408,
      "step": 200
    },
    {
      "epoch": 1.05,
      "eval_loss": 0.048922911286354065,
      "eval_runtime": 85.5902,
      "eval_samples_per_second": 11.38,
      "eval_steps_per_second": 1.425,
      "step": 200
    },
    {
      "epoch": 1.11,
      "learning_rate": 7.338671397287409e-06,
      "loss": 0.1341,
      "step": 210
    },
    {
      "epoch": 1.16,
      "learning_rate": 7.1158757783214904e-06,
      "loss": 0.1391,
      "step": 220
    },
    {
      "epoch": 1.21,
      "learning_rate": 6.862201608610134e-06,
      "loss": 0.1343,
      "step": 230
    },
    {
      "epoch": 1.26,
      "learning_rate": 6.602832106793113e-06,
      "loss": 0.1316,
      "step": 240
    },
    {
      "epoch": 1.32,
      "learning_rate": 6.338560525031794e-06,
      "loss": 0.1316,
      "step": 250
    },
    {
      "epoch": 1.37,
      "learning_rate": 6.0701951079422615e-06,
      "loss": 0.1313,
      "step": 260
    },
    {
      "epoch": 1.42,
      "learning_rate": 5.825844147403353e-06,
      "loss": 0.1347,
      "step": 270
    },
    {
      "epoch": 1.47,
      "learning_rate": 5.579447229838992e-06,
      "loss": 0.1337,
      "step": 280
    },
    {
      "epoch": 1.53,
      "learning_rate": 5.3040158758857886e-06,
      "loss": 0.1315,
      "step": 290
    },
    {
      "epoch": 1.58,
      "learning_rate": 5.027654723907197e-06,
      "loss": 0.1309,
      "step": 300
    },
    {
      "epoch": 1.58,
      "eval_loss": 0.04729650914669037,
      "eval_runtime": 85.6206,
      "eval_samples_per_second": 11.376,
      "eval_steps_per_second": 1.425,
      "step": 300
    },
    {
      "epoch": 1.63,
      "learning_rate": 4.751208993096637e-06,
      "loss": 0.1307,
      "step": 310
    },
    {
      "epoch": 1.68,
      "learning_rate": 4.475524161322288e-06,
      "loss": 0.1305,
      "step": 320
    },
    {
      "epoch": 1.74,
      "learning_rate": 4.2014433793290435e-06,
      "loss": 0.1298,
      "step": 330
    },
    {
      "epoch": 1.79,
      "learning_rate": 3.92980489205774e-06,
      "loss": 0.1308,
      "step": 340
    },
    {
      "epoch": 1.84,
      "learning_rate": 3.6614394749682057e-06,
      "loss": 0.13,
      "step": 350
    },
    {
      "epoch": 1.9,
      "learning_rate": 3.3971678932068875e-06,
      "loss": 0.1296,
      "step": 360
    },
    {
      "epoch": 1.95,
      "learning_rate": 3.1377983913898673e-06,
      "loss": 0.1289,
      "step": 370
    },
    {
      "epoch": 2.0,
      "learning_rate": 2.8841242216785116e-06,
      "loss": 0.129,
      "step": 380
    },
    {
      "epoch": 2.05,
      "learning_rate": 2.6369212177078306e-06,
      "loss": 0.1291,
      "step": 390
    },
    {
      "epoch": 2.11,
      "learning_rate": 2.3969454217874325e-06,
      "loss": 0.1277,
      "step": 400
    },
    {
      "epoch": 2.11,
      "eval_loss": 0.046792980283498764,
      "eval_runtime": 85.593,
      "eval_samples_per_second": 11.379,
      "eval_steps_per_second": 1.425,
      "step": 400
    },
    {
      "epoch": 2.16,
      "learning_rate": 2.164930772631996e-06,
      "loss": 0.1276,
      "step": 410
    },
    {
      "epoch": 2.21,
      "learning_rate": 1.94158686069306e-06,
      "loss": 0.1275,
      "step": 420
    },
    {
      "epoch": 2.26,
      "learning_rate": 1.7275967579572427e-06,
      "loss": 0.1296,
      "step": 430
    },
    {
      "epoch": 2.32,
      "learning_rate": 1.5236149288481428e-06,
      "loss": 0.1275,
      "step": 440
    },
    {
      "epoch": 2.37,
      "learning_rate": 1.3302652286212397e-06,
      "loss": 0.1287,
      "step": 450
    },
    {
      "epoch": 2.42,
      "learning_rate": 1.148138995373459e-06,
      "loss": 0.1267,
      "step": 460
    },
    {
      "epoch": 2.47,
      "learning_rate": 9.777932415027608e-07,
      "loss": 0.1282,
      "step": 470
    },
    {
      "epoch": 2.53,
      "learning_rate": 8.197489501489924e-07,
      "loss": 0.1249,
      "step": 480
    },
    {
      "epoch": 2.58,
      "learning_rate": 6.744894818261311e-07,
      "loss": 0.127,
      "step": 490
    },
    {
      "epoch": 2.63,
      "learning_rate": 5.424590961190474e-07,
      "loss": 0.1253,
      "step": 500
    },
    {
      "epoch": 2.63,
      "eval_loss": 0.046507786959409714,
      "eval_runtime": 85.5909,
      "eval_samples_per_second": 11.38,
      "eval_steps_per_second": 1.425,
      "step": 500
    },
    {
      "epoch": 2.69,
      "learning_rate": 4.240615929660341e-07,
      "loss": 0.1279,
      "step": 510
    },
    {
      "epoch": 2.74,
      "learning_rate": 3.1965907768255035e-07,
      "loss": 0.128,
      "step": 520
    },
    {
      "epoch": 2.79,
      "learning_rate": 2.2957085350325092e-07,
      "loss": 0.1288,
      "step": 530
    },
    {
      "epoch": 2.84,
      "learning_rate": 1.540724450293035e-07,
      "loss": 0.1264,
      "step": 540
    },
    {
      "epoch": 2.9,
      "learning_rate": 9.339475556770006e-08,
      "loss": 0.1274,
      "step": 550
    },
    {
      "epoch": 2.95,
      "learning_rate": 4.77233609397082e-08,
      "loss": 0.1277,
      "step": 560
    },
    {
      "epoch": 3.0,
      "step": 569,
      "total_flos": 1.6024405879092675e+18,
      "train_loss": 0.18567189055712655,
      "train_runtime": 15471.5941,
      "train_samples_per_second": 4.707,
      "train_steps_per_second": 0.037
    }
  ],
  "max_steps": 569,
  "num_train_epochs": 4,
  "total_flos": 1.6024405879092675e+18,
  "trial_name": null,
  "trial_params": null
}