|
{ |
|
"best_metric": 3.029510498046875, |
|
"best_model_checkpoint": "trained_models/microsoftDialoGPTmedium_crd3/checkpoint-9912", |
|
"epoch": 2.957040572792363, |
|
"eval_steps": 168, |
|
"global_step": 9912, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.949880668257757e-05, |
|
"loss": 3.805, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 3.3926753997802734, |
|
"eval_runtime": 142.7844, |
|
"eval_samples_per_second": 29.653, |
|
"eval_steps_per_second": 3.712, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.899761336515513e-05, |
|
"loss": 3.3486, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 3.2977957725524902, |
|
"eval_runtime": 142.7066, |
|
"eval_samples_per_second": 29.669, |
|
"eval_steps_per_second": 3.714, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.84964200477327e-05, |
|
"loss": 3.2539, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 3.2512104511260986, |
|
"eval_runtime": 142.7113, |
|
"eval_samples_per_second": 29.668, |
|
"eval_steps_per_second": 3.714, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.7995226730310264e-05, |
|
"loss": 3.2009, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 3.221975564956665, |
|
"eval_runtime": 142.7296, |
|
"eval_samples_per_second": 29.664, |
|
"eval_steps_per_second": 3.713, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.749403341288783e-05, |
|
"loss": 3.1685, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 3.197237014770508, |
|
"eval_runtime": 142.735, |
|
"eval_samples_per_second": 29.663, |
|
"eval_steps_per_second": 3.713, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.6992840095465395e-05, |
|
"loss": 3.1332, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 3.1779494285583496, |
|
"eval_runtime": 142.7443, |
|
"eval_samples_per_second": 29.661, |
|
"eval_steps_per_second": 3.713, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.649164677804296e-05, |
|
"loss": 3.1231, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 3.163996696472168, |
|
"eval_runtime": 142.7827, |
|
"eval_samples_per_second": 29.653, |
|
"eval_steps_per_second": 3.712, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.5990453460620526e-05, |
|
"loss": 3.1005, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 3.1513357162475586, |
|
"eval_runtime": 142.6836, |
|
"eval_samples_per_second": 29.674, |
|
"eval_steps_per_second": 3.715, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.548926014319809e-05, |
|
"loss": 3.0952, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 3.1407930850982666, |
|
"eval_runtime": 142.6804, |
|
"eval_samples_per_second": 29.675, |
|
"eval_steps_per_second": 3.715, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.498806682577566e-05, |
|
"loss": 3.0903, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 3.1297414302825928, |
|
"eval_runtime": 142.7084, |
|
"eval_samples_per_second": 29.669, |
|
"eval_steps_per_second": 3.714, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.448687350835322e-05, |
|
"loss": 3.0737, |
|
"step": 1848 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 3.120724678039551, |
|
"eval_runtime": 142.7344, |
|
"eval_samples_per_second": 29.663, |
|
"eval_steps_per_second": 3.713, |
|
"step": 1848 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.398568019093079e-05, |
|
"loss": 3.0681, |
|
"step": 2016 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 3.1170568466186523, |
|
"eval_runtime": 142.7306, |
|
"eval_samples_per_second": 29.664, |
|
"eval_steps_per_second": 3.713, |
|
"step": 2016 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.348448687350836e-05, |
|
"loss": 3.034, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 3.1076815128326416, |
|
"eval_runtime": 142.7258, |
|
"eval_samples_per_second": 29.665, |
|
"eval_steps_per_second": 3.713, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.298329355608592e-05, |
|
"loss": 3.0319, |
|
"step": 2352 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 3.1014297008514404, |
|
"eval_runtime": 142.7172, |
|
"eval_samples_per_second": 29.667, |
|
"eval_steps_per_second": 3.714, |
|
"step": 2352 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.2482100238663484e-05, |
|
"loss": 3.0075, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 3.0964250564575195, |
|
"eval_runtime": 142.7047, |
|
"eval_samples_per_second": 29.67, |
|
"eval_steps_per_second": 3.714, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.1980906921241056e-05, |
|
"loss": 3.0282, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 3.0913186073303223, |
|
"eval_runtime": 142.7245, |
|
"eval_samples_per_second": 29.666, |
|
"eval_steps_per_second": 3.713, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.1479713603818615e-05, |
|
"loss": 3.0055, |
|
"step": 2856 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 3.0834176540374756, |
|
"eval_runtime": 142.7795, |
|
"eval_samples_per_second": 29.654, |
|
"eval_steps_per_second": 3.712, |
|
"step": 2856 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.097852028639618e-05, |
|
"loss": 3.0101, |
|
"step": 3024 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 3.0793018341064453, |
|
"eval_runtime": 142.7762, |
|
"eval_samples_per_second": 29.655, |
|
"eval_steps_per_second": 3.712, |
|
"step": 3024 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.047732696897375e-05, |
|
"loss": 3.0017, |
|
"step": 3192 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 3.075371742248535, |
|
"eval_runtime": 142.7297, |
|
"eval_samples_per_second": 29.664, |
|
"eval_steps_per_second": 3.713, |
|
"step": 3192 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.997613365155131e-05, |
|
"loss": 2.977, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 3.0743861198425293, |
|
"eval_runtime": 142.7454, |
|
"eval_samples_per_second": 29.661, |
|
"eval_steps_per_second": 3.713, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.9474940334128877e-05, |
|
"loss": 2.8771, |
|
"step": 3528 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_loss": 3.0757055282592773, |
|
"eval_runtime": 142.7589, |
|
"eval_samples_per_second": 29.658, |
|
"eval_steps_per_second": 3.713, |
|
"step": 3528 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.897374701670645e-05, |
|
"loss": 2.8633, |
|
"step": 3696 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_loss": 3.0775437355041504, |
|
"eval_runtime": 142.7329, |
|
"eval_samples_per_second": 29.664, |
|
"eval_steps_per_second": 3.713, |
|
"step": 3696 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.8472553699284014e-05, |
|
"loss": 2.8591, |
|
"step": 3864 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 3.0708892345428467, |
|
"eval_runtime": 142.7479, |
|
"eval_samples_per_second": 29.661, |
|
"eval_steps_per_second": 3.713, |
|
"step": 3864 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.797136038186157e-05, |
|
"loss": 2.8625, |
|
"step": 4032 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_loss": 3.0683975219726562, |
|
"eval_runtime": 142.757, |
|
"eval_samples_per_second": 29.659, |
|
"eval_steps_per_second": 3.713, |
|
"step": 4032 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.7470167064439145e-05, |
|
"loss": 2.8605, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 3.066983222961426, |
|
"eval_runtime": 142.718, |
|
"eval_samples_per_second": 29.667, |
|
"eval_steps_per_second": 3.714, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.696897374701671e-05, |
|
"loss": 2.8466, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_loss": 3.0646440982818604, |
|
"eval_runtime": 142.755, |
|
"eval_samples_per_second": 29.659, |
|
"eval_steps_per_second": 3.713, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.6467780429594276e-05, |
|
"loss": 2.8398, |
|
"step": 4536 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 3.0618984699249268, |
|
"eval_runtime": 142.7437, |
|
"eval_samples_per_second": 29.662, |
|
"eval_steps_per_second": 3.713, |
|
"step": 4536 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3.596658711217184e-05, |
|
"loss": 2.8502, |
|
"step": 4704 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_loss": 3.059511661529541, |
|
"eval_runtime": 142.746, |
|
"eval_samples_per_second": 29.661, |
|
"eval_steps_per_second": 3.713, |
|
"step": 4704 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 3.546539379474941e-05, |
|
"loss": 2.8523, |
|
"step": 4872 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_loss": 3.0564985275268555, |
|
"eval_runtime": 142.7041, |
|
"eval_samples_per_second": 29.67, |
|
"eval_steps_per_second": 3.714, |
|
"step": 4872 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 3.496420047732697e-05, |
|
"loss": 2.8545, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_loss": 3.0538723468780518, |
|
"eval_runtime": 142.7089, |
|
"eval_samples_per_second": 29.669, |
|
"eval_steps_per_second": 3.714, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 3.446300715990454e-05, |
|
"loss": 2.8431, |
|
"step": 5208 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 3.053504705429077, |
|
"eval_runtime": 142.7279, |
|
"eval_samples_per_second": 29.665, |
|
"eval_steps_per_second": 3.713, |
|
"step": 5208 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3.39618138424821e-05, |
|
"loss": 2.865, |
|
"step": 5376 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 3.0489370822906494, |
|
"eval_runtime": 142.7484, |
|
"eval_samples_per_second": 29.661, |
|
"eval_steps_per_second": 3.713, |
|
"step": 5376 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.346062052505967e-05, |
|
"loss": 2.8412, |
|
"step": 5544 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_loss": 3.048715114593506, |
|
"eval_runtime": 142.7028, |
|
"eval_samples_per_second": 29.67, |
|
"eval_steps_per_second": 3.714, |
|
"step": 5544 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.2959427207637234e-05, |
|
"loss": 2.8382, |
|
"step": 5712 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_loss": 3.0453927516937256, |
|
"eval_runtime": 142.6981, |
|
"eval_samples_per_second": 29.671, |
|
"eval_steps_per_second": 3.714, |
|
"step": 5712 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.24582338902148e-05, |
|
"loss": 2.8356, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 3.0460968017578125, |
|
"eval_runtime": 142.6846, |
|
"eval_samples_per_second": 29.674, |
|
"eval_steps_per_second": 3.714, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3.1957040572792365e-05, |
|
"loss": 2.8265, |
|
"step": 6048 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_loss": 3.042219400405884, |
|
"eval_runtime": 142.7165, |
|
"eval_samples_per_second": 29.667, |
|
"eval_steps_per_second": 3.714, |
|
"step": 6048 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.145584725536993e-05, |
|
"loss": 2.8301, |
|
"step": 6216 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_loss": 3.0397562980651855, |
|
"eval_runtime": 142.7319, |
|
"eval_samples_per_second": 29.664, |
|
"eval_steps_per_second": 3.713, |
|
"step": 6216 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.0954653937947496e-05, |
|
"loss": 2.8286, |
|
"step": 6384 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_loss": 3.038638114929199, |
|
"eval_runtime": 142.7562, |
|
"eval_samples_per_second": 29.659, |
|
"eval_steps_per_second": 3.713, |
|
"step": 6384 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.045346062052506e-05, |
|
"loss": 2.8416, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_loss": 3.035384178161621, |
|
"eval_runtime": 142.669, |
|
"eval_samples_per_second": 29.677, |
|
"eval_steps_per_second": 3.715, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2.9952267303102627e-05, |
|
"loss": 2.8179, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 3.040719509124756, |
|
"eval_runtime": 142.723, |
|
"eval_samples_per_second": 29.666, |
|
"eval_steps_per_second": 3.713, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 2.9451073985680195e-05, |
|
"loss": 2.6965, |
|
"step": 6888 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_loss": 3.0478203296661377, |
|
"eval_runtime": 142.7498, |
|
"eval_samples_per_second": 29.66, |
|
"eval_steps_per_second": 3.713, |
|
"step": 6888 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 2.8949880668257757e-05, |
|
"loss": 2.7437, |
|
"step": 7056 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 3.048868179321289, |
|
"eval_runtime": 142.7646, |
|
"eval_samples_per_second": 29.657, |
|
"eval_steps_per_second": 3.712, |
|
"step": 7056 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 2.8448687350835323e-05, |
|
"loss": 2.7252, |
|
"step": 7224 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 3.045444965362549, |
|
"eval_runtime": 142.7306, |
|
"eval_samples_per_second": 29.664, |
|
"eval_steps_per_second": 3.713, |
|
"step": 7224 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 2.794749403341289e-05, |
|
"loss": 2.718, |
|
"step": 7392 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_loss": 3.04612398147583, |
|
"eval_runtime": 142.7393, |
|
"eval_samples_per_second": 29.662, |
|
"eval_steps_per_second": 3.713, |
|
"step": 7392 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 2.7446300715990454e-05, |
|
"loss": 2.7246, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 3.0447964668273926, |
|
"eval_runtime": 142.6751, |
|
"eval_samples_per_second": 29.676, |
|
"eval_steps_per_second": 3.715, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.694510739856802e-05, |
|
"loss": 2.7365, |
|
"step": 7728 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_loss": 3.0433876514434814, |
|
"eval_runtime": 142.7111, |
|
"eval_samples_per_second": 29.668, |
|
"eval_steps_per_second": 3.714, |
|
"step": 7728 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.6443914081145588e-05, |
|
"loss": 2.7242, |
|
"step": 7896 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 3.044072151184082, |
|
"eval_runtime": 142.6648, |
|
"eval_samples_per_second": 29.678, |
|
"eval_steps_per_second": 3.715, |
|
"step": 7896 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 2.594272076372315e-05, |
|
"loss": 2.7343, |
|
"step": 8064 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_loss": 3.0415401458740234, |
|
"eval_runtime": 142.737, |
|
"eval_samples_per_second": 29.663, |
|
"eval_steps_per_second": 3.713, |
|
"step": 8064 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 2.5441527446300715e-05, |
|
"loss": 2.7312, |
|
"step": 8232 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_loss": 3.0406272411346436, |
|
"eval_runtime": 142.7448, |
|
"eval_samples_per_second": 29.661, |
|
"eval_steps_per_second": 3.713, |
|
"step": 8232 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 2.494033412887828e-05, |
|
"loss": 2.7359, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_loss": 3.0369086265563965, |
|
"eval_runtime": 142.71, |
|
"eval_samples_per_second": 29.669, |
|
"eval_steps_per_second": 3.714, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.443914081145585e-05, |
|
"loss": 2.714, |
|
"step": 8568 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 3.039921522140503, |
|
"eval_runtime": 142.7829, |
|
"eval_samples_per_second": 29.653, |
|
"eval_steps_per_second": 3.712, |
|
"step": 8568 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.3937947494033415e-05, |
|
"loss": 2.7311, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_loss": 3.0377085208892822, |
|
"eval_runtime": 142.7212, |
|
"eval_samples_per_second": 29.666, |
|
"eval_steps_per_second": 3.714, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.3436754176610977e-05, |
|
"loss": 2.7191, |
|
"step": 8904 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_loss": 3.036844253540039, |
|
"eval_runtime": 142.7078, |
|
"eval_samples_per_second": 29.669, |
|
"eval_steps_per_second": 3.714, |
|
"step": 8904 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.2935560859188546e-05, |
|
"loss": 2.7246, |
|
"step": 9072 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_loss": 3.0361039638519287, |
|
"eval_runtime": 142.7607, |
|
"eval_samples_per_second": 29.658, |
|
"eval_steps_per_second": 3.713, |
|
"step": 9072 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.243436754176611e-05, |
|
"loss": 2.721, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_loss": 3.0351145267486572, |
|
"eval_runtime": 142.7366, |
|
"eval_samples_per_second": 29.663, |
|
"eval_steps_per_second": 3.713, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 2.1933174224343677e-05, |
|
"loss": 2.71, |
|
"step": 9408 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"eval_loss": 3.032681941986084, |
|
"eval_runtime": 142.729, |
|
"eval_samples_per_second": 29.665, |
|
"eval_steps_per_second": 3.713, |
|
"step": 9408 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.1431980906921242e-05, |
|
"loss": 2.7252, |
|
"step": 9576 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_loss": 3.033348798751831, |
|
"eval_runtime": 142.7408, |
|
"eval_samples_per_second": 29.662, |
|
"eval_steps_per_second": 3.713, |
|
"step": 9576 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 2.0930787589498808e-05, |
|
"loss": 2.7181, |
|
"step": 9744 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_loss": 3.0330820083618164, |
|
"eval_runtime": 142.7512, |
|
"eval_samples_per_second": 29.66, |
|
"eval_steps_per_second": 3.713, |
|
"step": 9744 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.0429594272076373e-05, |
|
"loss": 2.7141, |
|
"step": 9912 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_loss": 3.029510498046875, |
|
"eval_runtime": 142.7075, |
|
"eval_samples_per_second": 29.669, |
|
"eval_steps_per_second": 3.714, |
|
"step": 9912 |
|
} |
|
], |
|
"logging_steps": 168, |
|
"max_steps": 16760, |
|
"num_train_epochs": 5, |
|
"save_steps": 168, |
|
"total_flos": 3.221291229039821e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|