|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"global_step": 41090, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9391579459722564e-05, |
|
"loss": 1.7982, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.878315891944512e-05, |
|
"loss": 1.2914, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.817473837916768e-05, |
|
"loss": 1.1456, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.756631783889024e-05, |
|
"loss": 1.0345, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.6957897298612804e-05, |
|
"loss": 0.9665, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.6349476758335365e-05, |
|
"loss": 0.9272, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.574105621805793e-05, |
|
"loss": 0.8571, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.513263567778048e-05, |
|
"loss": 0.8139, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.4524215137503043e-05, |
|
"loss": 0.6986, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.3915794597225605e-05, |
|
"loss": 0.6375, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 4.330737405694817e-05, |
|
"loss": 0.6407, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 4.269895351667073e-05, |
|
"loss": 0.6139, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.209053297639329e-05, |
|
"loss": 0.5973, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 4.1482112436115845e-05, |
|
"loss": 0.6094, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 4.0873691895838406e-05, |
|
"loss": 0.5824, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 4.026527135556097e-05, |
|
"loss": 0.5459, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 3.965685081528352e-05, |
|
"loss": 0.4764, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 3.9048430275006084e-05, |
|
"loss": 0.427, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 3.8440009734728646e-05, |
|
"loss": 0.4192, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.783158919445121e-05, |
|
"loss": 0.4278, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 3.722316865417377e-05, |
|
"loss": 0.4157, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.6614748113896324e-05, |
|
"loss": 0.4156, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.6006327573618886e-05, |
|
"loss": 0.4137, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 3.539790703334145e-05, |
|
"loss": 0.3731, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 3.4789486493064e-05, |
|
"loss": 0.3516, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 3.418106595278657e-05, |
|
"loss": 0.3098, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 3.357264541250913e-05, |
|
"loss": 0.3051, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 3.296422487223169e-05, |
|
"loss": 0.3029, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 3.235580433195425e-05, |
|
"loss": 0.3107, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 3.174738379167681e-05, |
|
"loss": 0.2969, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 3.1138963251399365e-05, |
|
"loss": 0.2886, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 3.053054271112193e-05, |
|
"loss": 0.2887, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 2.9922122170844492e-05, |
|
"loss": 0.2803, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 2.9313701630567047e-05, |
|
"loss": 0.2256, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 2.8705281090289608e-05, |
|
"loss": 0.2209, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 2.8096860550012173e-05, |
|
"loss": 0.2197, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 2.7488440009734728e-05, |
|
"loss": 0.2305, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 2.688001946945729e-05, |
|
"loss": 0.2252, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 2.627159892917985e-05, |
|
"loss": 0.207, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 2.566317838890241e-05, |
|
"loss": 0.2187, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 2.505475784862497e-05, |
|
"loss": 0.207, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 2.444633730834753e-05, |
|
"loss": 0.1685, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 2.383791676807009e-05, |
|
"loss": 0.1772, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 2.3229496227792653e-05, |
|
"loss": 0.1728, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 2.262107568751521e-05, |
|
"loss": 0.1678, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 2.201265514723777e-05, |
|
"loss": 0.1642, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 2.1404234606960334e-05, |
|
"loss": 0.1664, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 2.0795814066682892e-05, |
|
"loss": 0.1575, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 2.018739352640545e-05, |
|
"loss": 0.1599, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 1.9578972986128012e-05, |
|
"loss": 0.1249, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 1.8970552445850574e-05, |
|
"loss": 0.1266, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 1.8362131905573132e-05, |
|
"loss": 0.1304, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 1.7753711365295694e-05, |
|
"loss": 0.1161, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 1.7145290825018255e-05, |
|
"loss": 0.1253, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 1.6536870284740814e-05, |
|
"loss": 0.1215, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 1.5928449744463375e-05, |
|
"loss": 0.1158, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 1.5320029204185933e-05, |
|
"loss": 0.1219, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 1.4711608663908493e-05, |
|
"loss": 0.1039, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 1.4103188123631053e-05, |
|
"loss": 0.081, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 1.3494767583353615e-05, |
|
"loss": 0.0922, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 1.2886347043076175e-05, |
|
"loss": 0.0844, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 1.2277926502798735e-05, |
|
"loss": 0.0759, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 1.1669505962521295e-05, |
|
"loss": 0.0882, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 1.1061085422243855e-05, |
|
"loss": 0.0856, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 1.0452664881966416e-05, |
|
"loss": 0.0881, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 9.844244341688976e-06, |
|
"loss": 0.0708, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 9.235823801411536e-06, |
|
"loss": 0.0538, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"learning_rate": 8.627403261134098e-06, |
|
"loss": 0.0554, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 8.018982720856656e-06, |
|
"loss": 0.06, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 7.410562180579217e-06, |
|
"loss": 0.0514, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 6.802141640301777e-06, |
|
"loss": 0.0536, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 6.193721100024337e-06, |
|
"loss": 0.0583, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 5.585300559746897e-06, |
|
"loss": 0.052, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.976880019469458e-06, |
|
"loss": 0.0481, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 4.368459479192018e-06, |
|
"loss": 0.0378, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"learning_rate": 3.760038938914578e-06, |
|
"loss": 0.0296, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 3.151618398637138e-06, |
|
"loss": 0.037, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 2.543197858359698e-06, |
|
"loss": 0.0339, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"learning_rate": 1.9347773180822585e-06, |
|
"loss": 0.0369, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 1.3263567778048189e-06, |
|
"loss": 0.0344, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 7.17936237527379e-07, |
|
"loss": 0.0301, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"learning_rate": 1.0951569724993917e-07, |
|
"loss": 0.0325, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 41090, |
|
"total_flos": 1.1825796643443088e+17, |
|
"train_loss": 0.30929526851019973, |
|
"train_runtime": 55900.09, |
|
"train_samples_per_second": 11.761, |
|
"train_steps_per_second": 0.735 |
|
} |
|
], |
|
"max_steps": 41090, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.1825796643443088e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|