{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 2167,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00046146746654360867,
      "grad_norm": 9.51411938145824,
      "learning_rate": 9.216589861751152e-08,
      "loss": 1.1509,
      "step": 1
    },
    {
      "epoch": 0.0023073373327180432,
      "grad_norm": 9.594129890510615,
      "learning_rate": 4.608294930875577e-07,
      "loss": 1.1369,
      "step": 5
    },
    {
      "epoch": 0.0046146746654360865,
      "grad_norm": 5.144220521250804,
      "learning_rate": 9.216589861751154e-07,
      "loss": 1.0949,
      "step": 10
    },
    {
      "epoch": 0.00692201199815413,
      "grad_norm": 3.40382562328973,
      "learning_rate": 1.382488479262673e-06,
      "loss": 1.0191,
      "step": 15
    },
    {
      "epoch": 0.009229349330872173,
      "grad_norm": 3.215915212337555,
      "learning_rate": 1.8433179723502307e-06,
      "loss": 1.0296,
      "step": 20
    },
    {
      "epoch": 0.011536686663590217,
      "grad_norm": 2.650742433924389,
      "learning_rate": 2.3041474654377884e-06,
      "loss": 0.976,
      "step": 25
    },
    {
      "epoch": 0.01384402399630826,
      "grad_norm": 2.7902393320247687,
      "learning_rate": 2.764976958525346e-06,
      "loss": 1.0089,
      "step": 30
    },
    {
      "epoch": 0.016151361329026302,
      "grad_norm": 2.4654209985126148,
      "learning_rate": 3.225806451612903e-06,
      "loss": 0.9951,
      "step": 35
    },
    {
      "epoch": 0.018458698661744346,
      "grad_norm": 2.527650306992279,
      "learning_rate": 3.6866359447004615e-06,
      "loss": 0.9988,
      "step": 40
    },
    {
      "epoch": 0.02076603599446239,
      "grad_norm": 2.655474988385568,
      "learning_rate": 4.147465437788019e-06,
      "loss": 1.004,
      "step": 45
    },
    {
      "epoch": 0.023073373327180433,
      "grad_norm": 2.537803872777302,
      "learning_rate": 4.608294930875577e-06,
      "loss": 0.9863,
      "step": 50
    },
    {
      "epoch": 0.025380710659898477,
      "grad_norm": 2.6464740488754366,
      "learning_rate": 5.0691244239631346e-06,
      "loss": 0.9694,
      "step": 55
    },
    {
      "epoch": 0.02768804799261652,
      "grad_norm": 2.7507142522196566,
      "learning_rate": 5.529953917050692e-06,
      "loss": 0.9688,
      "step": 60
    },
    {
      "epoch": 0.029995385325334564,
      "grad_norm": 2.836749382191462,
      "learning_rate": 5.9907834101382485e-06,
      "loss": 0.9968,
      "step": 65
    },
    {
      "epoch": 0.032302722658052604,
      "grad_norm": 3.048439725741993,
      "learning_rate": 6.451612903225806e-06,
      "loss": 1.0023,
      "step": 70
    },
    {
      "epoch": 0.03461005999077065,
      "grad_norm": 3.072103382083384,
      "learning_rate": 6.912442396313365e-06,
      "loss": 0.9909,
      "step": 75
    },
    {
      "epoch": 0.03691739732348869,
      "grad_norm": 2.4206649546182386,
      "learning_rate": 7.373271889400923e-06,
      "loss": 1.0277,
      "step": 80
    },
    {
      "epoch": 0.03922473465620674,
      "grad_norm": 2.6546178755277254,
      "learning_rate": 7.83410138248848e-06,
      "loss": 1.0123,
      "step": 85
    },
    {
      "epoch": 0.04153207198892478,
      "grad_norm": 2.3888921486796058,
      "learning_rate": 8.294930875576038e-06,
      "loss": 0.9688,
      "step": 90
    },
    {
      "epoch": 0.043839409321642826,
      "grad_norm": 2.32499977135365,
      "learning_rate": 8.755760368663595e-06,
      "loss": 0.9903,
      "step": 95
    },
    {
      "epoch": 0.046146746654360866,
      "grad_norm": 2.392443248966377,
      "learning_rate": 9.216589861751153e-06,
      "loss": 0.9893,
      "step": 100
    },
    {
      "epoch": 0.048454083987078914,
      "grad_norm": 2.680320857358668,
      "learning_rate": 9.67741935483871e-06,
      "loss": 0.9846,
      "step": 105
    },
    {
      "epoch": 0.050761421319796954,
      "grad_norm": 2.532469504703905,
      "learning_rate": 1.0138248847926269e-05,
      "loss": 1.0089,
      "step": 110
    },
    {
      "epoch": 0.053068758652515,
      "grad_norm": 3.1046898569172945,
      "learning_rate": 1.0599078341013826e-05,
      "loss": 1.0266,
      "step": 115
    },
    {
      "epoch": 0.05537609598523304,
      "grad_norm": 2.5574603903328743,
      "learning_rate": 1.1059907834101385e-05,
      "loss": 1.002,
      "step": 120
    },
    {
      "epoch": 0.05768343331795108,
      "grad_norm": 2.452071743693235,
      "learning_rate": 1.152073732718894e-05,
      "loss": 1.03,
      "step": 125
    },
    {
      "epoch": 0.05999077065066913,
      "grad_norm": 2.418236004711402,
      "learning_rate": 1.1981566820276497e-05,
      "loss": 1.006,
      "step": 130
    },
    {
      "epoch": 0.06229810798338717,
      "grad_norm": 2.378674843033103,
      "learning_rate": 1.2442396313364056e-05,
      "loss": 0.9717,
      "step": 135
    },
    {
      "epoch": 0.06460544531610521,
      "grad_norm": 2.288433336347559,
      "learning_rate": 1.2903225806451613e-05,
      "loss": 1.0247,
      "step": 140
    },
    {
      "epoch": 0.06691278264882326,
      "grad_norm": 2.7611308296401282,
      "learning_rate": 1.3364055299539171e-05,
      "loss": 1.003,
      "step": 145
    },
    {
      "epoch": 0.0692201199815413,
      "grad_norm": 2.421569626628109,
      "learning_rate": 1.382488479262673e-05,
      "loss": 1.0197,
      "step": 150
    },
    {
      "epoch": 0.07152745731425934,
      "grad_norm": 2.3978303307399247,
      "learning_rate": 1.4285714285714287e-05,
      "loss": 0.9898,
      "step": 155
    },
    {
      "epoch": 0.07383479464697738,
      "grad_norm": 2.980112762291027,
      "learning_rate": 1.4746543778801846e-05,
      "loss": 1.0275,
      "step": 160
    },
    {
      "epoch": 0.07614213197969544,
      "grad_norm": 2.6176775912790404,
      "learning_rate": 1.5207373271889403e-05,
      "loss": 1.0382,
      "step": 165
    },
    {
      "epoch": 0.07844946931241348,
      "grad_norm": 2.44164739077761,
      "learning_rate": 1.566820276497696e-05,
      "loss": 1.0236,
      "step": 170
    },
    {
      "epoch": 0.08075680664513152,
      "grad_norm": 2.404929271261824,
      "learning_rate": 1.6129032258064517e-05,
      "loss": 1.0304,
      "step": 175
    },
    {
      "epoch": 0.08306414397784956,
      "grad_norm": 2.3566906067196105,
      "learning_rate": 1.6589861751152075e-05,
      "loss": 1.0355,
      "step": 180
    },
    {
      "epoch": 0.0853714813105676,
      "grad_norm": 2.6391147388298246,
      "learning_rate": 1.705069124423963e-05,
      "loss": 1.0417,
      "step": 185
    },
    {
      "epoch": 0.08767881864328565,
      "grad_norm": 2.684091771591401,
      "learning_rate": 1.751152073732719e-05,
      "loss": 1.0434,
      "step": 190
    },
    {
      "epoch": 0.08998615597600369,
      "grad_norm": 2.482023036660176,
      "learning_rate": 1.7972350230414748e-05,
      "loss": 1.0638,
      "step": 195
    },
    {
      "epoch": 0.09229349330872173,
      "grad_norm": 2.4167958259613944,
      "learning_rate": 1.8433179723502307e-05,
      "loss": 1.0422,
      "step": 200
    },
    {
      "epoch": 0.09460083064143977,
      "grad_norm": 2.5336845010658586,
      "learning_rate": 1.8894009216589862e-05,
      "loss": 1.0711,
      "step": 205
    },
    {
      "epoch": 0.09690816797415783,
      "grad_norm": 2.51621301326488,
      "learning_rate": 1.935483870967742e-05,
      "loss": 1.0881,
      "step": 210
    },
    {
      "epoch": 0.09921550530687587,
      "grad_norm": 2.4344384988965175,
      "learning_rate": 1.981566820276498e-05,
      "loss": 1.0735,
      "step": 215
    },
    {
      "epoch": 0.10152284263959391,
      "grad_norm": 2.5052346642024728,
      "learning_rate": 1.9999883200175286e-05,
      "loss": 1.0593,
      "step": 220
    },
    {
      "epoch": 0.10383017997231195,
      "grad_norm": 2.3275673568454986,
      "learning_rate": 1.9999169433349454e-05,
      "loss": 1.0766,
      "step": 225
    },
    {
      "epoch": 0.10613751730503,
      "grad_norm": 2.444969349213072,
      "learning_rate": 1.9997806834748455e-05,
      "loss": 1.0805,
      "step": 230
    },
    {
      "epoch": 0.10844485463774804,
      "grad_norm": 2.5019688689621455,
      "learning_rate": 1.9995795492789368e-05,
      "loss": 1.0795,
      "step": 235
    },
    {
      "epoch": 0.11075219197046608,
      "grad_norm": 2.41754553967893,
      "learning_rate": 1.9993135537985285e-05,
      "loss": 1.0419,
      "step": 240
    },
    {
      "epoch": 0.11305952930318412,
      "grad_norm": 2.4632725320939297,
      "learning_rate": 1.9989827142936864e-05,
      "loss": 1.1022,
      "step": 245
    },
    {
      "epoch": 0.11536686663590216,
      "grad_norm": 2.1283515103999004,
      "learning_rate": 1.9985870522321118e-05,
      "loss": 1.0727,
      "step": 250
    },
    {
      "epoch": 0.11767420396862022,
      "grad_norm": 2.5373992715116316,
      "learning_rate": 1.9981265932877486e-05,
      "loss": 1.0595,
      "step": 255
    },
    {
      "epoch": 0.11998154130133826,
      "grad_norm": 2.382000156883209,
      "learning_rate": 1.9976013673391185e-05,
      "loss": 1.0585,
      "step": 260
    },
    {
      "epoch": 0.1222888786340563,
      "grad_norm": 2.4022206882693857,
      "learning_rate": 1.9970114084673796e-05,
      "loss": 1.089,
      "step": 265
    },
    {
      "epoch": 0.12459621596677434,
      "grad_norm": 2.4577010071803707,
      "learning_rate": 1.996356754954119e-05,
      "loss": 1.0971,
      "step": 270
    },
    {
      "epoch": 0.12690355329949238,
      "grad_norm": 2.973750019978815,
      "learning_rate": 1.995637449278864e-05,
      "loss": 1.083,
      "step": 275
    },
    {
      "epoch": 0.12921089063221042,
      "grad_norm": 2.771443155668778,
      "learning_rate": 1.994853538116329e-05,
      "loss": 1.0948,
      "step": 280
    },
    {
      "epoch": 0.13151822796492849,
      "grad_norm": 2.2945686154224902,
      "learning_rate": 1.9940050723333867e-05,
      "loss": 1.0684,
      "step": 285
    },
    {
      "epoch": 0.13382556529764653,
      "grad_norm": 2.2402037197255864,
      "learning_rate": 1.9930921069857653e-05,
      "loss": 1.0605,
      "step": 290
    },
    {
      "epoch": 0.13613290263036457,
      "grad_norm": 3.0321973969800955,
      "learning_rate": 1.9921147013144782e-05,
      "loss": 1.0629,
      "step": 295
    },
    {
      "epoch": 0.1384402399630826,
      "grad_norm": 2.4295781427476215,
      "learning_rate": 1.991072918741978e-05,
      "loss": 1.0353,
      "step": 300
    },
    {
      "epoch": 0.14074757729580065,
      "grad_norm": 2.5121504192318698,
      "learning_rate": 1.9899668268680438e-05,
      "loss": 1.1156,
      "step": 305
    },
    {
      "epoch": 0.1430549146285187,
      "grad_norm": 2.454863605026453,
      "learning_rate": 1.988796497465392e-05,
      "loss": 1.0921,
      "step": 310
    },
    {
      "epoch": 0.14536225196123673,
      "grad_norm": 2.2532325296884532,
      "learning_rate": 1.98756200647502e-05,
      "loss": 1.0683,
      "step": 315
    },
    {
      "epoch": 0.14766958929395477,
      "grad_norm": 2.3557373973476334,
      "learning_rate": 1.9862634340012796e-05,
      "loss": 1.0559,
      "step": 320
    },
    {
      "epoch": 0.1499769266266728,
      "grad_norm": 2.493215513816939,
      "learning_rate": 1.9849008643066774e-05,
      "loss": 1.0725,
      "step": 325
    },
    {
      "epoch": 0.15228426395939088,
      "grad_norm": 3.2539520651857594,
      "learning_rate": 1.983474385806408e-05,
      "loss": 1.0674,
      "step": 330
    },
    {
      "epoch": 0.15459160129210892,
      "grad_norm": 2.2486130234020165,
      "learning_rate": 1.9819840910626174e-05,
      "loss": 1.0705,
      "step": 335
    },
    {
      "epoch": 0.15689893862482696,
      "grad_norm": 2.237162636902425,
      "learning_rate": 1.9804300767783958e-05,
      "loss": 1.0772,
      "step": 340
    },
    {
      "epoch": 0.159206275957545,
      "grad_norm": 6.51182752032121,
      "learning_rate": 1.9788124437915034e-05,
      "loss": 1.0837,
      "step": 345
    },
    {
      "epoch": 0.16151361329026304,
      "grad_norm": 2.322519163352806,
      "learning_rate": 1.9771312970678258e-05,
      "loss": 1.0405,
      "step": 350
    },
    {
      "epoch": 0.16382095062298108,
      "grad_norm": 2.2704370082078773,
      "learning_rate": 1.9753867456945653e-05,
      "loss": 1.0632,
      "step": 355
    },
    {
      "epoch": 0.16612828795569912,
      "grad_norm": 2.1581678351113602,
      "learning_rate": 1.9735789028731603e-05,
      "loss": 1.0818,
      "step": 360
    },
    {
      "epoch": 0.16843562528841716,
      "grad_norm": 2.0342475926569583,
      "learning_rate": 1.971707885911941e-05,
      "loss": 1.0679,
      "step": 365
    },
    {
      "epoch": 0.1707429626211352,
      "grad_norm": 2.2080262528668957,
      "learning_rate": 1.9697738162185163e-05,
      "loss": 1.0813,
      "step": 370
    },
    {
      "epoch": 0.17305029995385326,
      "grad_norm": 2.627310254608405,
      "learning_rate": 1.9677768192918973e-05,
      "loss": 1.0682,
      "step": 375
    },
    {
      "epoch": 0.1753576372865713,
      "grad_norm": 31.427071093107436,
      "learning_rate": 1.9657170247143526e-05,
      "loss": 1.06,
      "step": 380
    },
    {
      "epoch": 0.17766497461928935,
      "grad_norm": 2.4322378895965016,
      "learning_rate": 1.9635945661430006e-05,
      "loss": 1.0648,
      "step": 385
    },
    {
      "epoch": 0.17997231195200739,
      "grad_norm": 2.446435285466414,
      "learning_rate": 1.9614095813011366e-05,
      "loss": 1.0795,
      "step": 390
    },
    {
      "epoch": 0.18227964928472543,
      "grad_norm": 2.6478307008342745,
      "learning_rate": 1.9591622119692953e-05,
      "loss": 1.0655,
      "step": 395
    },
    {
      "epoch": 0.18458698661744347,
      "grad_norm": 3.5462973637007433,
      "learning_rate": 1.956852603976052e-05,
      "loss": 1.074,
      "step": 400
    },
    {
      "epoch": 0.1868943239501615,
      "grad_norm": 2.1952483345123834,
      "learning_rate": 1.9544809071885603e-05,
      "loss": 1.066,
      "step": 405
    },
    {
      "epoch": 0.18920166128287955,
      "grad_norm": 2.2118543537817397,
      "learning_rate": 1.9520472755028256e-05,
      "loss": 1.0567,
      "step": 410
    },
    {
      "epoch": 0.1915089986155976,
      "grad_norm": 2.1220225112366085,
      "learning_rate": 1.9495518668337204e-05,
      "loss": 1.0485,
      "step": 415
    },
    {
      "epoch": 0.19381633594831565,
      "grad_norm": 2.0821093984759433,
      "learning_rate": 1.946994843104737e-05,
      "loss": 1.0374,
      "step": 420
    },
    {
      "epoch": 0.1961236732810337,
      "grad_norm": 2.0190113966629433,
      "learning_rate": 1.944376370237481e-05,
      "loss": 1.064,
      "step": 425
    },
    {
      "epoch": 0.19843101061375173,
      "grad_norm": 2.056684419876963,
      "learning_rate": 1.9416966181409047e-05,
      "loss": 1.0524,
      "step": 430
    },
    {
      "epoch": 0.20073834794646978,
      "grad_norm": 2.366243541981115,
      "learning_rate": 1.9389557607002808e-05,
      "loss": 1.0587,
      "step": 435
    },
    {
      "epoch": 0.20304568527918782,
      "grad_norm": 2.3366923572205356,
      "learning_rate": 1.9361539757659212e-05,
      "loss": 1.104,
      "step": 440
    },
    {
      "epoch": 0.20535302261190586,
      "grad_norm": 2.135135120153321,
      "learning_rate": 1.933291445141635e-05,
      "loss": 1.0838,
      "step": 445
    },
    {
      "epoch": 0.2076603599446239,
      "grad_norm": 2.1686749928093367,
      "learning_rate": 1.930368354572932e-05,
      "loss": 1.0889,
      "step": 450
    },
    {
      "epoch": 0.20996769727734194,
      "grad_norm": 2.18295591084296,
      "learning_rate": 1.9273848937349712e-05,
      "loss": 1.0517,
      "step": 455
    },
    {
      "epoch": 0.21227503461006,
      "grad_norm": 2.015301818923552,
      "learning_rate": 1.92434125622025e-05,
      "loss": 1.0766,
      "step": 460
    },
    {
      "epoch": 0.21458237194277804,
      "grad_norm": 2.0741949328544984,
      "learning_rate": 1.9212376395260447e-05,
      "loss": 1.0896,
      "step": 465
    },
    {
      "epoch": 0.21688970927549608,
      "grad_norm": 2.15451982178122,
      "learning_rate": 1.9180742450415962e-05,
      "loss": 1.0763,
      "step": 470
    },
    {
      "epoch": 0.21919704660821412,
      "grad_norm": 2.0906260465476967,
      "learning_rate": 1.9148512780350384e-05,
      "loss": 1.086,
      "step": 475
    },
    {
      "epoch": 0.22150438394093216,
      "grad_norm": 2.031559261836197,
      "learning_rate": 1.9115689476400817e-05,
      "loss": 1.059,
      "step": 480
    },
    {
      "epoch": 0.2238117212736502,
      "grad_norm": 2.07650174311531,
      "learning_rate": 1.9082274668424423e-05,
      "loss": 1.0679,
      "step": 485
    },
    {
      "epoch": 0.22611905860636825,
      "grad_norm": 2.126208239890011,
      "learning_rate": 1.9048270524660197e-05,
      "loss": 1.0809,
      "step": 490
    },
    {
      "epoch": 0.22842639593908629,
      "grad_norm": 1.9929349716624978,
      "learning_rate": 1.9013679251588304e-05,
      "loss": 1.085,
      "step": 495
    },
    {
      "epoch": 0.23073373327180433,
      "grad_norm": 3.001431077273745,
      "learning_rate": 1.8978503093786882e-05,
      "loss": 1.0558,
      "step": 500
    },
    {
      "epoch": 0.2330410706045224,
      "grad_norm": 1.9284000626000521,
      "learning_rate": 1.89427443337864e-05,
      "loss": 1.0685,
      "step": 505
    },
    {
      "epoch": 0.23534840793724043,
      "grad_norm": 2.156814659249471,
      "learning_rate": 1.890640529192155e-05,
      "loss": 1.0857,
      "step": 510
    },
    {
      "epoch": 0.23765574526995847,
      "grad_norm": 2.2063349330204174,
      "learning_rate": 1.8869488326180682e-05,
      "loss": 1.092,
      "step": 515
    },
    {
      "epoch": 0.23996308260267651,
      "grad_norm": 1.8963715836357997,
      "learning_rate": 1.8831995832052802e-05,
      "loss": 1.0694,
      "step": 520
    },
    {
      "epoch": 0.24227041993539455,
      "grad_norm": 2.0285632136378613,
      "learning_rate": 1.8793930242372117e-05,
      "loss": 1.0795,
      "step": 525
    },
    {
      "epoch": 0.2445777572681126,
      "grad_norm": 2.099474069037447,
      "learning_rate": 1.8755294027160203e-05,
      "loss": 1.0893,
      "step": 530
    },
    {
      "epoch": 0.24688509460083063,
      "grad_norm": 2.0358502445768165,
      "learning_rate": 1.8716089693465696e-05,
      "loss": 1.086,
      "step": 535
    },
    {
      "epoch": 0.24919243193354867,
      "grad_norm": 2.1218454361521633,
      "learning_rate": 1.8676319785201617e-05,
      "loss": 1.0842,
      "step": 540
    },
    {
      "epoch": 0.2514997692662667,
      "grad_norm": 2.0341225955626583,
      "learning_rate": 1.8635986882980325e-05,
      "loss": 1.0625,
      "step": 545
    },
    {
      "epoch": 0.25380710659898476,
      "grad_norm": 2.3910625184538747,
      "learning_rate": 1.8595093603946053e-05,
      "loss": 1.0727,
      "step": 550
    },
    {
      "epoch": 0.2561144439317028,
      "grad_norm": 1.98644765469211,
      "learning_rate": 1.855364260160507e-05,
      "loss": 1.0595,
      "step": 555
    },
    {
      "epoch": 0.25842178126442084,
      "grad_norm": 2.208738179396901,
      "learning_rate": 1.851163656565351e-05,
      "loss": 1.0936,
      "step": 560
    },
    {
      "epoch": 0.2607291185971389,
      "grad_norm": 2.0209364206754645,
      "learning_rate": 1.846907822180286e-05,
      "loss": 1.0684,
      "step": 565
    },
    {
      "epoch": 0.26303645592985697,
      "grad_norm": 1.925369099665116,
      "learning_rate": 1.842597033160306e-05,
      "loss": 1.0669,
      "step": 570
    },
    {
      "epoch": 0.265343793262575,
      "grad_norm": 2.169504176441067,
      "learning_rate": 1.8382315692263324e-05,
      "loss": 1.0914,
      "step": 575
    },
    {
      "epoch": 0.26765113059529305,
      "grad_norm": 2.0095996014073503,
      "learning_rate": 1.8338117136470648e-05,
      "loss": 1.0679,
      "step": 580
    },
    {
      "epoch": 0.2699584679280111,
      "grad_norm": 2.0780448467433468,
      "learning_rate": 1.829337753220597e-05,
      "loss": 1.0823,
      "step": 585
    },
    {
      "epoch": 0.27226580526072913,
      "grad_norm": 1.9092130149771946,
      "learning_rate": 1.8248099782558103e-05,
      "loss": 1.0485,
      "step": 590
    },
    {
      "epoch": 0.2745731425934472,
      "grad_norm": 2.2904699258286914,
      "learning_rate": 1.820228682553533e-05,
      "loss": 1.0676,
      "step": 595
    },
    {
      "epoch": 0.2768804799261652,
      "grad_norm": 2.1592942059891884,
      "learning_rate": 1.8155941633874787e-05,
      "loss": 1.0862,
      "step": 600
    },
    {
      "epoch": 0.27918781725888325,
      "grad_norm": 1.9056960337173154,
      "learning_rate": 1.810906721484954e-05,
      "loss": 1.027,
      "step": 605
    },
    {
      "epoch": 0.2814951545916013,
      "grad_norm": 2.029717811241469,
      "learning_rate": 1.8061666610073465e-05,
      "loss": 1.0638,
      "step": 610
    },
    {
      "epoch": 0.28380249192431933,
      "grad_norm": 2.0411421295106873,
      "learning_rate": 1.8013742895303883e-05,
      "loss": 1.0667,
      "step": 615
    },
    {
      "epoch": 0.2861098292570374,
      "grad_norm": 1.924799713813513,
      "learning_rate": 1.7965299180241963e-05,
      "loss": 1.0685,
      "step": 620
    },
    {
      "epoch": 0.2884171665897554,
      "grad_norm": 2.171875799314523,
      "learning_rate": 1.791633860833096e-05,
      "loss": 1.0463,
      "step": 625
    },
    {
      "epoch": 0.29072450392247345,
      "grad_norm": 2.068372996208825,
      "learning_rate": 1.7866864356552215e-05,
      "loss": 1.0715,
      "step": 630
    },
    {
      "epoch": 0.2930318412551915,
      "grad_norm": 1.8601211490681129,
      "learning_rate": 1.7816879635219028e-05,
      "loss": 1.0576,
      "step": 635
    },
    {
      "epoch": 0.29533917858790953,
      "grad_norm": 1.9725316785259686,
      "learning_rate": 1.7766387687768338e-05,
      "loss": 1.0648,
      "step": 640
    },
    {
      "epoch": 0.2976465159206276,
      "grad_norm": 2.1844471268704515,
      "learning_rate": 1.7715391790550255e-05,
      "loss": 1.0637,
      "step": 645
    },
    {
      "epoch": 0.2999538532533456,
      "grad_norm": 1.933021204525043,
      "learning_rate": 1.766389525261547e-05,
      "loss": 1.0803,
      "step": 650
    },
    {
      "epoch": 0.30226119058606365,
      "grad_norm": 2.1351960039602695,
      "learning_rate": 1.7611901415500536e-05,
      "loss": 1.0979,
      "step": 655
    },
    {
      "epoch": 0.30456852791878175,
      "grad_norm": 1.9611319269471612,
      "learning_rate": 1.7559413653011027e-05,
      "loss": 1.0652,
      "step": 660
    },
    {
      "epoch": 0.3068758652514998,
      "grad_norm": 2.033679368734863,
      "learning_rate": 1.7506435371002635e-05,
      "loss": 1.0749,
      "step": 665
    },
    {
      "epoch": 0.30918320258421783,
      "grad_norm": 2.0279720872015354,
      "learning_rate": 1.745297000716016e-05,
      "loss": 1.078,
      "step": 670
    },
    {
      "epoch": 0.31149053991693587,
      "grad_norm": 1.989733876253561,
      "learning_rate": 1.7399021030774443e-05,
      "loss": 1.0639,
      "step": 675
    },
    {
      "epoch": 0.3137978772496539,
      "grad_norm": 1.9037569950190747,
      "learning_rate": 1.734459194251725e-05,
      "loss": 1.0721,
      "step": 680
    },
    {
      "epoch": 0.31610521458237195,
      "grad_norm": 2.183774346551292,
      "learning_rate": 1.7289686274214116e-05,
      "loss": 1.0755,
      "step": 685
    },
    {
      "epoch": 0.31841255191509,
      "grad_norm": 1.992668508208317,
      "learning_rate": 1.7234307588615177e-05,
      "loss": 1.0761,
      "step": 690
    },
    {
      "epoch": 0.32071988924780803,
      "grad_norm": 1.9985850630928745,
      "learning_rate": 1.717845947916398e-05,
      "loss": 1.0575,
      "step": 695
    },
    {
      "epoch": 0.3230272265805261,
      "grad_norm": 2.1480400724448883,
      "learning_rate": 1.712214556976431e-05,
      "loss": 1.0404,
      "step": 700
    },
    {
      "epoch": 0.3253345639132441,
      "grad_norm": 1.9503339704430334,
      "learning_rate": 1.7065369514545054e-05,
      "loss": 1.0579,
      "step": 705
    },
    {
      "epoch": 0.32764190124596215,
      "grad_norm": 1.971699140050545,
      "learning_rate": 1.7008134997623066e-05,
      "loss": 1.0629,
      "step": 710
    },
    {
      "epoch": 0.3299492385786802,
      "grad_norm": 1.97358622805482,
      "learning_rate": 1.695044573286413e-05,
      "loss": 1.039,
      "step": 715
    },
    {
      "epoch": 0.33225657591139823,
      "grad_norm": 1.8903289514072814,
      "learning_rate": 1.6892305463641967e-05,
      "loss": 1.0996,
      "step": 720
    },
    {
      "epoch": 0.3345639132441163,
      "grad_norm": 1.9171530806208752,
      "learning_rate": 1.6833717962595327e-05,
      "loss": 1.0587,
      "step": 725
    },
    {
      "epoch": 0.3368712505768343,
      "grad_norm": 2.0292768253738855,
      "learning_rate": 1.677468703138319e-05,
      "loss": 1.0534,
      "step": 730
    },
    {
      "epoch": 0.33917858790955235,
      "grad_norm": 1.9857495035997068,
      "learning_rate": 1.6715216500438093e-05,
      "loss": 1.0805,
      "step": 735
    },
    {
      "epoch": 0.3414859252422704,
      "grad_norm": 5.464414796561983,
      "learning_rate": 1.6655310228717565e-05,
      "loss": 1.0802,
      "step": 740
    },
    {
      "epoch": 0.3437932625749885,
      "grad_norm": 1.909425909480839,
      "learning_rate": 1.6594972103453727e-05,
      "loss": 1.0813,
      "step": 745
    },
    {
      "epoch": 0.34610059990770653,
      "grad_norm": 1.9164783421961078,
      "learning_rate": 1.6534206039901057e-05,
      "loss": 1.0466,
      "step": 750
    },
    {
      "epoch": 0.34840793724042457,
      "grad_norm": 4.300395520109931,
      "learning_rate": 1.647301598108234e-05,
      "loss": 1.0326,
      "step": 755
    },
    {
      "epoch": 0.3507152745731426,
      "grad_norm": 2.2226539635666827,
      "learning_rate": 1.64114058975328e-05,
      "loss": 1.0824,
      "step": 760
    },
    {
      "epoch": 0.35302261190586065,
      "grad_norm": 2.08738140836867,
      "learning_rate": 1.6349379787042478e-05,
      "loss": 1.0445,
      "step": 765
    },
    {
      "epoch": 0.3553299492385787,
      "grad_norm": 1.7956395961308758,
      "learning_rate": 1.6286941674396788e-05,
      "loss": 1.0283,
      "step": 770
    },
    {
      "epoch": 0.35763728657129673,
      "grad_norm": 1.9001406773036147,
      "learning_rate": 1.6224095611115385e-05,
      "loss": 1.0558,
      "step": 775
    },
    {
      "epoch": 0.35994462390401477,
      "grad_norm": 1.8584506601925908,
      "learning_rate": 1.6160845675189254e-05,
      "loss": 1.0315,
      "step": 780
    },
    {
      "epoch": 0.3622519612367328,
      "grad_norm": 1.8994200106765273,
      "learning_rate": 1.6097195970816094e-05,
      "loss": 1.0736,
      "step": 785
    },
    {
      "epoch": 0.36455929856945085,
      "grad_norm": 2.396252821544053,
      "learning_rate": 1.603315062813401e-05,
      "loss": 1.0605,
      "step": 790
    },
    {
      "epoch": 0.3668666359021689,
      "grad_norm": 1.798952489279231,
      "learning_rate": 1.596871380295351e-05,
      "loss": 1.0439,
      "step": 795
    },
    {
      "epoch": 0.36917397323488693,
      "grad_norm": 1.8907451459454219,
      "learning_rate": 1.5903889676487832e-05,
      "loss": 1.047,
      "step": 800
    },
    {
      "epoch": 0.37148131056760497,
      "grad_norm": 1.9272449578154556,
      "learning_rate": 1.5838682455081657e-05,
      "loss": 1.0557,
      "step": 805
    },
    {
      "epoch": 0.373788647900323,
      "grad_norm": 1.8500068465129675,
      "learning_rate": 1.5773096369938125e-05,
      "loss": 1.0448,
      "step": 810
    },
    {
      "epoch": 0.37609598523304105,
      "grad_norm": 2.024989082401722,
      "learning_rate": 1.570713567684432e-05,
      "loss": 1.0444,
      "step": 815
    },
    {
      "epoch": 0.3784033225657591,
      "grad_norm": 1.864545609223796,
      "learning_rate": 1.5640804655895086e-05,
      "loss": 1.0316,
      "step": 820
    },
    {
      "epoch": 0.38071065989847713,
      "grad_norm": 1.9810163039010853,
      "learning_rate": 1.557410761121532e-05,
      "loss": 1.0476,
      "step": 825
    },
    {
      "epoch": 0.3830179972311952,
      "grad_norm": 1.9715732068507474,
      "learning_rate": 1.5507048870680668e-05,
      "loss": 1.0092,
      "step": 830
    },
    {
      "epoch": 0.38532533456391327,
      "grad_norm": 1.9346233566232378,
      "learning_rate": 1.5439632785636707e-05,
      "loss": 1.0834,
      "step": 835
    },
    {
      "epoch": 0.3876326718966313,
      "grad_norm": 2.2247085368619164,
      "learning_rate": 1.5371863730616586e-05,
      "loss": 1.0608,
      "step": 840
    },
    {
      "epoch": 0.38994000922934935,
      "grad_norm": 1.855445999462738,
      "learning_rate": 1.5303746103057163e-05,
      "loss": 1.0311,
      "step": 845
    },
    {
      "epoch": 0.3922473465620674,
      "grad_norm": 1.8433500481185805,
      "learning_rate": 1.5235284323013674e-05,
      "loss": 1.0513,
      "step": 850
    },
    {
      "epoch": 0.39455468389478543,
      "grad_norm": 1.9238020550812749,
      "learning_rate": 1.5166482832872923e-05,
      "loss": 1.0611,
      "step": 855
    },
    {
      "epoch": 0.39686202122750347,
      "grad_norm": 2.034539491931288,
      "learning_rate": 1.5097346097065008e-05,
      "loss": 1.0369,
      "step": 860
    },
    {
      "epoch": 0.3991693585602215,
      "grad_norm": 1.8719773240320596,
      "learning_rate": 1.5027878601773633e-05,
      "loss": 1.031,
      "step": 865
    },
    {
      "epoch": 0.40147669589293955,
      "grad_norm": 1.8218549093076317,
      "learning_rate": 1.4958084854645018e-05,
      "loss": 1.027,
      "step": 870
    },
    {
      "epoch": 0.4037840332256576,
      "grad_norm": 1.908377236915306,
      "learning_rate": 1.4887969384495403e-05,
      "loss": 1.0505,
      "step": 875
    },
    {
      "epoch": 0.40609137055837563,
      "grad_norm": 1.882874108335332,
      "learning_rate": 1.4817536741017153e-05,
      "loss": 1.0421,
      "step": 880
    },
    {
      "epoch": 0.40839870789109367,
      "grad_norm": 2.0217065440618622,
      "learning_rate": 1.4746791494483584e-05,
      "loss": 1.0533,
      "step": 885
    },
    {
      "epoch": 0.4107060452238117,
      "grad_norm": 1.8717323475177303,
      "learning_rate": 1.4675738235452352e-05,
      "loss": 1.0279,
      "step": 890
    },
    {
      "epoch": 0.41301338255652975,
      "grad_norm": 1.9788825364185045,
      "learning_rate": 1.4604381574467616e-05,
      "loss": 1.042,
      "step": 895
    },
    {
      "epoch": 0.4153207198892478,
      "grad_norm": 1.9327030504589935,
      "learning_rate": 1.4532726141760849e-05,
      "loss": 1.06,
      "step": 900
    },
    {
      "epoch": 0.41762805722196583,
      "grad_norm": 1.8050202525178007,
      "learning_rate": 1.4460776586950393e-05,
      "loss": 1.0176,
      "step": 905
    },
    {
      "epoch": 0.41993539455468387,
      "grad_norm": 1.7140772518888605,
      "learning_rate": 1.438853757873975e-05,
      "loss": 1.0336,
      "step": 910
    },
    {
      "epoch": 0.4222427318874019,
      "grad_norm": 1.9381284110778458,
      "learning_rate": 1.4316013804614644e-05,
      "loss": 1.0283,
      "step": 915
    },
    {
      "epoch": 0.42455006922012,
      "grad_norm": 1.8569863683755345,
      "learning_rate": 1.4243209970538846e-05,
      "loss": 1.0295,
      "step": 920
    },
    {
      "epoch": 0.42685740655283805,
      "grad_norm": 1.7584125894267681,
      "learning_rate": 1.4170130800648814e-05,
      "loss": 1.0451,
      "step": 925
    },
    {
      "epoch": 0.4291647438855561,
      "grad_norm": 1.8468563006595364,
      "learning_rate": 1.4096781036947159e-05,
      "loss": 1.0329,
      "step": 930
    },
    {
      "epoch": 0.43147208121827413,
      "grad_norm": 1.8005410726866136,
      "learning_rate": 1.4023165438994933e-05,
      "loss": 1.0523,
      "step": 935
    },
    {
      "epoch": 0.43377941855099217,
      "grad_norm": 1.7881203203680747,
      "learning_rate": 1.394928878360279e-05,
      "loss": 1.052,
      "step": 940
    },
    {
      "epoch": 0.4360867558837102,
      "grad_norm": 1.9402582404112974,
      "learning_rate": 1.3875155864521031e-05,
      "loss": 1.0418,
      "step": 945
    },
    {
      "epoch": 0.43839409321642825,
      "grad_norm": 1.926002050119894,
      "learning_rate": 1.3800771492128537e-05,
      "loss": 1.0491,
      "step": 950
    },
    {
      "epoch": 0.4407014305491463,
      "grad_norm": 1.8807563751664647,
      "learning_rate": 1.3726140493120639e-05,
      "loss": 1.032,
      "step": 955
    },
    {
      "epoch": 0.44300876788186433,
      "grad_norm": 1.9189485256851713,
      "learning_rate": 1.3651267710195909e-05,
      "loss": 1.0355,
      "step": 960
    },
    {
      "epoch": 0.44531610521458237,
      "grad_norm": 1.9803461150155048,
      "learning_rate": 1.3576158001741932e-05,
      "loss": 1.0569,
      "step": 965
    },
    {
      "epoch": 0.4476234425473004,
      "grad_norm": 2.018305042251882,
      "learning_rate": 1.3500816241520059e-05,
      "loss": 1.04,
      "step": 970
    },
    {
      "epoch": 0.44993077988001845,
      "grad_norm": 1.7580864552202506,
      "learning_rate": 1.3425247318349137e-05,
      "loss": 1.0075,
      "step": 975
    },
    {
      "epoch": 0.4522381172127365,
      "grad_norm": 1.8967387580024155,
      "learning_rate": 1.3349456135788298e-05,
      "loss": 1.0429,
      "step": 980
    },
    {
      "epoch": 0.45454545454545453,
      "grad_norm": 2.2301409193225985,
      "learning_rate": 1.3273447611818768e-05,
      "loss": 1.0244,
      "step": 985
    },
    {
      "epoch": 0.45685279187817257,
      "grad_norm": 1.8218900361937265,
      "learning_rate": 1.3197226678524739e-05,
      "loss": 1.0006,
      "step": 990
    },
    {
      "epoch": 0.4591601292108906,
      "grad_norm": 1.8705871989801575,
      "learning_rate": 1.3120798281773346e-05,
      "loss": 1.0382,
      "step": 995
    },
    {
      "epoch": 0.46146746654360865,
      "grad_norm": 1.8993921065361903,
      "learning_rate": 1.3044167380893726e-05,
      "loss": 1.0543,
      "step": 1000
    },
    {
      "epoch": 0.46377480387632675,
      "grad_norm": 1.762141098208751,
      "learning_rate": 1.2967338948355217e-05,
      "loss": 1.031,
      "step": 1005
    },
    {
      "epoch": 0.4660821412090448,
      "grad_norm": 1.8349620217027005,
      "learning_rate": 1.2890317969444716e-05,
      "loss": 1.0104,
      "step": 1010
    },
    {
      "epoch": 0.4683894785417628,
      "grad_norm": 1.9527169882770812,
      "learning_rate": 1.2813109441943166e-05,
      "loss": 1.0325,
      "step": 1015
    },
    {
      "epoch": 0.47069681587448087,
      "grad_norm": 1.7594153130967782,
      "learning_rate": 1.273571837580127e-05,
      "loss": 1.0476,
      "step": 1020
    },
    {
      "epoch": 0.4730041532071989,
      "grad_norm": 1.7823802580407797,
      "learning_rate": 1.2658149792814405e-05,
      "loss": 1.0397,
      "step": 1025
    },
    {
      "epoch": 0.47531149053991695,
      "grad_norm": 1.7288773807653248,
      "learning_rate": 1.258040872629676e-05,
      "loss": 1.0419,
      "step": 1030
    },
    {
      "epoch": 0.477618827872635,
      "grad_norm": 1.8969879276116197,
      "learning_rate": 1.2502500220754736e-05,
      "loss": 1.0538,
      "step": 1035
    },
    {
      "epoch": 0.47992616520535303,
      "grad_norm": 1.9748280209096565,
      "learning_rate": 1.242442933155961e-05,
      "loss": 1.0088,
      "step": 1040
    },
    {
      "epoch": 0.48223350253807107,
      "grad_norm": 1.8630834023430007,
      "learning_rate": 1.2346201124619502e-05,
      "loss": 1.0041,
      "step": 1045
    },
    {
      "epoch": 0.4845408398707891,
      "grad_norm": 1.93282001404706,
      "learning_rate": 1.2267820676050657e-05,
      "loss": 1.0117,
      "step": 1050
    },
    {
      "epoch": 0.48684817720350715,
      "grad_norm": 1.7732915883551568,
      "learning_rate": 1.2189293071848051e-05,
      "loss": 1.0395,
      "step": 1055
    },
    {
      "epoch": 0.4891555145362252,
      "grad_norm": 1.7668917225682153,
      "learning_rate": 1.2110623407555398e-05,
      "loss": 1.0055,
      "step": 1060
    },
    {
      "epoch": 0.49146285186894323,
      "grad_norm": 1.770548333794891,
      "learning_rate": 1.2031816787934465e-05,
      "loss": 1.0198,
      "step": 1065
    },
    {
      "epoch": 0.49377018920166127,
      "grad_norm": 1.804504292487286,
      "learning_rate": 1.1952878326633872e-05,
      "loss": 0.9925,
      "step": 1070
    },
    {
      "epoch": 0.4960775265343793,
      "grad_norm": 1.8384240011850799,
      "learning_rate": 1.187381314585725e-05,
      "loss": 1.0167,
      "step": 1075
    },
    {
      "epoch": 0.49838486386709735,
      "grad_norm": 1.8738489698458378,
      "learning_rate": 1.1794626376030866e-05,
      "loss": 1.0266,
      "step": 1080
    },
    {
      "epoch": 0.5006922011998154,
      "grad_norm": 1.7963716989600227,
      "learning_rate": 1.1715323155470745e-05,
      "loss": 1.0203,
      "step": 1085
    },
    {
      "epoch": 0.5029995385325334,
      "grad_norm": 1.7950924324700734,
      "learning_rate": 1.163590863004922e-05,
      "loss": 1.0014,
      "step": 1090
    },
    {
      "epoch": 0.5053068758652515,
      "grad_norm": 1.7996992785566162,
      "learning_rate": 1.1556387952861036e-05,
      "loss": 1.0147,
      "step": 1095
    },
    {
      "epoch": 0.5076142131979695,
      "grad_norm": 1.9262189643769105,
      "learning_rate": 1.1476766283888986e-05,
      "loss": 1.0176,
      "step": 1100
    },
    {
      "epoch": 0.5099215505306876,
      "grad_norm": 1.738673259571015,
      "learning_rate": 1.1397048789669061e-05,
      "loss": 1.0221,
      "step": 1105
    },
    {
      "epoch": 0.5122288878634056,
      "grad_norm": 1.7993896869653003,
      "learning_rate": 1.1317240642955226e-05,
      "loss": 1.0232,
      "step": 1110
    },
    {
      "epoch": 0.5145362251961236,
      "grad_norm": 1.8492729168966688,
      "learning_rate": 1.1237347022383747e-05,
      "loss": 1.0138,
      "step": 1115
    },
    {
      "epoch": 0.5168435625288417,
      "grad_norm": 1.792127722956897,
      "learning_rate": 1.1157373112137171e-05,
      "loss": 1.011,
      "step": 1120
    },
    {
      "epoch": 0.5191508998615597,
      "grad_norm": 1.767761412954839,
      "learning_rate": 1.107732410160793e-05,
      "loss": 0.9917,
      "step": 1125
    },
    {
      "epoch": 0.5214582371942778,
      "grad_norm": 1.78861016845621,
      "learning_rate": 1.0997205185061599e-05,
      "loss": 1.024,
      "step": 1130
    },
    {
      "epoch": 0.5237655745269958,
      "grad_norm": 1.796111964851059,
      "learning_rate": 1.0917021561299864e-05,
      "loss": 1.0094,
      "step": 1135
    },
    {
      "epoch": 0.5260729118597139,
      "grad_norm": 1.6839699201837544,
      "learning_rate": 1.083677843332316e-05,
      "loss": 1.0019,
      "step": 1140
    },
    {
      "epoch": 0.528380249192432,
      "grad_norm": 1.735381208836221,
      "learning_rate": 1.0756481007993063e-05,
      "loss": 0.9979,
      "step": 1145
    },
    {
      "epoch": 0.53068758652515,
      "grad_norm": 1.7283238773850635,
      "learning_rate": 1.0676134495694439e-05,
      "loss": 1.0127,
      "step": 1150
    },
    {
      "epoch": 0.5329949238578681,
      "grad_norm": 1.8700927823490678,
      "learning_rate": 1.0595744109997326e-05,
      "loss": 0.9897,
      "step": 1155
    },
    {
      "epoch": 0.5353022611905861,
      "grad_norm": 1.7850898978429104,
      "learning_rate": 1.0515315067318652e-05,
      "loss": 1.0155,
      "step": 1160
    },
    {
      "epoch": 0.5376095985233041,
      "grad_norm": 1.8867979217718087,
      "learning_rate": 1.0434852586583737e-05,
      "loss": 0.9996,
      "step": 1165
    },
    {
      "epoch": 0.5399169358560222,
      "grad_norm": 1.8409069763382047,
      "learning_rate": 1.0354361888887642e-05,
      "loss": 1.0038,
      "step": 1170
    },
    {
      "epoch": 0.5422242731887402,
      "grad_norm": 1.9318197730416369,
      "learning_rate": 1.0273848197156401e-05,
      "loss": 0.9893,
      "step": 1175
    },
    {
      "epoch": 0.5445316105214583,
      "grad_norm": 1.7534602196952722,
      "learning_rate": 1.0193316735808085e-05,
      "loss": 0.993,
      "step": 1180
    },
    {
      "epoch": 0.5468389478541763,
      "grad_norm": 1.7436059512387687,
      "learning_rate": 1.0112772730413816e-05,
      "loss": 1.0079,
      "step": 1185
    },
    {
      "epoch": 0.5491462851868943,
      "grad_norm": 1.8076118426423142,
      "learning_rate": 1.0032221407358683e-05,
      "loss": 1.0336,
      "step": 1190
    },
    {
      "epoch": 0.5514536225196124,
      "grad_norm": 1.9044420451434694,
      "learning_rate": 9.951667993502599e-06,
      "loss": 1.0152,
      "step": 1195
    },
    {
      "epoch": 0.5537609598523304,
      "grad_norm": 1.842329102136153,
      "learning_rate": 9.871117715841151e-06,
      "loss": 0.9783,
      "step": 1200
    },
    {
      "epoch": 0.5560682971850485,
      "grad_norm": 1.8583622986365993,
      "learning_rate": 9.790575801166432e-06,
      "loss": 1.0054,
      "step": 1205
    },
    {
      "epoch": 0.5583756345177665,
      "grad_norm": 1.7004919095912332,
      "learning_rate": 9.710047475727854e-06,
      "loss": 1.0011,
      "step": 1210
    },
    {
      "epoch": 0.5606829718504845,
      "grad_norm": 1.7280460978374188,
      "learning_rate": 9.629537964893063e-06,
      "loss": 1.0299,
      "step": 1215
    },
    {
      "epoch": 0.5629903091832026,
      "grad_norm": 1.7174011953937558,
      "learning_rate": 9.549052492808834e-06,
      "loss": 0.9946,
      "step": 1220
    },
    {
      "epoch": 0.5652976465159206,
      "grad_norm": 1.8215498597720168,
      "learning_rate": 9.468596282062114e-06,
      "loss": 1.0113,
      "step": 1225
    },
    {
      "epoch": 0.5676049838486387,
      "grad_norm": 1.6911500192296895,
      "learning_rate": 9.38817455334112e-06,
      "loss": 0.9855,
      "step": 1230
    },
    {
      "epoch": 0.5699123211813567,
      "grad_norm": 1.8405046704539174,
      "learning_rate": 9.307792525096582e-06,
      "loss": 1.0113,
      "step": 1235
    },
    {
      "epoch": 0.5722196585140747,
      "grad_norm": 1.838204327540361,
      "learning_rate": 9.227455413203115e-06,
      "loss": 0.9947,
      "step": 1240
    },
    {
      "epoch": 0.5745269958467928,
      "grad_norm": 1.656688699609939,
      "learning_rate": 9.147168430620788e-06,
      "loss": 0.9892,
      "step": 1245
    },
    {
      "epoch": 0.5768343331795108,
      "grad_norm": 1.7231036061816765,
      "learning_rate": 9.066936787056843e-06,
      "loss": 0.9944,
      "step": 1250
    },
    {
      "epoch": 0.5791416705122289,
      "grad_norm": 1.645605940940624,
      "learning_rate": 8.986765688627652e-06,
      "loss": 0.9936,
      "step": 1255
    },
    {
      "epoch": 0.5814490078449469,
      "grad_norm": 1.8141527360329759,
      "learning_rate": 8.906660337520903e-06,
      "loss": 1.0096,
      "step": 1260
    },
    {
      "epoch": 0.583756345177665,
      "grad_norm": 1.856808726362016,
      "learning_rate": 8.82662593165804e-06,
      "loss": 1.0032,
      "step": 1265
    },
    {
      "epoch": 0.586063682510383,
      "grad_norm": 1.8593818092553211,
      "learning_rate": 8.746667664356957e-06,
      "loss": 1.0177,
      "step": 1270
    },
    {
      "epoch": 0.588371019843101,
      "grad_norm": 1.7318701186944272,
      "learning_rate": 8.666790723995043e-06,
      "loss": 0.9933,
      "step": 1275
    },
    {
      "epoch": 0.5906783571758191,
      "grad_norm": 1.8632249625406112,
      "learning_rate": 8.587000293672482e-06,
      "loss": 1.0278,
      "step": 1280
    },
    {
      "epoch": 0.5929856945085371,
      "grad_norm": 1.8482080793994375,
      "learning_rate": 8.50730155087596e-06,
      "loss": 0.9753,
      "step": 1285
    },
    {
      "epoch": 0.5952930318412551,
      "grad_norm": 1.6654816438940703,
      "learning_rate": 8.427699667142681e-06,
      "loss": 0.9923,
      "step": 1290
    },
    {
      "epoch": 0.5976003691739732,
      "grad_norm": 1.816789112483473,
      "learning_rate": 8.348199807724806e-06,
      "loss": 0.9951,
      "step": 1295
    },
    {
      "epoch": 0.5999077065066912,
      "grad_norm": 1.8671938825009406,
      "learning_rate": 8.268807131254288e-06,
      "loss": 1.0063,
      "step": 1300
    },
    {
      "epoch": 0.6022150438394093,
      "grad_norm": 1.736173419625791,
      "learning_rate": 8.189526789408123e-06,
      "loss": 0.9942,
      "step": 1305
    },
    {
      "epoch": 0.6045223811721273,
      "grad_norm": 1.7397594354717327,
      "learning_rate": 8.110363926574088e-06,
      "loss": 0.9899,
      "step": 1310
    },
    {
      "epoch": 0.6068297185048455,
      "grad_norm": 1.7112354026341845,
      "learning_rate": 8.0313236795169e-06,
      "loss": 0.9981,
      "step": 1315
    },
    {
      "epoch": 0.6091370558375635,
      "grad_norm": 1.7633777819452738,
      "learning_rate": 7.952411177044923e-06,
      "loss": 0.9667,
      "step": 1320
    },
    {
      "epoch": 0.6114443931702815,
      "grad_norm": 1.7477692209080626,
      "learning_rate": 7.873631539677364e-06,
      "loss": 0.9979,
      "step": 1325
    },
    {
      "epoch": 0.6137517305029996,
      "grad_norm": 1.7532055508610305,
      "learning_rate": 7.794989879311991e-06,
      "loss": 0.9869,
      "step": 1330
    },
    {
      "epoch": 0.6160590678357176,
      "grad_norm": 1.8525858143415055,
      "learning_rate": 7.716491298893443e-06,
      "loss": 0.9834,
      "step": 1335
    },
    {
      "epoch": 0.6183664051684357,
      "grad_norm": 1.749585519245075,
      "learning_rate": 7.638140892082118e-06,
      "loss": 1.0092,
      "step": 1340
    },
    {
      "epoch": 0.6206737425011537,
      "grad_norm": 1.8420135288729067,
      "learning_rate": 7.559943742923626e-06,
      "loss": 0.9797,
      "step": 1345
    },
    {
      "epoch": 0.6229810798338717,
      "grad_norm": 1.7361527256574634,
      "learning_rate": 7.4819049255189215e-06,
      "loss": 1.0084,
      "step": 1350
    },
    {
      "epoch": 0.6252884171665898,
      "grad_norm": 1.8341519418326866,
      "learning_rate": 7.404029503695028e-06,
      "loss": 0.978,
      "step": 1355
    },
    {
      "epoch": 0.6275957544993078,
      "grad_norm": 1.8293945335237427,
      "learning_rate": 7.326322530676471e-06,
      "loss": 0.9949,
      "step": 1360
    },
    {
      "epoch": 0.6299030918320259,
      "grad_norm": 1.8042095660293147,
      "learning_rate": 7.248789048757368e-06,
      "loss": 0.9708,
      "step": 1365
    },
    {
      "epoch": 0.6322104291647439,
      "grad_norm": 1.845467719423503,
      "learning_rate": 7.171434088974252e-06,
      "loss": 0.9965,
      "step": 1370
    },
    {
      "epoch": 0.6345177664974619,
      "grad_norm": 1.6543843384272663,
      "learning_rate": 7.094262670779611e-06,
      "loss": 0.9745,
      "step": 1375
    },
    {
      "epoch": 0.63682510383018,
      "grad_norm": 1.78812671106571,
      "learning_rate": 7.017279801716177e-06,
      "loss": 0.9913,
      "step": 1380
    },
    {
      "epoch": 0.639132441162898,
      "grad_norm": 1.6947334759904245,
      "learning_rate": 6.940490477092004e-06,
      "loss": 0.9852,
      "step": 1385
    },
    {
      "epoch": 0.6414397784956161,
      "grad_norm": 1.8535301270043634,
      "learning_rate": 6.8638996796563275e-06,
      "loss": 1.007,
      "step": 1390
    },
    {
      "epoch": 0.6437471158283341,
      "grad_norm": 1.7676355127694694,
      "learning_rate": 6.78751237927623e-06,
      "loss": 0.9514,
      "step": 1395
    },
    {
      "epoch": 0.6460544531610521,
      "grad_norm": 1.6769380120076558,
      "learning_rate": 6.711333532614168e-06,
      "loss": 0.9698,
      "step": 1400
    },
    {
      "epoch": 0.6483617904937702,
      "grad_norm": 1.7272039849376555,
      "learning_rate": 6.6353680828063306e-06,
      "loss": 0.948,
      "step": 1405
    },
    {
      "epoch": 0.6506691278264882,
      "grad_norm": 1.7909691104530978,
      "learning_rate": 6.559620959141897e-06,
      "loss": 0.9741,
      "step": 1410
    },
    {
      "epoch": 0.6529764651592063,
      "grad_norm": 1.7584119603336634,
      "learning_rate": 6.48409707674317e-06,
      "loss": 0.9825,
      "step": 1415
    },
    {
      "epoch": 0.6552838024919243,
      "grad_norm": 1.704146715339984,
      "learning_rate": 6.408801336246645e-06,
      "loss": 0.9473,
      "step": 1420
    },
    {
      "epoch": 0.6575911398246423,
      "grad_norm": 1.675928516675119,
      "learning_rate": 6.3337386234850255e-06,
      "loss": 0.9726,
      "step": 1425
    },
    {
      "epoch": 0.6598984771573604,
      "grad_norm": 1.8640939079623915,
      "learning_rate": 6.258913809170169e-06,
      "loss": 0.9899,
      "step": 1430
    },
    {
      "epoch": 0.6622058144900784,
      "grad_norm": 1.7077639495220778,
      "learning_rate": 6.18433174857705e-06,
      "loss": 0.9856,
      "step": 1435
    },
    {
      "epoch": 0.6645131518227965,
      "grad_norm": 1.7756594990657744,
      "learning_rate": 6.1099972812287e-06,
      "loss": 0.9766,
      "step": 1440
    },
    {
      "epoch": 0.6668204891555145,
      "grad_norm": 1.9246917842171538,
      "learning_rate": 6.035915230582176e-06,
      "loss": 0.9802,
      "step": 1445
    },
    {
      "epoch": 0.6691278264882325,
      "grad_norm": 1.7061051671690723,
      "learning_rate": 5.962090403715592e-06,
      "loss": 0.9589,
      "step": 1450
    },
    {
      "epoch": 0.6714351638209506,
      "grad_norm": 1.9209301423646885,
      "learning_rate": 5.8885275910161574e-06,
      "loss": 0.9661,
      "step": 1455
    },
    {
      "epoch": 0.6737425011536686,
      "grad_norm": 1.7522165825796936,
      "learning_rate": 5.815231565869377e-06,
      "loss": 0.9683,
      "step": 1460
    },
    {
      "epoch": 0.6760498384863867,
      "grad_norm": 1.7093168808099815,
      "learning_rate": 5.742207084349274e-06,
      "loss": 0.9787,
      "step": 1465
    },
    {
      "epoch": 0.6783571758191047,
      "grad_norm": 1.7331687059615726,
      "learning_rate": 5.669458884909815e-06,
      "loss": 0.962,
      "step": 1470
    },
    {
      "epoch": 0.6806645131518227,
      "grad_norm": 1.7125984142423774,
      "learning_rate": 5.596991688077409e-06,
      "loss": 0.9749,
      "step": 1475
    },
    {
      "epoch": 0.6829718504845408,
      "grad_norm": 1.7116106483633,
      "learning_rate": 5.5248101961446065e-06,
      "loss": 0.9646,
      "step": 1480
    },
    {
      "epoch": 0.6852791878172588,
      "grad_norm": 1.7871177917200074,
      "learning_rate": 5.452919092864976e-06,
      "loss": 0.9869,
      "step": 1485
    },
    {
      "epoch": 0.687586525149977,
      "grad_norm": 1.7422263712914812,
      "learning_rate": 5.381323043149192e-06,
      "loss": 0.9598,
      "step": 1490
    },
    {
      "epoch": 0.689893862482695,
      "grad_norm": 1.827833481936086,
      "learning_rate": 5.310026692762316e-06,
      "loss": 0.9674,
      "step": 1495
    },
    {
      "epoch": 0.6922011998154131,
      "grad_norm": 1.8013595644003924,
      "learning_rate": 5.239034668022353e-06,
      "loss": 0.9573,
      "step": 1500
    },
    {
      "epoch": 0.6945085371481311,
      "grad_norm": 1.8007495209856474,
      "learning_rate": 5.168351575500049e-06,
      "loss": 0.9719,
      "step": 1505
    },
    {
      "epoch": 0.6968158744808491,
      "grad_norm": 1.7404494760342795,
      "learning_rate": 5.097982001719994e-06,
      "loss": 0.9724,
      "step": 1510
    },
    {
      "epoch": 0.6991232118135672,
      "grad_norm": 1.8207400554800481,
      "learning_rate": 5.027930512862976e-06,
      "loss": 0.9643,
      "step": 1515
    },
    {
      "epoch": 0.7014305491462852,
      "grad_norm": 1.6503774911907483,
      "learning_rate": 4.958201654469731e-06,
      "loss": 0.9718,
      "step": 1520
    },
    {
      "epoch": 0.7037378864790033,
      "grad_norm": 1.7042499652030019,
      "learning_rate": 4.888799951145948e-06,
      "loss": 0.9786,
      "step": 1525
    },
    {
      "epoch": 0.7060452238117213,
      "grad_norm": 1.691429781718496,
      "learning_rate": 4.8197299062687e-06,
      "loss": 0.9584,
      "step": 1530
    },
    {
      "epoch": 0.7083525611444393,
      "grad_norm": 1.789471718965235,
      "learning_rate": 4.750996001694215e-06,
      "loss": 0.978,
      "step": 1535
    },
    {
      "epoch": 0.7106598984771574,
      "grad_norm": 1.762516310353455,
      "learning_rate": 4.6826026974670665e-06,
      "loss": 0.9536,
      "step": 1540
    },
    {
      "epoch": 0.7129672358098754,
      "grad_norm": 1.6808685507807348,
      "learning_rate": 4.614554431530754e-06,
      "loss": 0.9453,
      "step": 1545
    },
    {
      "epoch": 0.7152745731425935,
      "grad_norm": 1.695661878440997,
      "learning_rate": 4.546855619439734e-06,
      "loss": 0.9674,
      "step": 1550
    },
    {
      "epoch": 0.7175819104753115,
      "grad_norm": 1.780885617378124,
      "learning_rate": 4.479510654072909e-06,
      "loss": 0.9724,
      "step": 1555
    },
    {
      "epoch": 0.7198892478080295,
      "grad_norm": 1.7441807694903777,
      "learning_rate": 4.412523905348568e-06,
      "loss": 0.9422,
      "step": 1560
    },
    {
      "epoch": 0.7221965851407476,
      "grad_norm": 1.7666139001524914,
      "learning_rate": 4.345899719940844e-06,
      "loss": 0.9496,
      "step": 1565
    },
    {
      "epoch": 0.7245039224734656,
      "grad_norm": 1.6463010573052135,
      "learning_rate": 4.279642420997655e-06,
      "loss": 0.9635,
      "step": 1570
    },
    {
      "epoch": 0.7268112598061837,
      "grad_norm": 1.7790169033851828,
      "learning_rate": 4.213756307860175e-06,
      "loss": 0.9795,
      "step": 1575
    },
    {
      "epoch": 0.7291185971389017,
      "grad_norm": 1.709252193673288,
      "learning_rate": 4.148245655783869e-06,
      "loss": 0.9542,
      "step": 1580
    },
    {
      "epoch": 0.7314259344716197,
      "grad_norm": 1.7064646780964507,
      "learning_rate": 4.083114715661069e-06,
      "loss": 0.9494,
      "step": 1585
    },
    {
      "epoch": 0.7337332718043378,
      "grad_norm": 1.7203471522785316,
      "learning_rate": 4.018367713745137e-06,
      "loss": 0.9513,
      "step": 1590
    },
    {
      "epoch": 0.7360406091370558,
      "grad_norm": 1.7329896835019194,
      "learning_rate": 3.954008851376252e-06,
      "loss": 0.9464,
      "step": 1595
    },
    {
      "epoch": 0.7383479464697739,
      "grad_norm": 1.6668720129339225,
      "learning_rate": 3.890042304708758e-06,
      "loss": 0.9349,
      "step": 1600
    },
    {
      "epoch": 0.7406552838024919,
      "grad_norm": 1.6612958616670062,
      "learning_rate": 3.826472224440202e-06,
      "loss": 0.9753,
      "step": 1605
    },
    {
      "epoch": 0.7429626211352099,
      "grad_norm": 1.689937062434287,
      "learning_rate": 3.763302735541987e-06,
      "loss": 0.9755,
      "step": 1610
    },
    {
      "epoch": 0.745269958467928,
      "grad_norm": 1.8524303075498816,
      "learning_rate": 3.700537936991733e-06,
      "loss": 0.9919,
      "step": 1615
    },
    {
      "epoch": 0.747577295800646,
      "grad_norm": 1.7330330880413027,
      "learning_rate": 3.6381819015072652e-06,
      "loss": 0.9968,
      "step": 1620
    },
    {
      "epoch": 0.7498846331333641,
      "grad_norm": 1.732375990079818,
      "learning_rate": 3.5762386752823643e-06,
      "loss": 0.9598,
      "step": 1625
    },
    {
      "epoch": 0.7521919704660821,
      "grad_norm": 1.6723382538398348,
      "learning_rate": 3.5147122777242203e-06,
      "loss": 0.9826,
      "step": 1630
    },
    {
      "epoch": 0.7544993077988001,
      "grad_norm": 1.698445076932435,
      "learning_rate": 3.4536067011925945e-06,
      "loss": 0.975,
      "step": 1635
    },
    {
      "epoch": 0.7568066451315182,
      "grad_norm": 1.6756544799204833,
      "learning_rate": 3.3929259107407785e-06,
      "loss": 0.9596,
      "step": 1640
    },
    {
      "epoch": 0.7591139824642362,
      "grad_norm": 1.7323687941815844,
      "learning_rate": 3.3326738438583116e-06,
      "loss": 0.9471,
      "step": 1645
    },
    {
      "epoch": 0.7614213197969543,
      "grad_norm": 1.6841658818773522,
      "learning_rate": 3.272854410215467e-06,
      "loss": 0.9478,
      "step": 1650
    },
    {
      "epoch": 0.7637286571296723,
      "grad_norm": 1.7258401397718819,
      "learning_rate": 3.213471491409568e-06,
      "loss": 0.9545,
      "step": 1655
    },
    {
      "epoch": 0.7660359944623903,
      "grad_norm": 1.754250495342998,
      "learning_rate": 3.1545289407131128e-06,
      "loss": 0.9557,
      "step": 1660
    },
    {
      "epoch": 0.7683433317951085,
      "grad_norm": 1.7109892895946872,
      "learning_rate": 3.0960305828237568e-06,
      "loss": 0.9649,
      "step": 1665
    },
    {
      "epoch": 0.7706506691278265,
      "grad_norm": 1.8321981237158624,
      "learning_rate": 3.0379802136161073e-06,
      "loss": 0.9612,
      "step": 1670
    },
    {
      "epoch": 0.7729580064605446,
      "grad_norm": 1.6838996787097582,
      "learning_rate": 2.9803815998954334e-06,
      "loss": 0.9701,
      "step": 1675
    },
    {
      "epoch": 0.7752653437932626,
      "grad_norm": 1.7069776476837635,
      "learning_rate": 2.9232384791532377e-06,
      "loss": 0.9724,
      "step": 1680
    },
    {
      "epoch": 0.7775726811259807,
      "grad_norm": 1.6302606016182208,
      "learning_rate": 2.866554559324731e-06,
      "loss": 0.9441,
      "step": 1685
    },
    {
      "epoch": 0.7798800184586987,
      "grad_norm": 1.6451304542174006,
      "learning_rate": 2.810333518548246e-06,
      "loss": 0.9337,
      "step": 1690
    },
    {
      "epoch": 0.7821873557914167,
      "grad_norm": 1.763422741436097,
      "learning_rate": 2.7545790049265506e-06,
      "loss": 0.9542,
      "step": 1695
    },
    {
      "epoch": 0.7844946931241348,
      "grad_norm": 1.6780739225283752,
      "learning_rate": 2.699294636290134e-06,
      "loss": 0.9468,
      "step": 1700
    },
    {
      "epoch": 0.7868020304568528,
      "grad_norm": 1.7170452157015115,
      "learning_rate": 2.6444839999624496e-06,
      "loss": 0.9333,
      "step": 1705
    },
    {
      "epoch": 0.7891093677895709,
      "grad_norm": 1.6339639009427172,
      "learning_rate": 2.5901506525271424e-06,
      "loss": 0.9656,
      "step": 1710
    },
    {
      "epoch": 0.7914167051222889,
      "grad_norm": 1.7076055984466658,
      "learning_rate": 2.5362981195972627e-06,
      "loss": 0.9292,
      "step": 1715
    },
    {
      "epoch": 0.7937240424550069,
      "grad_norm": 1.694775381636099,
      "learning_rate": 2.4829298955865022e-06,
      "loss": 0.9621,
      "step": 1720
    },
    {
      "epoch": 0.796031379787725,
      "grad_norm": 1.6448376757312444,
      "learning_rate": 2.4300494434824373e-06,
      "loss": 0.9323,
      "step": 1725
    },
    {
      "epoch": 0.798338717120443,
      "grad_norm": 1.7190721711044321,
      "learning_rate": 2.3776601946218225e-06,
      "loss": 0.9536,
      "step": 1730
    },
    {
      "epoch": 0.8006460544531611,
      "grad_norm": 1.7369056682520372,
      "learning_rate": 2.3257655484679376e-06,
      "loss": 0.9474,
      "step": 1735
    },
    {
      "epoch": 0.8029533917858791,
      "grad_norm": 1.9277048905233987,
      "learning_rate": 2.274368872390009e-06,
      "loss": 0.953,
      "step": 1740
    },
    {
      "epoch": 0.8052607291185971,
      "grad_norm": 1.6988058479966548,
      "learning_rate": 2.2234735014446905e-06,
      "loss": 0.9546,
      "step": 1745
    },
    {
      "epoch": 0.8075680664513152,
      "grad_norm": 1.708926311661711,
      "learning_rate": 2.1730827381596643e-06,
      "loss": 0.9442,
      "step": 1750
    },
    {
      "epoch": 0.8098754037840332,
      "grad_norm": 1.7384137551353784,
      "learning_rate": 2.123199852319352e-06,
      "loss": 0.9415,
      "step": 1755
    },
    {
      "epoch": 0.8121827411167513,
      "grad_norm": 1.706865871432203,
      "learning_rate": 2.073828080752728e-06,
      "loss": 0.9514,
      "step": 1760
    },
    {
      "epoch": 0.8144900784494693,
      "grad_norm": 1.631591345911517,
      "learning_rate": 2.024970627123295e-06,
      "loss": 0.9593,
      "step": 1765
    },
    {
      "epoch": 0.8167974157821873,
      "grad_norm": 1.7250014472303201,
      "learning_rate": 1.976630661721207e-06,
      "loss": 0.9312,
      "step": 1770
    },
    {
      "epoch": 0.8191047531149054,
      "grad_norm": 1.7176929983275837,
      "learning_rate": 1.9288113212575454e-06,
      "loss": 0.9392,
      "step": 1775
    },
    {
      "epoch": 0.8214120904476234,
      "grad_norm": 1.7515517183747666,
      "learning_rate": 1.8815157086607826e-06,
      "loss": 0.9461,
      "step": 1780
    },
    {
      "epoch": 0.8237194277803415,
      "grad_norm": 1.7256573401014044,
      "learning_rate": 1.8347468928754408e-06,
      "loss": 0.9625,
      "step": 1785
    },
    {
      "epoch": 0.8260267651130595,
      "grad_norm": 1.6925351828448565,
      "learning_rate": 1.7885079086629598e-06,
      "loss": 0.9618,
      "step": 1790
    },
    {
      "epoch": 0.8283341024457775,
      "grad_norm": 1.6848423059711715,
      "learning_rate": 1.7428017564047594e-06,
      "loss": 0.957,
      "step": 1795
    },
    {
      "epoch": 0.8306414397784956,
      "grad_norm": 1.685378392680059,
      "learning_rate": 1.697631401907559e-06,
      "loss": 0.9405,
      "step": 1800
    },
    {
      "epoch": 0.8329487771112136,
      "grad_norm": 1.779147836504438,
      "learning_rate": 1.6529997762109319e-06,
      "loss": 0.9475,
      "step": 1805
    },
    {
      "epoch": 0.8352561144439317,
      "grad_norm": 1.6862245640499274,
      "learning_rate": 1.6089097753971061e-06,
      "loss": 0.9433,
      "step": 1810
    },
    {
      "epoch": 0.8375634517766497,
      "grad_norm": 1.6387699919494911,
      "learning_rate": 1.565364260403055e-06,
      "loss": 0.9393,
      "step": 1815
    },
    {
      "epoch": 0.8398707891093677,
      "grad_norm": 1.7088328181524817,
      "learning_rate": 1.522366056834844e-06,
      "loss": 0.9322,
      "step": 1820
    },
    {
      "epoch": 0.8421781264420858,
      "grad_norm": 1.717214554275275,
      "learning_rate": 1.4799179547842823e-06,
      "loss": 0.9393,
      "step": 1825
    },
    {
      "epoch": 0.8444854637748038,
      "grad_norm": 1.717143943169584,
      "learning_rate": 1.4380227086478816e-06,
      "loss": 0.96,
      "step": 1830
    },
    {
      "epoch": 0.846792801107522,
      "grad_norm": 1.7390583641873172,
      "learning_rate": 1.3966830369481231e-06,
|
"loss": 0.9487, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.84910013844024, |
|
"grad_norm": 1.6483510037137357, |
|
"learning_rate": 1.3559016221570663e-06, |
|
"loss": 0.9315, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.8514074757729581, |
|
"grad_norm": 1.7716145374153562, |
|
"learning_rate": 1.3156811105222723e-06, |
|
"loss": 0.9375, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.8537148131056761, |
|
"grad_norm": 1.7136369284376767, |
|
"learning_rate": 1.276024111895101e-06, |
|
"loss": 0.9592, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.8560221504383941, |
|
"grad_norm": 1.659445261277345, |
|
"learning_rate": 1.2369331995613664e-06, |
|
"loss": 0.9466, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.8583294877711122, |
|
"grad_norm": 1.7022849150801465, |
|
"learning_rate": 1.1984109100743445e-06, |
|
"loss": 0.934, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.8606368251038302, |
|
"grad_norm": 1.7296616243070897, |
|
"learning_rate": 1.1604597430902032e-06, |
|
"loss": 0.9413, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.8629441624365483, |
|
"grad_norm": 1.70400411645417, |
|
"learning_rate": 1.123082161205775e-06, |
|
"loss": 0.9192, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.8652514997692663, |
|
"grad_norm": 1.7716704719549248, |
|
"learning_rate": 1.0862805897987894e-06, |
|
"loss": 0.9313, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.8675588371019843, |
|
"grad_norm": 1.6028791905428008, |
|
"learning_rate": 1.0500574168704746e-06, |
|
"loss": 0.9222, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.8698661744347024, |
|
"grad_norm": 1.6811903757943443, |
|
"learning_rate": 1.014414992890611e-06, |
|
"loss": 0.9613, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.8721735117674204, |
|
"grad_norm": 1.6582769629121996, |
|
"learning_rate": 9.793556306450125e-07, |
|
"loss": 0.9397, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.8744808491001385, |
|
"grad_norm": 1.7329477389715338, |
|
"learning_rate": 9.448816050854559e-07, |
|
"loss": 0.9456, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.8767881864328565, |
|
"grad_norm": 1.6579278218600528, |
|
"learning_rate": 9.10995153182056e-07, |
|
"loss": 0.9622, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.8790955237655745, |
|
"grad_norm": 1.7059466024322731, |
|
"learning_rate": 8.776984737781135e-07, |
|
"loss": 0.9247, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.8814028610982926, |
|
"grad_norm": 1.6587934531932846, |
|
"learning_rate": 8.449937274474396e-07, |
|
"loss": 0.9287, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.8837101984310106, |
|
"grad_norm": 1.7000421816452764, |
|
"learning_rate": 8.128830363541574e-07, |
|
"loss": 0.9579, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.8860175357637287, |
|
"grad_norm": 1.5982289727068608, |
|
"learning_rate": 7.81368484114996e-07, |
|
"loss": 0.9252, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.8883248730964467, |
|
"grad_norm": 1.6595039047537794, |
|
"learning_rate": 7.504521156640854e-07, |
|
"loss": 0.9535, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.8906322104291647, |
|
"grad_norm": 1.67142320517366, |
|
"learning_rate": 7.201359371202698e-07, |
|
"loss": 0.9342, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.8929395477618828, |
|
"grad_norm": 1.724210373217245, |
|
"learning_rate": 6.904219156569325e-07, |
|
"loss": 0.9537, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.8952468850946008, |
|
"grad_norm": 1.775137829218875, |
|
"learning_rate": 6.613119793743428e-07, |
|
"loss": 0.9407, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.8975542224273189, |
|
"grad_norm": 1.6829966494434596, |
|
"learning_rate": 6.32808017174551e-07, |
|
"loss": 0.9271, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.8998615597600369, |
|
"grad_norm": 1.7666732440040138, |
|
"learning_rate": 6.049118786388153e-07, |
|
"loss": 0.9299, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.9021688970927549, |
|
"grad_norm": 1.7328446243608238, |
|
"learning_rate": 5.776253739075887e-07, |
|
"loss": 0.9368, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.904476234425473, |
|
"grad_norm": 1.794453478228994, |
|
"learning_rate": 5.509502735630601e-07, |
|
"loss": 0.9584, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.906783571758191, |
|
"grad_norm": 1.8595416734645607, |
|
"learning_rate": 5.248883085142653e-07, |
|
"loss": 0.9278, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.9090909090909091, |
|
"grad_norm": 1.8064599818138745, |
|
"learning_rate": 4.994411698847668e-07, |
|
"loss": 0.9521, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.9113982464236271, |
|
"grad_norm": 11.97761580494739, |
|
"learning_rate": 4.746105089029229e-07, |
|
"loss": 0.9353, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.9137055837563451, |
|
"grad_norm": 1.91129591540662, |
|
"learning_rate": 4.50397936794742e-07, |
|
"loss": 0.9518, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.9160129210890632, |
|
"grad_norm": 1.68892958160335, |
|
"learning_rate": 4.268050246793276e-07, |
|
"loss": 0.9417, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.9183202584217812, |
|
"grad_norm": 1.7682216031642473, |
|
"learning_rate": 4.038333034669406e-07, |
|
"loss": 0.9575, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.9206275957544993, |
|
"grad_norm": 1.6202959145852942, |
|
"learning_rate": 3.814842637596483e-07, |
|
"loss": 0.9202, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.9229349330872173, |
|
"grad_norm": 1.7355964322786217, |
|
"learning_rate": 3.5975935575461083e-07, |
|
"loss": 0.9408, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.9252422704199353, |
|
"grad_norm": 1.784833888232325, |
|
"learning_rate": 3.3865998914997645e-07, |
|
"loss": 0.9451, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 0.9275496077526535, |
|
"grad_norm": 1.7347540917405002, |
|
"learning_rate": 3.1818753305340566e-07, |
|
"loss": 0.9503, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.9298569450853715, |
|
"grad_norm": 1.7098932522181238, |
|
"learning_rate": 2.9834331589323697e-07, |
|
"loss": 0.9648, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 0.9321642824180896, |
|
"grad_norm": 1.7375733049730988, |
|
"learning_rate": 2.791286253322856e-07, |
|
"loss": 0.9325, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.9344716197508076, |
|
"grad_norm": 1.7376811511996035, |
|
"learning_rate": 2.605447081842838e-07, |
|
"loss": 0.9236, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.9367789570835257, |
|
"grad_norm": 1.6285382014408727, |
|
"learning_rate": 2.425927703329856e-07, |
|
"loss": 0.9374, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.9390862944162437, |
|
"grad_norm": 1.7187090366001978, |
|
"learning_rate": 2.2527397665391026e-07, |
|
"loss": 0.9408, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 0.9413936317489617, |
|
"grad_norm": 1.6018424242699771, |
|
"learning_rate": 2.0858945093876315e-07, |
|
"loss": 0.9255, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.9437009690816798, |
|
"grad_norm": 1.7403672056926338, |
|
"learning_rate": 1.9254027582250588e-07, |
|
"loss": 0.9386, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 0.9460083064143978, |
|
"grad_norm": 1.6413885110477684, |
|
"learning_rate": 1.7712749271311392e-07, |
|
"loss": 0.9463, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.9483156437471159, |
|
"grad_norm": 1.6584652712298515, |
|
"learning_rate": 1.6235210172399373e-07, |
|
"loss": 0.9197, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 0.9506229810798339, |
|
"grad_norm": 1.7146073811212112, |
|
"learning_rate": 1.4821506160909492e-07, |
|
"loss": 0.9325, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.9529303184125519, |
|
"grad_norm": 1.7862356926674563, |
|
"learning_rate": 1.3471728970068986e-07, |
|
"loss": 0.9415, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 0.95523765574527, |
|
"grad_norm": 1.7900835474706864, |
|
"learning_rate": 1.2185966184985687e-07, |
|
"loss": 0.9516, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.957544993077988, |
|
"grad_norm": 1.61965861822062, |
|
"learning_rate": 1.0964301236963904e-07, |
|
"loss": 0.9272, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.9598523304107061, |
|
"grad_norm": 1.752477339334271, |
|
"learning_rate": 9.806813398091419e-08, |
|
"loss": 0.9231, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.9621596677434241, |
|
"grad_norm": 1.6566621651012274, |
|
"learning_rate": 8.713577776095494e-08, |
|
"loss": 0.9293, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 0.9644670050761421, |
|
"grad_norm": 1.637994937785989, |
|
"learning_rate": 7.684665309468875e-08, |
|
"loss": 0.9539, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.9667743424088602, |
|
"grad_norm": 1.6112435656292767, |
|
"learning_rate": 6.720142762867032e-08, |
|
"loss": 0.9558, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 0.9690816797415782, |
|
"grad_norm": 1.7629733611663445, |
|
"learning_rate": 5.820072722775849e-08, |
|
"loss": 0.9441, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.9713890170742963, |
|
"grad_norm": 1.6502417380976349, |
|
"learning_rate": 4.984513593450424e-08, |
|
"loss": 0.9527, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 0.9736963544070143, |
|
"grad_norm": 1.697689686578805, |
|
"learning_rate": 4.2135195931249925e-08, |
|
"loss": 0.9468, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.9760036917397323, |
|
"grad_norm": 1.7391293446601994, |
|
"learning_rate": 3.50714075049563e-08, |
|
"loss": 0.932, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 0.9783110290724504, |
|
"grad_norm": 1.7092970910189305, |
|
"learning_rate": 2.8654229014730694e-08, |
|
"loss": 0.9377, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.9806183664051684, |
|
"grad_norm": 1.6908979053958657, |
|
"learning_rate": 2.2884076862089712e-08, |
|
"loss": 0.9238, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.9829257037378865, |
|
"grad_norm": 1.634296694613385, |
|
"learning_rate": 1.7761325463937495e-08, |
|
"loss": 0.9473, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.9852330410706045, |
|
"grad_norm": 1.700195091519778, |
|
"learning_rate": 1.3286307228269623e-08, |
|
"loss": 0.9491, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 0.9875403784033225, |
|
"grad_norm": 1.6899977441806657, |
|
"learning_rate": 9.459312532608122e-09, |
|
"loss": 0.9393, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.9898477157360406, |
|
"grad_norm": 1.6459431767651571, |
|
"learning_rate": 6.280589705153217e-09, |
|
"loss": 0.9316, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 0.9921550530687586, |
|
"grad_norm": 1.6824622254831971, |
|
"learning_rate": 3.750345008675105e-09, |
|
"loss": 0.9455, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.9944623904014767, |
|
"grad_norm": 1.6711402676574085, |
|
"learning_rate": 1.8687426271246646e-09, |
|
"loss": 0.9299, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 0.9967697277341947, |
|
"grad_norm": 1.7394983815714609, |
|
"learning_rate": 6.359046549864189e-10, |
|
"loss": 0.9262, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.9990770650669127, |
|
"grad_norm": 1.9029748936658466, |
|
"learning_rate": 5.1911089347100876e-11, |
|
"loss": 0.973, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.9539673924446106, |
|
"eval_runtime": 317.6397, |
|
"eval_samples_per_second": 48.325, |
|
"eval_steps_per_second": 0.756, |
|
"step": 2167 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 2167, |
|
"total_flos": 453725713858560.0, |
|
"train_loss": 1.0076359760722753, |
|
"train_runtime": 13620.7718, |
|
"train_samples_per_second": 10.182, |
|
"train_steps_per_second": 0.159 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 2167, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 453725713858560.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|