|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.0, |
|
"eval_steps": 500, |
|
"global_step": 6932, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0005770340450086555, |
|
"grad_norm": 1.3359850523956383, |
|
"learning_rate": 2.8818443804034583e-08, |
|
"loss": 0.656, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0028851702250432777, |
|
"grad_norm": 1.415611871814656, |
|
"learning_rate": 1.4409221902017293e-07, |
|
"loss": 0.6481, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.005770340450086555, |
|
"grad_norm": 1.212781925465119, |
|
"learning_rate": 2.8818443804034586e-07, |
|
"loss": 0.6386, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.008655510675129832, |
|
"grad_norm": 1.2361637923329842, |
|
"learning_rate": 4.322766570605188e-07, |
|
"loss": 0.65, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.01154068090017311, |
|
"grad_norm": 1.3799250131478336, |
|
"learning_rate": 5.763688760806917e-07, |
|
"loss": 0.6423, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.014425851125216388, |
|
"grad_norm": 1.303690207496262, |
|
"learning_rate": 7.204610951008646e-07, |
|
"loss": 0.6492, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.017311021350259664, |
|
"grad_norm": 1.2010767582718596, |
|
"learning_rate": 8.645533141210376e-07, |
|
"loss": 0.6254, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.020196191575302943, |
|
"grad_norm": 1.123148811050096, |
|
"learning_rate": 1.0086455331412106e-06, |
|
"loss": 0.632, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.02308136180034622, |
|
"grad_norm": 1.017563618882216, |
|
"learning_rate": 1.1527377521613834e-06, |
|
"loss": 0.6176, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.025966532025389497, |
|
"grad_norm": 0.9670044654743236, |
|
"learning_rate": 1.2968299711815562e-06, |
|
"loss": 0.6237, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.028851702250432775, |
|
"grad_norm": 0.8817997118057925, |
|
"learning_rate": 1.4409221902017292e-06, |
|
"loss": 0.5945, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.031736872475476054, |
|
"grad_norm": 0.7910601022781902, |
|
"learning_rate": 1.5850144092219022e-06, |
|
"loss": 0.5666, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.03462204270051933, |
|
"grad_norm": 0.698866883185103, |
|
"learning_rate": 1.7291066282420752e-06, |
|
"loss": 0.5781, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03750721292556261, |
|
"grad_norm": 0.7228128874208073, |
|
"learning_rate": 1.873198847262248e-06, |
|
"loss": 0.552, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.040392383150605886, |
|
"grad_norm": 0.6829934171012417, |
|
"learning_rate": 2.0172910662824213e-06, |
|
"loss": 0.5538, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04327755337564916, |
|
"grad_norm": 0.6501090725880212, |
|
"learning_rate": 2.1613832853025936e-06, |
|
"loss": 0.5428, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.04616272360069244, |
|
"grad_norm": 0.5921206030532731, |
|
"learning_rate": 2.305475504322767e-06, |
|
"loss": 0.5523, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04904789382573572, |
|
"grad_norm": 0.5843936221943186, |
|
"learning_rate": 2.4495677233429396e-06, |
|
"loss": 0.5295, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.05193306405077899, |
|
"grad_norm": 0.4971214384044829, |
|
"learning_rate": 2.5936599423631124e-06, |
|
"loss": 0.5293, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.054818234275822275, |
|
"grad_norm": 0.502778037587032, |
|
"learning_rate": 2.7377521613832852e-06, |
|
"loss": 0.5192, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.05770340450086555, |
|
"grad_norm": 0.48028251458263926, |
|
"learning_rate": 2.8818443804034585e-06, |
|
"loss": 0.526, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.060588574725908825, |
|
"grad_norm": 0.4958418647779635, |
|
"learning_rate": 3.0259365994236312e-06, |
|
"loss": 0.507, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.06347374495095211, |
|
"grad_norm": 0.4605312245170545, |
|
"learning_rate": 3.1700288184438045e-06, |
|
"loss": 0.5209, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06635891517599539, |
|
"grad_norm": 0.45269794748416303, |
|
"learning_rate": 3.3141210374639773e-06, |
|
"loss": 0.5018, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.06924408540103866, |
|
"grad_norm": 0.4518223453145298, |
|
"learning_rate": 3.4582132564841505e-06, |
|
"loss": 0.5014, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07212925562608194, |
|
"grad_norm": 0.4782643771875912, |
|
"learning_rate": 3.602305475504323e-06, |
|
"loss": 0.5152, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.07501442585112522, |
|
"grad_norm": 0.4420045459990657, |
|
"learning_rate": 3.746397694524496e-06, |
|
"loss": 0.4823, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07789959607616849, |
|
"grad_norm": 0.42381033264166024, |
|
"learning_rate": 3.890489913544669e-06, |
|
"loss": 0.4956, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.08078476630121177, |
|
"grad_norm": 0.4406324685320003, |
|
"learning_rate": 4.0345821325648425e-06, |
|
"loss": 0.4746, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08366993652625505, |
|
"grad_norm": 0.43316241824454993, |
|
"learning_rate": 4.1786743515850145e-06, |
|
"loss": 0.4746, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.08655510675129832, |
|
"grad_norm": 0.4422617049175986, |
|
"learning_rate": 4.322766570605187e-06, |
|
"loss": 0.4791, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0894402769763416, |
|
"grad_norm": 0.41863788062040475, |
|
"learning_rate": 4.466858789625361e-06, |
|
"loss": 0.5032, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.09232544720138489, |
|
"grad_norm": 0.4448228403063216, |
|
"learning_rate": 4.610951008645534e-06, |
|
"loss": 0.4984, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09521061742642815, |
|
"grad_norm": 0.4280125696477632, |
|
"learning_rate": 4.7550432276657065e-06, |
|
"loss": 0.4966, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.09809578765147144, |
|
"grad_norm": 0.3924527289307632, |
|
"learning_rate": 4.899135446685879e-06, |
|
"loss": 0.4707, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.10098095787651472, |
|
"grad_norm": 0.405378064505572, |
|
"learning_rate": 5.043227665706052e-06, |
|
"loss": 0.4684, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.10386612810155799, |
|
"grad_norm": 0.3967929757933598, |
|
"learning_rate": 5.187319884726225e-06, |
|
"loss": 0.4685, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.10675129832660127, |
|
"grad_norm": 0.39908256939712566, |
|
"learning_rate": 5.3314121037463985e-06, |
|
"loss": 0.4777, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.10963646855164455, |
|
"grad_norm": 0.4120577956537649, |
|
"learning_rate": 5.4755043227665705e-06, |
|
"loss": 0.4834, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.11252163877668782, |
|
"grad_norm": 0.3970501930373392, |
|
"learning_rate": 5.619596541786744e-06, |
|
"loss": 0.4698, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.1154068090017311, |
|
"grad_norm": 0.41521712362317653, |
|
"learning_rate": 5.763688760806917e-06, |
|
"loss": 0.4854, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11829197922677438, |
|
"grad_norm": 0.38302949433328365, |
|
"learning_rate": 5.9077809798270905e-06, |
|
"loss": 0.4603, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.12117714945181765, |
|
"grad_norm": 0.40356716686480254, |
|
"learning_rate": 6.0518731988472625e-06, |
|
"loss": 0.4617, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12406231967686093, |
|
"grad_norm": 0.4108590792560944, |
|
"learning_rate": 6.195965417867435e-06, |
|
"loss": 0.4821, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.12694748990190421, |
|
"grad_norm": 0.38579700259781685, |
|
"learning_rate": 6.340057636887609e-06, |
|
"loss": 0.4548, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.1298326601269475, |
|
"grad_norm": 0.4008887277935931, |
|
"learning_rate": 6.484149855907782e-06, |
|
"loss": 0.4826, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.13271783035199078, |
|
"grad_norm": 0.4324585130384216, |
|
"learning_rate": 6.6282420749279545e-06, |
|
"loss": 0.4524, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.13560300057703403, |
|
"grad_norm": 0.36547940995267386, |
|
"learning_rate": 6.772334293948127e-06, |
|
"loss": 0.4472, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.13848817080207732, |
|
"grad_norm": 0.4021304207485496, |
|
"learning_rate": 6.916426512968301e-06, |
|
"loss": 0.469, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.1413733410271206, |
|
"grad_norm": 0.3986364193440875, |
|
"learning_rate": 7.060518731988473e-06, |
|
"loss": 0.4664, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.14425851125216388, |
|
"grad_norm": 0.3894372801996192, |
|
"learning_rate": 7.204610951008646e-06, |
|
"loss": 0.4596, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.14714368147720716, |
|
"grad_norm": 0.3822307947159079, |
|
"learning_rate": 7.348703170028819e-06, |
|
"loss": 0.4611, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.15002885170225044, |
|
"grad_norm": 0.3770508093447163, |
|
"learning_rate": 7.492795389048992e-06, |
|
"loss": 0.4513, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.1529140219272937, |
|
"grad_norm": 0.4040420054739231, |
|
"learning_rate": 7.636887608069165e-06, |
|
"loss": 0.4404, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.15579919215233698, |
|
"grad_norm": 0.3763215501882052, |
|
"learning_rate": 7.780979827089338e-06, |
|
"loss": 0.4484, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.15868436237738026, |
|
"grad_norm": 0.3823790521602452, |
|
"learning_rate": 7.92507204610951e-06, |
|
"loss": 0.4568, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.16156953260242354, |
|
"grad_norm": 0.3732843632883636, |
|
"learning_rate": 8.069164265129685e-06, |
|
"loss": 0.4389, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.16445470282746683, |
|
"grad_norm": 0.37767577343237896, |
|
"learning_rate": 8.213256484149856e-06, |
|
"loss": 0.4572, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.1673398730525101, |
|
"grad_norm": 0.3980275526344354, |
|
"learning_rate": 8.357348703170029e-06, |
|
"loss": 0.4625, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.1702250432775534, |
|
"grad_norm": 0.4046607258650182, |
|
"learning_rate": 8.501440922190203e-06, |
|
"loss": 0.488, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.17311021350259664, |
|
"grad_norm": 0.3759989273900334, |
|
"learning_rate": 8.645533141210375e-06, |
|
"loss": 0.4696, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.17599538372763993, |
|
"grad_norm": 0.3782703272325284, |
|
"learning_rate": 8.789625360230547e-06, |
|
"loss": 0.434, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.1788805539526832, |
|
"grad_norm": 0.3906713039092043, |
|
"learning_rate": 8.933717579250722e-06, |
|
"loss": 0.4565, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.1817657241777265, |
|
"grad_norm": 0.3805412690860938, |
|
"learning_rate": 9.077809798270895e-06, |
|
"loss": 0.448, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.18465089440276977, |
|
"grad_norm": 0.39402776095093084, |
|
"learning_rate": 9.221902017291067e-06, |
|
"loss": 0.4553, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.18753606462781305, |
|
"grad_norm": 0.45284451101047796, |
|
"learning_rate": 9.36599423631124e-06, |
|
"loss": 0.4447, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.1904212348528563, |
|
"grad_norm": 0.3873105480189736, |
|
"learning_rate": 9.510086455331413e-06, |
|
"loss": 0.4499, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.1933064050778996, |
|
"grad_norm": 0.40199355492029853, |
|
"learning_rate": 9.654178674351586e-06, |
|
"loss": 0.454, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.19619157530294287, |
|
"grad_norm": 0.38330387655003734, |
|
"learning_rate": 9.798270893371759e-06, |
|
"loss": 0.4484, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.19907674552798615, |
|
"grad_norm": 0.3790700593281963, |
|
"learning_rate": 9.942363112391931e-06, |
|
"loss": 0.4693, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.20196191575302944, |
|
"grad_norm": 0.4091985418093552, |
|
"learning_rate": 1.0086455331412104e-05, |
|
"loss": 0.4638, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.20484708597807272, |
|
"grad_norm": 0.3816167638203252, |
|
"learning_rate": 1.0230547550432277e-05, |
|
"loss": 0.4529, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.20773225620311597, |
|
"grad_norm": 0.3705496517450964, |
|
"learning_rate": 1.037463976945245e-05, |
|
"loss": 0.4491, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.21061742642815925, |
|
"grad_norm": 0.4096345206746822, |
|
"learning_rate": 1.0518731988472624e-05, |
|
"loss": 0.4769, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.21350259665320254, |
|
"grad_norm": 0.3828300212257734, |
|
"learning_rate": 1.0662824207492797e-05, |
|
"loss": 0.446, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.21638776687824582, |
|
"grad_norm": 0.3664642342578931, |
|
"learning_rate": 1.080691642651297e-05, |
|
"loss": 0.4629, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.2192729371032891, |
|
"grad_norm": 0.3701318700670979, |
|
"learning_rate": 1.0951008645533141e-05, |
|
"loss": 0.4454, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.22215810732833238, |
|
"grad_norm": 0.3765944116998475, |
|
"learning_rate": 1.1095100864553314e-05, |
|
"loss": 0.445, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.22504327755337564, |
|
"grad_norm": 0.35848655613640135, |
|
"learning_rate": 1.1239193083573488e-05, |
|
"loss": 0.4482, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.22792844777841892, |
|
"grad_norm": 0.3522345707962982, |
|
"learning_rate": 1.1383285302593661e-05, |
|
"loss": 0.4488, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.2308136180034622, |
|
"grad_norm": 0.42916841303669334, |
|
"learning_rate": 1.1527377521613834e-05, |
|
"loss": 0.4815, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.23369878822850548, |
|
"grad_norm": 0.39333826210382833, |
|
"learning_rate": 1.1671469740634007e-05, |
|
"loss": 0.4479, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.23658395845354876, |
|
"grad_norm": 0.395568573594799, |
|
"learning_rate": 1.1815561959654181e-05, |
|
"loss": 0.4375, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.23946912867859205, |
|
"grad_norm": 0.36804463353795325, |
|
"learning_rate": 1.1959654178674354e-05, |
|
"loss": 0.4544, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.2423542989036353, |
|
"grad_norm": 0.4057948653659005, |
|
"learning_rate": 1.2103746397694525e-05, |
|
"loss": 0.4489, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.24523946912867858, |
|
"grad_norm": 0.4042926809090149, |
|
"learning_rate": 1.2247838616714698e-05, |
|
"loss": 0.4432, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.24812463935372187, |
|
"grad_norm": 0.3746660432560446, |
|
"learning_rate": 1.239193083573487e-05, |
|
"loss": 0.454, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.2510098095787651, |
|
"grad_norm": 0.39348997561248156, |
|
"learning_rate": 1.2536023054755045e-05, |
|
"loss": 0.4647, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.25389497980380843, |
|
"grad_norm": 0.3872848928403898, |
|
"learning_rate": 1.2680115273775218e-05, |
|
"loss": 0.4535, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.2567801500288517, |
|
"grad_norm": 0.3813359635416143, |
|
"learning_rate": 1.282420749279539e-05, |
|
"loss": 0.4508, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.259665320253895, |
|
"grad_norm": 0.40132785614459726, |
|
"learning_rate": 1.2968299711815563e-05, |
|
"loss": 0.4549, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.26255049047893825, |
|
"grad_norm": 0.3674608489689301, |
|
"learning_rate": 1.3112391930835735e-05, |
|
"loss": 0.4519, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.26543566070398156, |
|
"grad_norm": 0.3672381265982974, |
|
"learning_rate": 1.3256484149855909e-05, |
|
"loss": 0.4504, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.2683208309290248, |
|
"grad_norm": 0.41466539433993804, |
|
"learning_rate": 1.3400576368876082e-05, |
|
"loss": 0.4509, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.27120600115406807, |
|
"grad_norm": 0.37606450867708474, |
|
"learning_rate": 1.3544668587896255e-05, |
|
"loss": 0.4646, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.2740911713791114, |
|
"grad_norm": 0.3868252718025835, |
|
"learning_rate": 1.3688760806916427e-05, |
|
"loss": 0.4395, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.27697634160415463, |
|
"grad_norm": 0.36956730373024904, |
|
"learning_rate": 1.3832853025936602e-05, |
|
"loss": 0.4421, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.27986151182919794, |
|
"grad_norm": 0.3703399882804596, |
|
"learning_rate": 1.3976945244956775e-05, |
|
"loss": 0.4288, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.2827466820542412, |
|
"grad_norm": 0.4006912338102629, |
|
"learning_rate": 1.4121037463976946e-05, |
|
"loss": 0.4721, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.2856318522792845, |
|
"grad_norm": 0.3443154071927776, |
|
"learning_rate": 1.4265129682997119e-05, |
|
"loss": 0.4322, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.28851702250432776, |
|
"grad_norm": 0.37915946639466674, |
|
"learning_rate": 1.4409221902017291e-05, |
|
"loss": 0.4264, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.291402192729371, |
|
"grad_norm": 0.38032216321312473, |
|
"learning_rate": 1.4553314121037466e-05, |
|
"loss": 0.4408, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.2942873629544143, |
|
"grad_norm": 0.40009286504791697, |
|
"learning_rate": 1.4697406340057639e-05, |
|
"loss": 0.4616, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.2971725331794576, |
|
"grad_norm": 0.35910974753646935, |
|
"learning_rate": 1.4841498559077811e-05, |
|
"loss": 0.4306, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.3000577034045009, |
|
"grad_norm": 0.37319150994773553, |
|
"learning_rate": 1.4985590778097984e-05, |
|
"loss": 0.4497, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.30294287362954414, |
|
"grad_norm": 0.3637719085871802, |
|
"learning_rate": 1.5129682997118155e-05, |
|
"loss": 0.4551, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.3058280438545874, |
|
"grad_norm": 0.3575259753574912, |
|
"learning_rate": 1.527377521613833e-05, |
|
"loss": 0.447, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.3087132140796307, |
|
"grad_norm": 0.3596026864789941, |
|
"learning_rate": 1.54178674351585e-05, |
|
"loss": 0.4642, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.31159838430467396, |
|
"grad_norm": 0.3613487147969066, |
|
"learning_rate": 1.5561959654178675e-05, |
|
"loss": 0.4384, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.31448355452971727, |
|
"grad_norm": 0.5248264510992934, |
|
"learning_rate": 1.570605187319885e-05, |
|
"loss": 0.4427, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.3173687247547605, |
|
"grad_norm": 0.3648196713346363, |
|
"learning_rate": 1.585014409221902e-05, |
|
"loss": 0.4649, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.32025389497980383, |
|
"grad_norm": 0.40066795102765307, |
|
"learning_rate": 1.5994236311239196e-05, |
|
"loss": 0.439, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.3231390652048471, |
|
"grad_norm": 0.33951584959069336, |
|
"learning_rate": 1.613832853025937e-05, |
|
"loss": 0.4432, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.32602423542989034, |
|
"grad_norm": 0.3533117973734279, |
|
"learning_rate": 1.6282420749279538e-05, |
|
"loss": 0.4335, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.32890940565493365, |
|
"grad_norm": 0.37863111987341347, |
|
"learning_rate": 1.6426512968299712e-05, |
|
"loss": 0.428, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3317945758799769, |
|
"grad_norm": 0.4346675097845832, |
|
"learning_rate": 1.6570605187319887e-05, |
|
"loss": 0.449, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.3346797461050202, |
|
"grad_norm": 0.37817008201494773, |
|
"learning_rate": 1.6714697406340058e-05, |
|
"loss": 0.4433, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.33756491633006347, |
|
"grad_norm": 0.3808322285847144, |
|
"learning_rate": 1.6858789625360232e-05, |
|
"loss": 0.4466, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.3404500865551068, |
|
"grad_norm": 0.37923635129260924, |
|
"learning_rate": 1.7002881844380407e-05, |
|
"loss": 0.4476, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.34333525678015003, |
|
"grad_norm": 0.3823099849897589, |
|
"learning_rate": 1.7146974063400578e-05, |
|
"loss": 0.4323, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.3462204270051933, |
|
"grad_norm": 0.3818824327579753, |
|
"learning_rate": 1.729106628242075e-05, |
|
"loss": 0.4375, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.3491055972302366, |
|
"grad_norm": 0.37210993394094566, |
|
"learning_rate": 1.7435158501440924e-05, |
|
"loss": 0.4305, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.35199076745527985, |
|
"grad_norm": 0.3465073067639098, |
|
"learning_rate": 1.7579250720461095e-05, |
|
"loss": 0.4255, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.35487593768032316, |
|
"grad_norm": 0.38378004625397016, |
|
"learning_rate": 1.772334293948127e-05, |
|
"loss": 0.4324, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.3577611079053664, |
|
"grad_norm": 0.37432578456970733, |
|
"learning_rate": 1.7867435158501444e-05, |
|
"loss": 0.4543, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.36064627813040967, |
|
"grad_norm": 0.3341493702083905, |
|
"learning_rate": 1.8011527377521615e-05, |
|
"loss": 0.4501, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.363531448355453, |
|
"grad_norm": 0.3618443471925012, |
|
"learning_rate": 1.815561959654179e-05, |
|
"loss": 0.4387, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.36641661858049623, |
|
"grad_norm": 0.3493534570885224, |
|
"learning_rate": 1.829971181556196e-05, |
|
"loss": 0.4483, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.36930178880553954, |
|
"grad_norm": 0.37866953974278367, |
|
"learning_rate": 1.8443804034582135e-05, |
|
"loss": 0.4289, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.3721869590305828, |
|
"grad_norm": 0.37227424777091433, |
|
"learning_rate": 1.8587896253602306e-05, |
|
"loss": 0.4306, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.3750721292556261, |
|
"grad_norm": 0.4014872351370534, |
|
"learning_rate": 1.873198847262248e-05, |
|
"loss": 0.4414, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.37795729948066936, |
|
"grad_norm": 0.3724847659964483, |
|
"learning_rate": 1.887608069164265e-05, |
|
"loss": 0.4446, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.3808424697057126, |
|
"grad_norm": 0.3783020221193775, |
|
"learning_rate": 1.9020172910662826e-05, |
|
"loss": 0.4608, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.3837276399307559, |
|
"grad_norm": 0.4307776328223203, |
|
"learning_rate": 1.9164265129683e-05, |
|
"loss": 0.4343, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.3866128101557992, |
|
"grad_norm": 0.37291676085751585, |
|
"learning_rate": 1.930835734870317e-05, |
|
"loss": 0.4263, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.3894979803808425, |
|
"grad_norm": 0.3765824744528829, |
|
"learning_rate": 1.9452449567723343e-05, |
|
"loss": 0.4258, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.39238315060588574, |
|
"grad_norm": 0.354531651779117, |
|
"learning_rate": 1.9596541786743517e-05, |
|
"loss": 0.4504, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.395268320830929, |
|
"grad_norm": 0.3695419929782549, |
|
"learning_rate": 1.974063400576369e-05, |
|
"loss": 0.4663, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.3981534910559723, |
|
"grad_norm": 0.3752799655893121, |
|
"learning_rate": 1.9884726224783863e-05, |
|
"loss": 0.4519, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.40103866128101556, |
|
"grad_norm": 0.3743552771364968, |
|
"learning_rate": 1.9999998731825553e-05, |
|
"loss": 0.4466, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.4039238315060589, |
|
"grad_norm": 0.3690677017067688, |
|
"learning_rate": 1.9999954345753522e-05, |
|
"loss": 0.4672, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4068090017311021, |
|
"grad_norm": 0.3521337986777979, |
|
"learning_rate": 1.9999846551280566e-05, |
|
"loss": 0.4546, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.40969417195614544, |
|
"grad_norm": 0.371125205175818, |
|
"learning_rate": 1.99996753490902e-05, |
|
"loss": 0.4476, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.4125793421811887, |
|
"grad_norm": 0.3870027577034139, |
|
"learning_rate": 1.999944074026799e-05, |
|
"loss": 0.4528, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.41546451240623195, |
|
"grad_norm": 0.3694599366137213, |
|
"learning_rate": 1.999914272630156e-05, |
|
"loss": 0.4435, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.41834968263127525, |
|
"grad_norm": 0.39229103038261187, |
|
"learning_rate": 1.999878130908058e-05, |
|
"loss": 0.4624, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.4212348528563185, |
|
"grad_norm": 0.37197227629586244, |
|
"learning_rate": 1.999835649089675e-05, |
|
"loss": 0.4392, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.4241200230813618, |
|
"grad_norm": 0.36658551403514933, |
|
"learning_rate": 1.9997868274443787e-05, |
|
"loss": 0.4479, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.4270051933064051, |
|
"grad_norm": 0.38507395642784864, |
|
"learning_rate": 1.9997316662817403e-05, |
|
"loss": 0.4448, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.4298903635314484, |
|
"grad_norm": 0.36930658832139956, |
|
"learning_rate": 1.9996701659515293e-05, |
|
"loss": 0.4338, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.43277553375649164, |
|
"grad_norm": 0.3415004408124957, |
|
"learning_rate": 1.999602326843712e-05, |
|
"loss": 0.4571, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4356607039815349, |
|
"grad_norm": 0.3616392339336591, |
|
"learning_rate": 1.999528149388447e-05, |
|
"loss": 0.4364, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.4385458742065782, |
|
"grad_norm": 0.3614440903798367, |
|
"learning_rate": 1.999447634056084e-05, |
|
"loss": 0.442, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.44143104443162146, |
|
"grad_norm": 0.3475594464185632, |
|
"learning_rate": 1.9993607813571595e-05, |
|
"loss": 0.4302, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.44431621465666477, |
|
"grad_norm": 0.38907218952119343, |
|
"learning_rate": 1.9992675918423958e-05, |
|
"loss": 0.4475, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.447201384881708, |
|
"grad_norm": 0.3532869868641272, |
|
"learning_rate": 1.999168066102695e-05, |
|
"loss": 0.4486, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.4500865551067513, |
|
"grad_norm": 0.37651731293442015, |
|
"learning_rate": 1.9990622047691372e-05, |
|
"loss": 0.4326, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.4529717253317946, |
|
"grad_norm": 0.3748786150498863, |
|
"learning_rate": 1.998950008512975e-05, |
|
"loss": 0.4474, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.45585689555683784, |
|
"grad_norm": 0.39902509681503917, |
|
"learning_rate": 1.9988314780456305e-05, |
|
"loss": 0.4568, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.45874206578188115, |
|
"grad_norm": 0.3884381984087952, |
|
"learning_rate": 1.998706614118689e-05, |
|
"loss": 0.4568, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.4616272360069244, |
|
"grad_norm": 0.36054442749164206, |
|
"learning_rate": 1.9985754175238972e-05, |
|
"loss": 0.4429, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.4645124062319677, |
|
"grad_norm": 0.3527339288196345, |
|
"learning_rate": 1.998437889093155e-05, |
|
"loss": 0.4437, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.46739757645701097, |
|
"grad_norm": 0.3737014283866458, |
|
"learning_rate": 1.9982940296985124e-05, |
|
"loss": 0.4469, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.4702827466820542, |
|
"grad_norm": 0.3836320689851165, |
|
"learning_rate": 1.998143840252163e-05, |
|
"loss": 0.4391, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.47316791690709753, |
|
"grad_norm": 0.36567112690649783, |
|
"learning_rate": 1.997987321706438e-05, |
|
"loss": 0.4414, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.4760530871321408, |
|
"grad_norm": 0.37388859283028064, |
|
"learning_rate": 1.9978244750538017e-05, |
|
"loss": 0.4408, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.4789382573571841, |
|
"grad_norm": 0.35288917666405584, |
|
"learning_rate": 1.9976553013268428e-05, |
|
"loss": 0.4432, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.48182342758222735, |
|
"grad_norm": 0.35467525608999173, |
|
"learning_rate": 1.9974798015982704e-05, |
|
"loss": 0.4319, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.4847085978072706, |
|
"grad_norm": 0.36246807797824815, |
|
"learning_rate": 1.9972979769809044e-05, |
|
"loss": 0.4421, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.4875937680323139, |
|
"grad_norm": 0.38809255131157183, |
|
"learning_rate": 1.9971098286276716e-05, |
|
"loss": 0.438, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.49047893825735717, |
|
"grad_norm": 0.35620554170658003, |
|
"learning_rate": 1.996915357731596e-05, |
|
"loss": 0.4361, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.4933641084824005, |
|
"grad_norm": 0.37319706651769974, |
|
"learning_rate": 1.996714565525792e-05, |
|
"loss": 0.4273, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.49624927870744373, |
|
"grad_norm": 0.3561234532855242, |
|
"learning_rate": 1.996507453283456e-05, |
|
"loss": 0.4405, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.49913444893248704, |
|
"grad_norm": 0.3599439467426677, |
|
"learning_rate": 1.9962940223178607e-05, |
|
"loss": 0.4209, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.5020196191575302, |
|
"grad_norm": 0.3708191889641566, |
|
"learning_rate": 1.9960742739823433e-05, |
|
"loss": 0.4375, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.5049047893825735, |
|
"grad_norm": 0.371986303129069, |
|
"learning_rate": 1.9958482096702997e-05, |
|
"loss": 0.44, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.5077899596076169, |
|
"grad_norm": 0.37613060788442565, |
|
"learning_rate": 1.995615830815173e-05, |
|
"loss": 0.444, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.5106751298326602, |
|
"grad_norm": 0.35285659183130846, |
|
"learning_rate": 1.9953771388904484e-05, |
|
"loss": 0.4483, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.5135603000577034, |
|
"grad_norm": 0.3744385315940648, |
|
"learning_rate": 1.9951321354096395e-05, |
|
"loss": 0.4307, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.5164454702827467, |
|
"grad_norm": 0.3527685970235997, |
|
"learning_rate": 1.9948808219262813e-05, |
|
"loss": 0.4352, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.51933064050779, |
|
"grad_norm": 0.3629898148956746, |
|
"learning_rate": 1.9946232000339192e-05, |
|
"loss": 0.4325, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.5222158107328332, |
|
"grad_norm": 0.33230153297658693, |
|
"learning_rate": 1.994359271366101e-05, |
|
"loss": 0.4358, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.5251009809578765, |
|
"grad_norm": 0.36872688475457055, |
|
"learning_rate": 1.9940890375963634e-05, |
|
"loss": 0.4389, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.5279861511829198, |
|
"grad_norm": 0.3500504418913126, |
|
"learning_rate": 1.9938125004382226e-05, |
|
"loss": 0.4368, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.5308713214079631, |
|
"grad_norm": 0.38076969493807267, |
|
"learning_rate": 1.9935296616451654e-05, |
|
"loss": 0.4224, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.5337564916330063, |
|
"grad_norm": 0.38809106802534876, |
|
"learning_rate": 1.9932405230106352e-05, |
|
"loss": 0.458, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.5366416618580496, |
|
"grad_norm": 0.34830821561529673, |
|
"learning_rate": 1.992945086368022e-05, |
|
"loss": 0.4427, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.5395268320830929, |
|
"grad_norm": 0.36348521739778694, |
|
"learning_rate": 1.9926433535906514e-05, |
|
"loss": 0.4332, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.5424120023081361, |
|
"grad_norm": 0.3516690545778123, |
|
"learning_rate": 1.992335326591771e-05, |
|
"loss": 0.4493, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5452971725331794, |
|
"grad_norm": 0.3622130880303389, |
|
"learning_rate": 1.99202100732454e-05, |
|
"loss": 0.4399, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.5481823427582228, |
|
"grad_norm": 0.3340397775809456, |
|
"learning_rate": 1.9917003977820154e-05, |
|
"loss": 0.4155, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5510675129832661, |
|
"grad_norm": 0.3373549627806811, |
|
"learning_rate": 1.9913734999971402e-05, |
|
"loss": 0.4343, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.5539526832083093, |
|
"grad_norm": 0.34945896571648644, |
|
"learning_rate": 1.9910403160427308e-05, |
|
"loss": 0.4423, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.5568378534333526, |
|
"grad_norm": 0.3477598210882681, |
|
"learning_rate": 1.990700848031463e-05, |
|
"loss": 0.4328, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.5597230236583959, |
|
"grad_norm": 0.3728272868907162, |
|
"learning_rate": 1.990355098115859e-05, |
|
"loss": 0.4407, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5626081938834391, |
|
"grad_norm": 0.36540010757747865, |
|
"learning_rate": 1.990003068488273e-05, |
|
"loss": 0.4254, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.5654933641084824, |
|
"grad_norm": 0.3491338839333341, |
|
"learning_rate": 1.9896447613808802e-05, |
|
"loss": 0.4333, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5683785343335257, |
|
"grad_norm": 0.3453766808693869, |
|
"learning_rate": 1.9892801790656575e-05, |
|
"loss": 0.4505, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.571263704558569, |
|
"grad_norm": 0.3663489965490133, |
|
"learning_rate": 1.9889093238543746e-05, |
|
"loss": 0.4308, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.5741488747836122, |
|
"grad_norm": 0.3679457848990051, |
|
"learning_rate": 1.9885321980985758e-05, |
|
"loss": 0.4594, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.5770340450086555, |
|
"grad_norm": 0.3780068047990749, |
|
"learning_rate": 1.988148804189566e-05, |
|
"loss": 0.4255, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5799192152336988, |
|
"grad_norm": 0.36727910455394897, |
|
"learning_rate": 1.987759144558395e-05, |
|
"loss": 0.4378, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.582804385458742, |
|
"grad_norm": 0.36166322845028065, |
|
"learning_rate": 1.9873632216758448e-05, |
|
"loss": 0.4445, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.5856895556837853, |
|
"grad_norm": 0.3412780788439251, |
|
"learning_rate": 1.9869610380524098e-05, |
|
"loss": 0.4285, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.5885747259088286, |
|
"grad_norm": 0.3642868390810403, |
|
"learning_rate": 1.986552596238284e-05, |
|
"loss": 0.426, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.5914598961338718, |
|
"grad_norm": 0.3513636822125392, |
|
"learning_rate": 1.986137898823343e-05, |
|
"loss": 0.4689, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.5943450663589152, |
|
"grad_norm": 0.37061082820118274, |
|
"learning_rate": 1.985716948437129e-05, |
|
"loss": 0.4396, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.5972302365839585, |
|
"grad_norm": 0.35578918227279527, |
|
"learning_rate": 1.9852897477488335e-05, |
|
"loss": 0.4347, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.6001154068090018, |
|
"grad_norm": 0.37785556076696936, |
|
"learning_rate": 1.9848562994672797e-05, |
|
"loss": 0.4272, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.603000577034045, |
|
"grad_norm": 0.36978022207552363, |
|
"learning_rate": 1.984416606340907e-05, |
|
"loss": 0.4326, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.6058857472590883, |
|
"grad_norm": 0.3791504839723979, |
|
"learning_rate": 1.9839706711577514e-05, |
|
"loss": 0.4587, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.6087709174841316, |
|
"grad_norm": 0.3710313865819546, |
|
"learning_rate": 1.98351849674543e-05, |
|
"loss": 0.457, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.6116560877091748, |
|
"grad_norm": 0.35231701107663654, |
|
"learning_rate": 1.9830600859711207e-05, |
|
"loss": 0.4144, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.6145412579342181, |
|
"grad_norm": 0.3615898765366417, |
|
"learning_rate": 1.982595441741547e-05, |
|
"loss": 0.4401, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.6174264281592614, |
|
"grad_norm": 0.35977723996205, |
|
"learning_rate": 1.9821245670029566e-05, |
|
"loss": 0.4309, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.6203115983843047, |
|
"grad_norm": 0.36669507423796843, |
|
"learning_rate": 1.9816474647411047e-05, |
|
"loss": 0.4383, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.6231967686093479, |
|
"grad_norm": 0.35392661751275434, |
|
"learning_rate": 1.9811641379812342e-05, |
|
"loss": 0.446, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.6260819388343912, |
|
"grad_norm": 0.3409833418281117, |
|
"learning_rate": 1.9806745897880573e-05, |
|
"loss": 0.4367, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.6289671090594345, |
|
"grad_norm": 0.37150843192119876, |
|
"learning_rate": 1.9801788232657343e-05, |
|
"loss": 0.4527, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.6318522792844777, |
|
"grad_norm": 0.36569227925348624, |
|
"learning_rate": 1.9796768415578564e-05, |
|
"loss": 0.4436, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.634737449509521, |
|
"grad_norm": 0.35016739834472266, |
|
"learning_rate": 1.9791686478474235e-05, |
|
"loss": 0.4368, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.6376226197345644, |
|
"grad_norm": 0.36715520059954826, |
|
"learning_rate": 1.978654245356825e-05, |
|
"loss": 0.4354, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.6405077899596077, |
|
"grad_norm": 0.3840761403084782, |
|
"learning_rate": 1.9781336373478207e-05, |
|
"loss": 0.4523, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.6433929601846509, |
|
"grad_norm": 0.3154658430275838, |
|
"learning_rate": 1.9776068271215167e-05, |
|
"loss": 0.4406, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.6462781304096942, |
|
"grad_norm": 0.3772719849710032, |
|
"learning_rate": 1.9770738180183485e-05, |
|
"loss": 0.4344, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.6491633006347375, |
|
"grad_norm": 0.3381456770156173, |
|
"learning_rate": 1.9765346134180567e-05, |
|
"loss": 0.4324, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.6520484708597807, |
|
"grad_norm": 0.3498102184043816, |
|
"learning_rate": 1.975989216739667e-05, |
|
"loss": 0.4294, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.654933641084824, |
|
"grad_norm": 0.3449183171225279, |
|
"learning_rate": 1.9754376314414686e-05, |
|
"loss": 0.4139, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.6578188113098673, |
|
"grad_norm": 0.3541093995239644, |
|
"learning_rate": 1.9748798610209912e-05, |
|
"loss": 0.4295, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6607039815349106, |
|
"grad_norm": 0.36096527367909803, |
|
"learning_rate": 1.9743159090149844e-05, |
|
"loss": 0.4605, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.6635891517599538, |
|
"grad_norm": 0.334233191996567, |
|
"learning_rate": 1.9737457789993936e-05, |
|
"loss": 0.4368, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.6664743219849971, |
|
"grad_norm": 0.35974161365839685, |
|
"learning_rate": 1.973169474589339e-05, |
|
"loss": 0.428, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.6693594922100404, |
|
"grad_norm": 0.3568705187506466, |
|
"learning_rate": 1.9725869994390912e-05, |
|
"loss": 0.4417, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.6722446624350836, |
|
"grad_norm": 0.33809479002108944, |
|
"learning_rate": 1.971998357242049e-05, |
|
"loss": 0.4413, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.6751298326601269, |
|
"grad_norm": 0.3502193821967476, |
|
"learning_rate": 1.971403551730715e-05, |
|
"loss": 0.4564, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.6780150028851702, |
|
"grad_norm": 0.35976087067679435, |
|
"learning_rate": 1.9708025866766735e-05, |
|
"loss": 0.4263, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.6809001731102136, |
|
"grad_norm": 0.34825014902921947, |
|
"learning_rate": 1.9701954658905652e-05, |
|
"loss": 0.4324, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.6837853433352568, |
|
"grad_norm": 0.3515962597578983, |
|
"learning_rate": 1.9695821932220632e-05, |
|
"loss": 0.4347, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.6866705135603001, |
|
"grad_norm": 0.3518698997384743, |
|
"learning_rate": 1.9689627725598495e-05, |
|
"loss": 0.4141, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.6895556837853434, |
|
"grad_norm": 0.3406825005378319, |
|
"learning_rate": 1.9683372078315892e-05, |
|
"loss": 0.4294, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.6924408540103866, |
|
"grad_norm": 0.3555369233516162, |
|
"learning_rate": 1.9677055030039067e-05, |
|
"loss": 0.4389, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6953260242354299, |
|
"grad_norm": 0.4609971974027847, |
|
"learning_rate": 1.967067662082359e-05, |
|
"loss": 0.4479, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.6982111944604732, |
|
"grad_norm": 0.3533102697284043, |
|
"learning_rate": 1.966423689111412e-05, |
|
"loss": 0.4594, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.7010963646855164, |
|
"grad_norm": 0.41163003146138266, |
|
"learning_rate": 1.965773588174414e-05, |
|
"loss": 0.4211, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.7039815349105597, |
|
"grad_norm": 0.36355249316504007, |
|
"learning_rate": 1.9651173633935702e-05, |
|
"loss": 0.4326, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.706866705135603, |
|
"grad_norm": 0.361512663082872, |
|
"learning_rate": 1.964455018929916e-05, |
|
"loss": 0.4605, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.7097518753606463, |
|
"grad_norm": 0.3395334945396585, |
|
"learning_rate": 1.9637865589832902e-05, |
|
"loss": 0.4552, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.7126370455856895, |
|
"grad_norm": 0.3358166664046763, |
|
"learning_rate": 1.963111987792311e-05, |
|
"loss": 0.4418, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.7155222158107328, |
|
"grad_norm": 0.35952927832201437, |
|
"learning_rate": 1.9624313096343445e-05, |
|
"loss": 0.4528, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.7184073860357761, |
|
"grad_norm": 0.3516252150445922, |
|
"learning_rate": 1.961744528825483e-05, |
|
"loss": 0.4444, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.7212925562608193, |
|
"grad_norm": 0.37486390370155487, |
|
"learning_rate": 1.961051649720513e-05, |
|
"loss": 0.4405, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.7241777264858626, |
|
"grad_norm": 0.347168899689223, |
|
"learning_rate": 1.96035267671289e-05, |
|
"loss": 0.4197, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.727062896710906, |
|
"grad_norm": 0.3553448619523881, |
|
"learning_rate": 1.9596476142347114e-05, |
|
"loss": 0.4273, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.7299480669359493, |
|
"grad_norm": 0.3747743065569822, |
|
"learning_rate": 1.958936466756685e-05, |
|
"loss": 0.4318, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.7328332371609925, |
|
"grad_norm": 0.36946705338179187, |
|
"learning_rate": 1.9582192387881037e-05, |
|
"loss": 0.439, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.7357184073860358, |
|
"grad_norm": 0.3535008834153477, |
|
"learning_rate": 1.9574959348768173e-05, |
|
"loss": 0.4215, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.7386035776110791, |
|
"grad_norm": 0.3528743884422751, |
|
"learning_rate": 1.9567665596092e-05, |
|
"loss": 0.4451, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.7414887478361223, |
|
"grad_norm": 0.36361502852826977, |
|
"learning_rate": 1.9560311176101253e-05, |
|
"loss": 0.4488, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.7443739180611656, |
|
"grad_norm": 0.3393383444681419, |
|
"learning_rate": 1.9552896135429346e-05, |
|
"loss": 0.4602, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.7472590882862089, |
|
"grad_norm": 0.3504869096195613, |
|
"learning_rate": 1.9545420521094082e-05, |
|
"loss": 0.4213, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.7501442585112522, |
|
"grad_norm": 0.33568870619524627, |
|
"learning_rate": 1.953788438049735e-05, |
|
"loss": 0.4159, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.7530294287362954, |
|
"grad_norm": 0.349599888573523, |
|
"learning_rate": 1.953028776142483e-05, |
|
"loss": 0.4245, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.7559145989613387, |
|
"grad_norm": 0.36044335412116235, |
|
"learning_rate": 1.9522630712045695e-05, |
|
"loss": 0.4253, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.758799769186382, |
|
"grad_norm": 0.33120277929093034, |
|
"learning_rate": 1.9514913280912284e-05, |
|
"loss": 0.4196, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.7616849394114252, |
|
"grad_norm": 0.3556004564806594, |
|
"learning_rate": 1.9507135516959824e-05, |
|
"loss": 0.4319, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.7645701096364685, |
|
"grad_norm": 0.357624065192376, |
|
"learning_rate": 1.949929746950609e-05, |
|
"loss": 0.4464, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.7674552798615119, |
|
"grad_norm": 0.3429194439395415, |
|
"learning_rate": 1.9491399188251123e-05, |
|
"loss": 0.4384, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7703404500865552, |
|
"grad_norm": 0.35807803811180317, |
|
"learning_rate": 1.9483440723276887e-05, |
|
"loss": 0.4274, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.7732256203115984, |
|
"grad_norm": 0.3516485111182853, |
|
"learning_rate": 1.9475422125046962e-05, |
|
"loss": 0.4314, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.7761107905366417, |
|
"grad_norm": 0.3448017580215496, |
|
"learning_rate": 1.9467343444406234e-05, |
|
"loss": 0.4305, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.778995960761685, |
|
"grad_norm": 0.33719191373319685, |
|
"learning_rate": 1.9459204732580557e-05, |
|
"loss": 0.4209, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.7818811309867282, |
|
"grad_norm": 0.357112111873714, |
|
"learning_rate": 1.9451006041176437e-05, |
|
"loss": 0.4614, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.7847663012117715, |
|
"grad_norm": 0.3638827997486545, |
|
"learning_rate": 1.9442747422180704e-05, |
|
"loss": 0.4392, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.7876514714368148, |
|
"grad_norm": 0.3779229355078506, |
|
"learning_rate": 1.9434428927960177e-05, |
|
"loss": 0.4175, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.790536641661858, |
|
"grad_norm": 0.33870833738392225, |
|
"learning_rate": 1.9426050611261343e-05, |
|
"loss": 0.4378, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.7934218118869013, |
|
"grad_norm": 0.3407658218095889, |
|
"learning_rate": 1.9417612525210007e-05, |
|
"loss": 0.4244, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.7963069821119446, |
|
"grad_norm": 0.3587456861468758, |
|
"learning_rate": 1.9409114723310967e-05, |
|
"loss": 0.4296, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.7991921523369879, |
|
"grad_norm": 0.3526548403605731, |
|
"learning_rate": 1.9400557259447673e-05, |
|
"loss": 0.4334, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.8020773225620311, |
|
"grad_norm": 0.35126996708797126, |
|
"learning_rate": 1.9391940187881885e-05, |
|
"loss": 0.4455, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.8049624927870744, |
|
"grad_norm": 0.35018120101424355, |
|
"learning_rate": 1.9383263563253322e-05, |
|
"loss": 0.4432, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.8078476630121177, |
|
"grad_norm": 0.3375686338902311, |
|
"learning_rate": 1.937452744057933e-05, |
|
"loss": 0.4453, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.8107328332371609, |
|
"grad_norm": 0.34947963167148055, |
|
"learning_rate": 1.936573187525451e-05, |
|
"loss": 0.4392, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.8136180034622043, |
|
"grad_norm": 0.329777575614968, |
|
"learning_rate": 1.9356876923050395e-05, |
|
"loss": 0.4554, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.8165031736872476, |
|
"grad_norm": 0.3481917738275054, |
|
"learning_rate": 1.9347962640115078e-05, |
|
"loss": 0.4172, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.8193883439122909, |
|
"grad_norm": 0.3488032875220557, |
|
"learning_rate": 1.933898908297286e-05, |
|
"loss": 0.4471, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.8222735141373341, |
|
"grad_norm": 0.35259907680989855, |
|
"learning_rate": 1.9329956308523886e-05, |
|
"loss": 0.4265, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.8251586843623774, |
|
"grad_norm": 0.36772785974509603, |
|
"learning_rate": 1.9320864374043805e-05, |
|
"loss": 0.4346, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.8280438545874207, |
|
"grad_norm": 0.33906704838185237, |
|
"learning_rate": 1.9311713337183375e-05, |
|
"loss": 0.4222, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.8309290248124639, |
|
"grad_norm": 0.3197548387537902, |
|
"learning_rate": 1.9302503255968127e-05, |
|
"loss": 0.4242, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.8338141950375072, |
|
"grad_norm": 0.3790505826213643, |
|
"learning_rate": 1.929323418879798e-05, |
|
"loss": 0.444, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.8366993652625505, |
|
"grad_norm": 0.356548976452809, |
|
"learning_rate": 1.928390619444687e-05, |
|
"loss": 0.4389, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.8395845354875938, |
|
"grad_norm": 0.3238309837351344, |
|
"learning_rate": 1.9274519332062393e-05, |
|
"loss": 0.4428, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.842469705712637, |
|
"grad_norm": 0.3328195979317483, |
|
"learning_rate": 1.9265073661165407e-05, |
|
"loss": 0.4418, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.8453548759376803, |
|
"grad_norm": 0.34611626618940344, |
|
"learning_rate": 1.9255569241649685e-05, |
|
"loss": 0.4311, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.8482400461627236, |
|
"grad_norm": 0.34500455296149757, |
|
"learning_rate": 1.9246006133781496e-05, |
|
"loss": 0.4307, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.8511252163877668, |
|
"grad_norm": 0.3473701533278901, |
|
"learning_rate": 1.9236384398199262e-05, |
|
"loss": 0.4484, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.8540103866128101, |
|
"grad_norm": 0.3409397737914092, |
|
"learning_rate": 1.9226704095913143e-05, |
|
"loss": 0.4247, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.8568955568378535, |
|
"grad_norm": 0.36709716826008937, |
|
"learning_rate": 1.9216965288304675e-05, |
|
"loss": 0.4454, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.8597807270628968, |
|
"grad_norm": 0.32825230009567846, |
|
"learning_rate": 1.9207168037126352e-05, |
|
"loss": 0.4246, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.86266589728794, |
|
"grad_norm": 0.33313005166969667, |
|
"learning_rate": 1.919731240450127e-05, |
|
"loss": 0.4204, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.8655510675129833, |
|
"grad_norm": 0.34672496584019735, |
|
"learning_rate": 1.91873984529227e-05, |
|
"loss": 0.4469, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8684362377380266, |
|
"grad_norm": 0.3342264609614132, |
|
"learning_rate": 1.917742624525371e-05, |
|
"loss": 0.4231, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.8713214079630698, |
|
"grad_norm": 0.3612492143077761, |
|
"learning_rate": 1.9167395844726763e-05, |
|
"loss": 0.4454, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.8742065781881131, |
|
"grad_norm": 0.3407789646703214, |
|
"learning_rate": 1.915730731494332e-05, |
|
"loss": 0.4293, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.8770917484131564, |
|
"grad_norm": 0.33592086055664455, |
|
"learning_rate": 1.914716071987342e-05, |
|
"loss": 0.4358, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8799769186381996, |
|
"grad_norm": 0.36425558440653516, |
|
"learning_rate": 1.91369561238553e-05, |
|
"loss": 0.4378, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.8828620888632429, |
|
"grad_norm": 0.3522122082887795, |
|
"learning_rate": 1.912669359159496e-05, |
|
"loss": 0.4295, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.8857472590882862, |
|
"grad_norm": 0.324930415843359, |
|
"learning_rate": 1.911637318816578e-05, |
|
"loss": 0.4251, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.8886324293133295, |
|
"grad_norm": 0.3466989166716833, |
|
"learning_rate": 1.9105994979008083e-05, |
|
"loss": 0.4328, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.8915175995383727, |
|
"grad_norm": 0.3603001072615171, |
|
"learning_rate": 1.9095559029928735e-05, |
|
"loss": 0.4331, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.894402769763416, |
|
"grad_norm": 0.35998678640671994, |
|
"learning_rate": 1.9085065407100723e-05, |
|
"loss": 0.4247, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.8972879399884593, |
|
"grad_norm": 0.3353610544242477, |
|
"learning_rate": 1.9074514177062734e-05, |
|
"loss": 0.4365, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 0.9001731102135025, |
|
"grad_norm": 0.3362819633369662, |
|
"learning_rate": 1.9063905406718735e-05, |
|
"loss": 0.4387, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.9030582804385459, |
|
"grad_norm": 0.35146022780606057, |
|
"learning_rate": 1.9053239163337553e-05, |
|
"loss": 0.4294, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.9059434506635892, |
|
"grad_norm": 0.3334044241812438, |
|
"learning_rate": 1.9042515514552438e-05, |
|
"loss": 0.4297, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.9088286208886325, |
|
"grad_norm": 0.33641137519229225, |
|
"learning_rate": 1.9031734528360642e-05, |
|
"loss": 0.42, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.9117137911136757, |
|
"grad_norm": 0.37493485136716287, |
|
"learning_rate": 1.9020896273122987e-05, |
|
"loss": 0.4254, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.914598961338719, |
|
"grad_norm": 0.3466700992777568, |
|
"learning_rate": 1.901000081756343e-05, |
|
"loss": 0.4203, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 0.9174841315637623, |
|
"grad_norm": 0.3601041654222663, |
|
"learning_rate": 1.899904823076863e-05, |
|
"loss": 0.4293, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.9203693017888055, |
|
"grad_norm": 0.5717625955327809, |
|
"learning_rate": 1.8988038582187506e-05, |
|
"loss": 0.4331, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 0.9232544720138488, |
|
"grad_norm": 0.3563731456994365, |
|
"learning_rate": 1.8976971941630792e-05, |
|
"loss": 0.4434, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.9261396422388921, |
|
"grad_norm": 0.3133483522354351, |
|
"learning_rate": 1.896584837927061e-05, |
|
"loss": 0.4381, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 0.9290248124639354, |
|
"grad_norm": 0.3602400529793251, |
|
"learning_rate": 1.8954667965640013e-05, |
|
"loss": 0.4295, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.9319099826889786, |
|
"grad_norm": 0.3660387727678179, |
|
"learning_rate": 1.894343077163254e-05, |
|
"loss": 0.4137, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 0.9347951529140219, |
|
"grad_norm": 0.33502056662496815, |
|
"learning_rate": 1.8932136868501763e-05, |
|
"loss": 0.431, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.9376803231390652, |
|
"grad_norm": 0.3519580845164796, |
|
"learning_rate": 1.8920786327860848e-05, |
|
"loss": 0.4382, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.9405654933641084, |
|
"grad_norm": 0.3609318959364458, |
|
"learning_rate": 1.8909379221682083e-05, |
|
"loss": 0.4263, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.9434506635891518, |
|
"grad_norm": 0.3470987740772583, |
|
"learning_rate": 1.8897915622296436e-05, |
|
"loss": 0.436, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 0.9463358338141951, |
|
"grad_norm": 0.3524435689745886, |
|
"learning_rate": 1.8886395602393087e-05, |
|
"loss": 0.4364, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.9492210040392384, |
|
"grad_norm": 0.3438549232804042, |
|
"learning_rate": 1.887481923501898e-05, |
|
"loss": 0.4457, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.9521061742642816, |
|
"grad_norm": 0.3359346965205838, |
|
"learning_rate": 1.8863186593578336e-05, |
|
"loss": 0.4489, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.9549913444893249, |
|
"grad_norm": 0.33509833712084897, |
|
"learning_rate": 1.8851497751832216e-05, |
|
"loss": 0.4291, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 0.9578765147143682, |
|
"grad_norm": 0.34585796301858324, |
|
"learning_rate": 1.8839752783898033e-05, |
|
"loss": 0.4449, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.9607616849394114, |
|
"grad_norm": 0.38171505764397407, |
|
"learning_rate": 1.8827951764249093e-05, |
|
"loss": 0.4482, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 0.9636468551644547, |
|
"grad_norm": 0.37008446077548557, |
|
"learning_rate": 1.8816094767714113e-05, |
|
"loss": 0.441, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.966532025389498, |
|
"grad_norm": 0.36975245636292803, |
|
"learning_rate": 1.880418186947676e-05, |
|
"loss": 0.4411, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.9694171956145412, |
|
"grad_norm": 0.36040770935696576, |
|
"learning_rate": 1.8792213145075156e-05, |
|
"loss": 0.4279, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.9723023658395845, |
|
"grad_norm": 0.3555551748715796, |
|
"learning_rate": 1.8780188670401415e-05, |
|
"loss": 0.4201, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 0.9751875360646278, |
|
"grad_norm": 0.34389008179144037, |
|
"learning_rate": 1.876810852170116e-05, |
|
"loss": 0.4659, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.9780727062896711, |
|
"grad_norm": 0.34681456919275633, |
|
"learning_rate": 1.875597277557303e-05, |
|
"loss": 0.4214, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 0.9809578765147143, |
|
"grad_norm": 0.33612247065522033, |
|
"learning_rate": 1.8743781508968206e-05, |
|
"loss": 0.4322, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.9838430467397576, |
|
"grad_norm": 0.35053266185577925, |
|
"learning_rate": 1.8731534799189905e-05, |
|
"loss": 0.4338, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.986728216964801, |
|
"grad_norm": 0.3554276718676979, |
|
"learning_rate": 1.8719232723892916e-05, |
|
"loss": 0.4387, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9896133871898442, |
|
"grad_norm": 0.3439024647253949, |
|
"learning_rate": 1.8706875361083088e-05, |
|
"loss": 0.4161, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.9924985574148875, |
|
"grad_norm": 0.32640748056119073, |
|
"learning_rate": 1.869446278911683e-05, |
|
"loss": 0.4205, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.9953837276399308, |
|
"grad_norm": 0.3673852819098302, |
|
"learning_rate": 1.8681995086700654e-05, |
|
"loss": 0.4264, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.9982688978649741, |
|
"grad_norm": 0.3414077938012464, |
|
"learning_rate": 1.866947233289061e-05, |
|
"loss": 0.4295, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.4312867522239685, |
|
"eval_runtime": 0.6755, |
|
"eval_samples_per_second": 113.987, |
|
"eval_steps_per_second": 2.961, |
|
"step": 1733 |
|
}, |
|
{ |
|
"epoch": 1.0011540680900173, |
|
"grad_norm": 0.3719885030349497, |
|
"learning_rate": 1.865689460709185e-05, |
|
"loss": 0.4085, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 1.0040392383150605, |
|
"grad_norm": 0.3653013563481502, |
|
"learning_rate": 1.8644261989058082e-05, |
|
"loss": 0.3663, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.006924408540104, |
|
"grad_norm": 0.4249244844285388, |
|
"learning_rate": 1.8631574558891087e-05, |
|
"loss": 0.3636, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 1.009809578765147, |
|
"grad_norm": 0.4065943346576567, |
|
"learning_rate": 1.8618832397040192e-05, |
|
"loss": 0.3602, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.0126947489901905, |
|
"grad_norm": 0.3839053642256101, |
|
"learning_rate": 1.860603558430178e-05, |
|
"loss": 0.3793, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 1.0155799192152337, |
|
"grad_norm": 0.36995484185295535, |
|
"learning_rate": 1.859318420181877e-05, |
|
"loss": 0.3438, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.018465089440277, |
|
"grad_norm": 0.3848314010331738, |
|
"learning_rate": 1.8580278331080085e-05, |
|
"loss": 0.3859, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 1.0213502596653203, |
|
"grad_norm": 0.3963349177249421, |
|
"learning_rate": 1.8567318053920166e-05, |
|
"loss": 0.3769, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.0242354298903635, |
|
"grad_norm": 0.37297944843138187, |
|
"learning_rate": 1.8554303452518436e-05, |
|
"loss": 0.3579, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 1.0271206001154067, |
|
"grad_norm": 0.3904119687309658, |
|
"learning_rate": 1.854123460939877e-05, |
|
"loss": 0.3715, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.0300057703404502, |
|
"grad_norm": 0.3788697016458385, |
|
"learning_rate": 1.8528111607429e-05, |
|
"loss": 0.3728, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 1.0328909405654934, |
|
"grad_norm": 0.37738365383900435, |
|
"learning_rate": 1.851493452982036e-05, |
|
"loss": 0.3611, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.0357761107905366, |
|
"grad_norm": 0.39677380812493784, |
|
"learning_rate": 1.8501703460126973e-05, |
|
"loss": 0.3661, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 1.03866128101558, |
|
"grad_norm": 0.39895561200913643, |
|
"learning_rate": 1.8488418482245317e-05, |
|
"loss": 0.3699, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.0415464512406232, |
|
"grad_norm": 0.3962653634072917, |
|
"learning_rate": 1.84750796804137e-05, |
|
"loss": 0.3513, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 1.0444316214656664, |
|
"grad_norm": 0.4045522991037393, |
|
"learning_rate": 1.8461687139211712e-05, |
|
"loss": 0.3621, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.0473167916907098, |
|
"grad_norm": 0.37889291444959144, |
|
"learning_rate": 1.844824094355971e-05, |
|
"loss": 0.3629, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 1.050201961915753, |
|
"grad_norm": 0.36255794337001546, |
|
"learning_rate": 1.843474117871826e-05, |
|
"loss": 0.356, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.0530871321407964, |
|
"grad_norm": 0.36607303065989255, |
|
"learning_rate": 1.84211879302876e-05, |
|
"loss": 0.3481, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 1.0559723023658396, |
|
"grad_norm": 0.38272410808497676, |
|
"learning_rate": 1.84075812842071e-05, |
|
"loss": 0.3509, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.0588574725908828, |
|
"grad_norm": 0.359466522831376, |
|
"learning_rate": 1.8393921326754724e-05, |
|
"loss": 0.3691, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 1.0617426428159262, |
|
"grad_norm": 0.3758374522906589, |
|
"learning_rate": 1.8380208144546473e-05, |
|
"loss": 0.3718, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.0646278130409694, |
|
"grad_norm": 0.3762140791163445, |
|
"learning_rate": 1.836644182453584e-05, |
|
"loss": 0.3777, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 1.0675129832660126, |
|
"grad_norm": 0.4061862324473009, |
|
"learning_rate": 1.835262245401326e-05, |
|
"loss": 0.3618, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.070398153491056, |
|
"grad_norm": 0.361301220342186, |
|
"learning_rate": 1.833875012060555e-05, |
|
"loss": 0.3974, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 1.0732833237160992, |
|
"grad_norm": 0.3769202277748975, |
|
"learning_rate": 1.8324824912275355e-05, |
|
"loss": 0.3495, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.0761684939411424, |
|
"grad_norm": 0.3605517647936932, |
|
"learning_rate": 1.8310846917320602e-05, |
|
"loss": 0.3481, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 1.0790536641661859, |
|
"grad_norm": 0.384557592260759, |
|
"learning_rate": 1.8296816224373926e-05, |
|
"loss": 0.3431, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.081938834391229, |
|
"grad_norm": 0.39969729027596174, |
|
"learning_rate": 1.828273292240211e-05, |
|
"loss": 0.361, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 1.0848240046162723, |
|
"grad_norm": 0.3893084314526289, |
|
"learning_rate": 1.8268597100705534e-05, |
|
"loss": 0.3533, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.0877091748413157, |
|
"grad_norm": 0.39190402847282024, |
|
"learning_rate": 1.8254408848917587e-05, |
|
"loss": 0.3528, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 1.0905943450663589, |
|
"grad_norm": 0.3991376675237472, |
|
"learning_rate": 1.824016825700412e-05, |
|
"loss": 0.3547, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.0934795152914023, |
|
"grad_norm": 0.3647535186718755, |
|
"learning_rate": 1.8225875415262852e-05, |
|
"loss": 0.3752, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 1.0963646855164455, |
|
"grad_norm": 0.36492225390564653, |
|
"learning_rate": 1.8211530414322834e-05, |
|
"loss": 0.3705, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.0992498557414887, |
|
"grad_norm": 0.38796798500765634, |
|
"learning_rate": 1.8197133345143828e-05, |
|
"loss": 0.3691, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 1.1021350259665321, |
|
"grad_norm": 0.3827359128376877, |
|
"learning_rate": 1.8182684299015764e-05, |
|
"loss": 0.3689, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.1050201961915753, |
|
"grad_norm": 0.38752794638536536, |
|
"learning_rate": 1.816818336755816e-05, |
|
"loss": 0.3567, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 1.1079053664166185, |
|
"grad_norm": 0.38630104069610516, |
|
"learning_rate": 1.815363064271952e-05, |
|
"loss": 0.3483, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.110790536641662, |
|
"grad_norm": 0.37210418058912814, |
|
"learning_rate": 1.813902621677676e-05, |
|
"loss": 0.3543, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 1.1136757068667051, |
|
"grad_norm": 0.37713275848642197, |
|
"learning_rate": 1.812437018233464e-05, |
|
"loss": 0.3531, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.1165608770917483, |
|
"grad_norm": 0.36232403860634815, |
|
"learning_rate": 1.8109662632325152e-05, |
|
"loss": 0.3658, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 1.1194460473167918, |
|
"grad_norm": 0.3466007749649004, |
|
"learning_rate": 1.809490366000694e-05, |
|
"loss": 0.3717, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.122331217541835, |
|
"grad_norm": 0.37512115226860376, |
|
"learning_rate": 1.8080093358964727e-05, |
|
"loss": 0.3684, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 1.1252163877668782, |
|
"grad_norm": 0.4257633969210461, |
|
"learning_rate": 1.806523182310868e-05, |
|
"loss": 0.365, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.1281015579919216, |
|
"grad_norm": 0.3907894746639846, |
|
"learning_rate": 1.8050319146673856e-05, |
|
"loss": 0.3539, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 1.1309867282169648, |
|
"grad_norm": 0.3868414285848145, |
|
"learning_rate": 1.8035355424219586e-05, |
|
"loss": 0.3619, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.133871898442008, |
|
"grad_norm": 0.391737905658893, |
|
"learning_rate": 1.8020340750628872e-05, |
|
"loss": 0.3556, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 1.1367570686670514, |
|
"grad_norm": 0.39546663065808646, |
|
"learning_rate": 1.800527522110779e-05, |
|
"loss": 0.3608, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.1396422388920946, |
|
"grad_norm": 0.3982732465681356, |
|
"learning_rate": 1.7990158931184892e-05, |
|
"loss": 0.3553, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 1.1425274091171378, |
|
"grad_norm": 0.3738040441095041, |
|
"learning_rate": 1.79749919767106e-05, |
|
"loss": 0.3599, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.1454125793421812, |
|
"grad_norm": 0.36487443451311957, |
|
"learning_rate": 1.7959774453856576e-05, |
|
"loss": 0.3521, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 1.1482977495672244, |
|
"grad_norm": 0.40333453889614557, |
|
"learning_rate": 1.794450645911514e-05, |
|
"loss": 0.3651, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.1511829197922678, |
|
"grad_norm": 0.5753350465511803, |
|
"learning_rate": 1.7929188089298653e-05, |
|
"loss": 0.369, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 1.154068090017311, |
|
"grad_norm": 0.3727451234003791, |
|
"learning_rate": 1.791381944153889e-05, |
|
"loss": 0.3536, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.1569532602423542, |
|
"grad_norm": 0.3787668346391338, |
|
"learning_rate": 1.789840061328643e-05, |
|
"loss": 0.3765, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 1.1598384304673977, |
|
"grad_norm": 0.36284152770529254, |
|
"learning_rate": 1.788293170231005e-05, |
|
"loss": 0.3455, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.1627236006924409, |
|
"grad_norm": 0.38508608974804587, |
|
"learning_rate": 1.7867412806696087e-05, |
|
"loss": 0.3716, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 1.165608770917484, |
|
"grad_norm": 0.3959297634423943, |
|
"learning_rate": 1.785184402484782e-05, |
|
"loss": 0.3715, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.1684939411425275, |
|
"grad_norm": 0.4224675659582224, |
|
"learning_rate": 1.7836225455484865e-05, |
|
"loss": 0.3844, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 1.1713791113675707, |
|
"grad_norm": 0.36797685694286536, |
|
"learning_rate": 1.7820557197642513e-05, |
|
"loss": 0.3691, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.1742642815926139, |
|
"grad_norm": 0.3831267176121955, |
|
"learning_rate": 1.7804839350671145e-05, |
|
"loss": 0.3571, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 1.1771494518176573, |
|
"grad_norm": 0.3905138892178264, |
|
"learning_rate": 1.778907201423557e-05, |
|
"loss": 0.3636, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.1800346220427005, |
|
"grad_norm": 0.3912936170187795, |
|
"learning_rate": 1.7773255288314398e-05, |
|
"loss": 0.3574, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 1.182919792267744, |
|
"grad_norm": 0.3754522878062524, |
|
"learning_rate": 1.775738927319942e-05, |
|
"loss": 0.3874, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.185804962492787, |
|
"grad_norm": 0.36965375437322023, |
|
"learning_rate": 1.774147406949496e-05, |
|
"loss": 0.3717, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 1.1886901327178303, |
|
"grad_norm": 0.4220256804649789, |
|
"learning_rate": 1.7725509778117242e-05, |
|
"loss": 0.3541, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.1915753029428737, |
|
"grad_norm": 0.38295286541947393, |
|
"learning_rate": 1.770949650029374e-05, |
|
"loss": 0.3574, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 1.194460473167917, |
|
"grad_norm": 0.37402840183729646, |
|
"learning_rate": 1.7693434337562557e-05, |
|
"loss": 0.3738, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.1973456433929601, |
|
"grad_norm": 0.385808424420751, |
|
"learning_rate": 1.7677323391771766e-05, |
|
"loss": 0.3646, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 1.2002308136180035, |
|
"grad_norm": 0.37207777332282577, |
|
"learning_rate": 1.766116376507876e-05, |
|
"loss": 0.3699, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.2031159838430467, |
|
"grad_norm": 0.39725920392186914, |
|
"learning_rate": 1.764495555994962e-05, |
|
"loss": 0.3736, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 1.20600115406809, |
|
"grad_norm": 0.3885483852856526, |
|
"learning_rate": 1.7628698879158444e-05, |
|
"loss": 0.3493, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.2088863242931334, |
|
"grad_norm": 0.3779946551242946, |
|
"learning_rate": 1.7612393825786723e-05, |
|
"loss": 0.3758, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 1.2117714945181766, |
|
"grad_norm": 0.3625482575463983, |
|
"learning_rate": 1.7596040503222665e-05, |
|
"loss": 0.3745, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.2146566647432198, |
|
"grad_norm": 0.38330347138439946, |
|
"learning_rate": 1.7579639015160545e-05, |
|
"loss": 0.362, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 1.2175418349682632, |
|
"grad_norm": 0.4177686333809965, |
|
"learning_rate": 1.7563189465600047e-05, |
|
"loss": 0.3699, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.2204270051933064, |
|
"grad_norm": 0.3707075705845332, |
|
"learning_rate": 1.7546691958845617e-05, |
|
"loss": 0.3485, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 1.2233121754183496, |
|
"grad_norm": 0.37675133161938845, |
|
"learning_rate": 1.7530146599505782e-05, |
|
"loss": 0.3738, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.226197345643393, |
|
"grad_norm": 0.4073716130331381, |
|
"learning_rate": 1.75135534924925e-05, |
|
"loss": 0.3726, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 1.2290825158684362, |
|
"grad_norm": 0.377632182741206, |
|
"learning_rate": 1.7496912743020487e-05, |
|
"loss": 0.3606, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.2319676860934794, |
|
"grad_norm": 0.41027591746654574, |
|
"learning_rate": 1.748022445660656e-05, |
|
"loss": 0.3677, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 1.2348528563185228, |
|
"grad_norm": 0.454127691618852, |
|
"learning_rate": 1.7463488739068952e-05, |
|
"loss": 0.356, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.237738026543566, |
|
"grad_norm": 0.38902494056071213, |
|
"learning_rate": 1.744670569652666e-05, |
|
"loss": 0.356, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 1.2406231967686094, |
|
"grad_norm": 0.3547365448603461, |
|
"learning_rate": 1.742987543539876e-05, |
|
"loss": 0.3717, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.2435083669936526, |
|
"grad_norm": 0.37046329915555815, |
|
"learning_rate": 1.741299806240373e-05, |
|
"loss": 0.3675, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 1.2463935372186958, |
|
"grad_norm": 0.3869703816603672, |
|
"learning_rate": 1.7396073684558788e-05, |
|
"loss": 0.3665, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.2492787074437393, |
|
"grad_norm": 0.38188698969658114, |
|
"learning_rate": 1.737910240917919e-05, |
|
"loss": 0.3624, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 1.2521638776687825, |
|
"grad_norm": 0.37535923832323237, |
|
"learning_rate": 1.7362084343877576e-05, |
|
"loss": 0.368, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.2550490478938257, |
|
"grad_norm": 0.36714065996796413, |
|
"learning_rate": 1.7345019596563268e-05, |
|
"loss": 0.3667, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 1.257934218118869, |
|
"grad_norm": 0.4157188737740098, |
|
"learning_rate": 1.732790827544159e-05, |
|
"loss": 0.3737, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.2608193883439123, |
|
"grad_norm": 0.3811681622112457, |
|
"learning_rate": 1.7310750489013198e-05, |
|
"loss": 0.3544, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 1.2637045585689557, |
|
"grad_norm": 0.3753050362248373, |
|
"learning_rate": 1.729354634607336e-05, |
|
"loss": 0.3664, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.266589728793999, |
|
"grad_norm": 0.36855745458054423, |
|
"learning_rate": 1.7276295955711295e-05, |
|
"loss": 0.3637, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 1.269474899019042, |
|
"grad_norm": 0.3650539628680322, |
|
"learning_rate": 1.725899942730947e-05, |
|
"loss": 0.3645, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.2723600692440855, |
|
"grad_norm": 0.3831096440319005, |
|
"learning_rate": 1.724165687054291e-05, |
|
"loss": 0.3618, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 1.2752452394691287, |
|
"grad_norm": 0.3603410399079337, |
|
"learning_rate": 1.7224268395378493e-05, |
|
"loss": 0.3453, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.278130409694172, |
|
"grad_norm": 0.37269959302348715, |
|
"learning_rate": 1.720683411207426e-05, |
|
"loss": 0.3576, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 1.2810155799192153, |
|
"grad_norm": 0.38173221937480023, |
|
"learning_rate": 1.718935413117872e-05, |
|
"loss": 0.371, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.2839007501442585, |
|
"grad_norm": 0.3722436971668028, |
|
"learning_rate": 1.7171828563530146e-05, |
|
"loss": 0.3663, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 1.2867859203693017, |
|
"grad_norm": 0.39837683099640625, |
|
"learning_rate": 1.7154257520255864e-05, |
|
"loss": 0.3745, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.2896710905943451, |
|
"grad_norm": 0.41009144762310795, |
|
"learning_rate": 1.713664111277156e-05, |
|
"loss": 0.3666, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 1.2925562608193883, |
|
"grad_norm": 0.36668782585448506, |
|
"learning_rate": 1.711897945278057e-05, |
|
"loss": 0.3738, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.2954414310444315, |
|
"grad_norm": 0.41098358203090174, |
|
"learning_rate": 1.710127265227316e-05, |
|
"loss": 0.3668, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 1.298326601269475, |
|
"grad_norm": 0.3862089819354528, |
|
"learning_rate": 1.7083520823525836e-05, |
|
"loss": 0.3661, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.3012117714945182, |
|
"grad_norm": 0.4033977142486018, |
|
"learning_rate": 1.706572407910062e-05, |
|
"loss": 0.3688, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 1.3040969417195614, |
|
"grad_norm": 0.4003446173157873, |
|
"learning_rate": 1.7047882531844338e-05, |
|
"loss": 0.377, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.3069821119446048, |
|
"grad_norm": 0.3702354075920004, |
|
"learning_rate": 1.7029996294887904e-05, |
|
"loss": 0.3774, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 1.309867282169648, |
|
"grad_norm": 0.3915858903728003, |
|
"learning_rate": 1.7012065481645604e-05, |
|
"loss": 0.3671, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.3127524523946912, |
|
"grad_norm": 0.3721772452216554, |
|
"learning_rate": 1.699409020581438e-05, |
|
"loss": 0.3553, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 1.3156376226197346, |
|
"grad_norm": 0.3726078669899795, |
|
"learning_rate": 1.6976070581373098e-05, |
|
"loss": 0.3781, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.3185227928447778, |
|
"grad_norm": 0.3851091572746381, |
|
"learning_rate": 1.6958006722581838e-05, |
|
"loss": 0.3764, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 1.321407963069821, |
|
"grad_norm": 0.3999997138224476, |
|
"learning_rate": 1.693989874398116e-05, |
|
"loss": 0.3719, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.3242931332948644, |
|
"grad_norm": 0.36767006912934236, |
|
"learning_rate": 1.6921746760391387e-05, |
|
"loss": 0.3748, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 1.3271783035199076, |
|
"grad_norm": 0.3730179310503956, |
|
"learning_rate": 1.6903550886911873e-05, |
|
"loss": 0.3608, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.3300634737449508, |
|
"grad_norm": 0.3672274323141715, |
|
"learning_rate": 1.688531123892027e-05, |
|
"loss": 0.3443, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 1.3329486439699942, |
|
"grad_norm": 0.36049125372252444, |
|
"learning_rate": 1.6867027932071786e-05, |
|
"loss": 0.3606, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.3358338141950374, |
|
"grad_norm": 0.37209341340715907, |
|
"learning_rate": 1.684870108229849e-05, |
|
"loss": 0.3866, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 1.3387189844200809, |
|
"grad_norm": 0.4041341082410308, |
|
"learning_rate": 1.6830330805808524e-05, |
|
"loss": 0.3552, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.341604154645124, |
|
"grad_norm": 0.44070233485279253, |
|
"learning_rate": 1.681191721908541e-05, |
|
"loss": 0.3641, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.3444893248701673, |
|
"grad_norm": 0.3782903985125648, |
|
"learning_rate": 1.6793460438887294e-05, |
|
"loss": 0.3702, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.3473744950952107, |
|
"grad_norm": 0.38093409058463656, |
|
"learning_rate": 1.67749605822462e-05, |
|
"loss": 0.3738, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 1.3502596653202539, |
|
"grad_norm": 0.4076843344147931, |
|
"learning_rate": 1.6756417766467294e-05, |
|
"loss": 0.3713, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.3531448355452973, |
|
"grad_norm": 0.4114981370603978, |
|
"learning_rate": 1.6737832109128142e-05, |
|
"loss": 0.3626, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 1.3560300057703405, |
|
"grad_norm": 0.4070630768126986, |
|
"learning_rate": 1.6719203728077963e-05, |
|
"loss": 0.3579, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.3589151759953837, |
|
"grad_norm": 0.3980167515551806, |
|
"learning_rate": 1.670053274143689e-05, |
|
"loss": 0.3688, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 1.3618003462204271, |
|
"grad_norm": 0.4117026797519244, |
|
"learning_rate": 1.6681819267595193e-05, |
|
"loss": 0.3631, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.3646855164454703, |
|
"grad_norm": 0.3794143818195765, |
|
"learning_rate": 1.6663063425212564e-05, |
|
"loss": 0.3658, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 1.3675706866705135, |
|
"grad_norm": 0.4389143548454321, |
|
"learning_rate": 1.6644265333217347e-05, |
|
"loss": 0.3622, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.370455856895557, |
|
"grad_norm": 0.34975549009666657, |
|
"learning_rate": 1.6625425110805774e-05, |
|
"loss": 0.3636, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.3733410271206001, |
|
"grad_norm": 0.4000657679860908, |
|
"learning_rate": 1.660654287744123e-05, |
|
"loss": 0.3655, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.3762261973456433, |
|
"grad_norm": 0.39327391261319994, |
|
"learning_rate": 1.6587618752853484e-05, |
|
"loss": 0.3673, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 1.3791113675706868, |
|
"grad_norm": 0.3803854498603796, |
|
"learning_rate": 1.656865285703794e-05, |
|
"loss": 0.3644, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.38199653779573, |
|
"grad_norm": 0.36271341863389506, |
|
"learning_rate": 1.6549645310254848e-05, |
|
"loss": 0.3597, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 1.3848817080207732, |
|
"grad_norm": 0.39017089645929526, |
|
"learning_rate": 1.653059623302858e-05, |
|
"loss": 0.3584, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.3877668782458166, |
|
"grad_norm": 0.377459677055801, |
|
"learning_rate": 1.6511505746146837e-05, |
|
"loss": 0.3603, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 1.3906520484708598, |
|
"grad_norm": 0.36253902214360645, |
|
"learning_rate": 1.6492373970659897e-05, |
|
"loss": 0.3622, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.393537218695903, |
|
"grad_norm": 0.3792755236074643, |
|
"learning_rate": 1.647320102787984e-05, |
|
"loss": 0.3579, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 1.3964223889209464, |
|
"grad_norm": 0.4106370547161479, |
|
"learning_rate": 1.645398703937979e-05, |
|
"loss": 0.3548, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.3993075591459896, |
|
"grad_norm": 0.3961446879313167, |
|
"learning_rate": 1.6434732126993137e-05, |
|
"loss": 0.3788, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 1.4021927293710328, |
|
"grad_norm": 0.41444403209474506, |
|
"learning_rate": 1.6415436412812742e-05, |
|
"loss": 0.3935, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.4050778995960762, |
|
"grad_norm": 0.45443169110667836, |
|
"learning_rate": 1.6396100019190216e-05, |
|
"loss": 0.3672, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 1.4079630698211194, |
|
"grad_norm": 0.3704577288597062, |
|
"learning_rate": 1.6376723068735096e-05, |
|
"loss": 0.3614, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.4108482400461626, |
|
"grad_norm": 0.37885398698326, |
|
"learning_rate": 1.6357305684314083e-05, |
|
"loss": 0.3473, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 1.413733410271206, |
|
"grad_norm": 0.41962586363127974, |
|
"learning_rate": 1.6337847989050276e-05, |
|
"loss": 0.3658, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.4166185804962492, |
|
"grad_norm": 0.3851208552370083, |
|
"learning_rate": 1.6318350106322363e-05, |
|
"loss": 0.3699, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 1.4195037507212924, |
|
"grad_norm": 0.3548157462238281, |
|
"learning_rate": 1.6298812159763868e-05, |
|
"loss": 0.3681, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.4223889209463358, |
|
"grad_norm": 0.37651463846589284, |
|
"learning_rate": 1.6279234273262357e-05, |
|
"loss": 0.3516, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 1.425274091171379, |
|
"grad_norm": 0.35900318539287046, |
|
"learning_rate": 1.6259616570958638e-05, |
|
"loss": 0.3704, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.4281592613964225, |
|
"grad_norm": 0.4024381546192383, |
|
"learning_rate": 1.6239959177246e-05, |
|
"loss": 0.3638, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 1.4310444316214657, |
|
"grad_norm": 0.398944280818137, |
|
"learning_rate": 1.6220262216769393e-05, |
|
"loss": 0.3813, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.4339296018465089, |
|
"grad_norm": 0.3831395961609494, |
|
"learning_rate": 1.6200525814424666e-05, |
|
"loss": 0.3693, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 1.4368147720715523, |
|
"grad_norm": 0.3661492085432, |
|
"learning_rate": 1.6180750095357766e-05, |
|
"loss": 0.3776, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.4396999422965955, |
|
"grad_norm": 0.36586438881277866, |
|
"learning_rate": 1.6160935184963937e-05, |
|
"loss": 0.3503, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 1.442585112521639, |
|
"grad_norm": 0.3784608295775924, |
|
"learning_rate": 1.614108120888693e-05, |
|
"loss": 0.3715, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.445470282746682, |
|
"grad_norm": 0.364821310782025, |
|
"learning_rate": 1.6121188293018198e-05, |
|
"loss": 0.3774, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 1.4483554529717253, |
|
"grad_norm": 0.38118287581212157, |
|
"learning_rate": 1.6101256563496133e-05, |
|
"loss": 0.3608, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.4512406231967687, |
|
"grad_norm": 0.37900358798651623, |
|
"learning_rate": 1.608128614670521e-05, |
|
"loss": 0.3889, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 1.454125793421812, |
|
"grad_norm": 0.40251780895398115, |
|
"learning_rate": 1.6061277169275227e-05, |
|
"loss": 0.3606, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.4570109636468551, |
|
"grad_norm": 0.38327828759400334, |
|
"learning_rate": 1.6041229758080496e-05, |
|
"loss": 0.3635, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 1.4598961338718985, |
|
"grad_norm": 0.37246590219746933, |
|
"learning_rate": 1.6021144040239017e-05, |
|
"loss": 0.3766, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.4627813040969417, |
|
"grad_norm": 0.3833944550693614, |
|
"learning_rate": 1.60010201431117e-05, |
|
"loss": 0.3612, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 1.465666474321985, |
|
"grad_norm": 0.36363911288467893, |
|
"learning_rate": 1.598085819430155e-05, |
|
"loss": 0.3711, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.4685516445470284, |
|
"grad_norm": 0.3588573344819394, |
|
"learning_rate": 1.596065832165283e-05, |
|
"loss": 0.3726, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 1.4714368147720716, |
|
"grad_norm": 0.3705849995740623, |
|
"learning_rate": 1.5940420653250293e-05, |
|
"loss": 0.3642, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.4743219849971148, |
|
"grad_norm": 0.38341287260652185, |
|
"learning_rate": 1.5920145317418346e-05, |
|
"loss": 0.3714, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 1.4772071552221582, |
|
"grad_norm": 0.38734703763461453, |
|
"learning_rate": 1.589983244272024e-05, |
|
"loss": 0.3836, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.4800923254472014, |
|
"grad_norm": 0.36011456145391224, |
|
"learning_rate": 1.5879482157957245e-05, |
|
"loss": 0.3669, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 1.4829774956722446, |
|
"grad_norm": 0.3784776076526531, |
|
"learning_rate": 1.5859094592167858e-05, |
|
"loss": 0.3634, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.485862665897288, |
|
"grad_norm": 0.38334066789201254, |
|
"learning_rate": 1.5838669874626963e-05, |
|
"loss": 0.3836, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 1.4887478361223312, |
|
"grad_norm": 0.3769831368967705, |
|
"learning_rate": 1.5818208134845022e-05, |
|
"loss": 0.3592, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.4916330063473744, |
|
"grad_norm": 0.411783030147672, |
|
"learning_rate": 1.579770950256724e-05, |
|
"loss": 0.3794, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 1.4945181765724178, |
|
"grad_norm": 0.5027841057723603, |
|
"learning_rate": 1.577717410777276e-05, |
|
"loss": 0.3601, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.497403346797461, |
|
"grad_norm": 0.38774582541265035, |
|
"learning_rate": 1.5756602080673843e-05, |
|
"loss": 0.3753, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 1.5002885170225042, |
|
"grad_norm": 0.37739115096678616, |
|
"learning_rate": 1.5735993551715006e-05, |
|
"loss": 0.3737, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.5031736872475476, |
|
"grad_norm": 0.3902916999812962, |
|
"learning_rate": 1.571534865157224e-05, |
|
"loss": 0.3632, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 1.5060588574725908, |
|
"grad_norm": 0.3868041357607236, |
|
"learning_rate": 1.5694667511152143e-05, |
|
"loss": 0.3809, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.508944027697634, |
|
"grad_norm": 0.37704578010786705, |
|
"learning_rate": 1.5673950261591133e-05, |
|
"loss": 0.3694, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 1.5118291979226774, |
|
"grad_norm": 0.3607652817962323, |
|
"learning_rate": 1.5653197034254564e-05, |
|
"loss": 0.3519, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.5147143681477209, |
|
"grad_norm": 0.37320845213700304, |
|
"learning_rate": 1.563240796073594e-05, |
|
"loss": 0.3606, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.5175995383727638, |
|
"grad_norm": 0.43098838699804737, |
|
"learning_rate": 1.5611583172856045e-05, |
|
"loss": 0.3741, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.5204847085978073, |
|
"grad_norm": 0.3812237058754953, |
|
"learning_rate": 1.5590722802662143e-05, |
|
"loss": 0.3486, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 1.5233698788228507, |
|
"grad_norm": 0.3799133932625459, |
|
"learning_rate": 1.55698269824271e-05, |
|
"loss": 0.3657, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.5262550490478937, |
|
"grad_norm": 0.34887334582855944, |
|
"learning_rate": 1.5548895844648583e-05, |
|
"loss": 0.3656, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 1.529140219272937, |
|
"grad_norm": 0.3798697034141352, |
|
"learning_rate": 1.552792952204819e-05, |
|
"loss": 0.3714, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.5320253894979805, |
|
"grad_norm": 0.3597061950949025, |
|
"learning_rate": 1.550692814757063e-05, |
|
"loss": 0.3651, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 1.5349105597230237, |
|
"grad_norm": 0.39911942641093723, |
|
"learning_rate": 1.5485891854382868e-05, |
|
"loss": 0.3662, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.537795729948067, |
|
"grad_norm": 0.4048874005019976, |
|
"learning_rate": 1.5464820775873285e-05, |
|
"loss": 0.378, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 1.5406809001731103, |
|
"grad_norm": 0.3614850352345454, |
|
"learning_rate": 1.544371504565082e-05, |
|
"loss": 0.3511, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.5435660703981535, |
|
"grad_norm": 0.3852211514058087, |
|
"learning_rate": 1.5422574797544166e-05, |
|
"loss": 0.3571, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 1.5464512406231967, |
|
"grad_norm": 0.36466553350789727, |
|
"learning_rate": 1.5401400165600848e-05, |
|
"loss": 0.3654, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.5493364108482401, |
|
"grad_norm": 0.4640557855511497, |
|
"learning_rate": 1.5380191284086442e-05, |
|
"loss": 0.3663, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 1.5522215810732833, |
|
"grad_norm": 0.40382378120892326, |
|
"learning_rate": 1.5358948287483688e-05, |
|
"loss": 0.395, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.5551067512983265, |
|
"grad_norm": 0.34945555100262243, |
|
"learning_rate": 1.533767131049164e-05, |
|
"loss": 0.3507, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 1.55799192152337, |
|
"grad_norm": 0.4017093497659257, |
|
"learning_rate": 1.5316360488024838e-05, |
|
"loss": 0.3666, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.5608770917484132, |
|
"grad_norm": 0.36824354949459775, |
|
"learning_rate": 1.5295015955212397e-05, |
|
"loss": 0.3629, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 1.5637622619734564, |
|
"grad_norm": 0.37118819752646043, |
|
"learning_rate": 1.5273637847397216e-05, |
|
"loss": 0.3619, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.5666474321984998, |
|
"grad_norm": 0.380235084250298, |
|
"learning_rate": 1.5252226300135075e-05, |
|
"loss": 0.3627, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 1.569532602423543, |
|
"grad_norm": 0.35479734123182555, |
|
"learning_rate": 1.5230781449193787e-05, |
|
"loss": 0.3499, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.5724177726485862, |
|
"grad_norm": 0.37537141405956453, |
|
"learning_rate": 1.5209303430552353e-05, |
|
"loss": 0.3689, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 1.5753029428736296, |
|
"grad_norm": 0.38314418131207195, |
|
"learning_rate": 1.5187792380400066e-05, |
|
"loss": 0.3568, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.5781881130986728, |
|
"grad_norm": 0.4015489085451092, |
|
"learning_rate": 1.5166248435135687e-05, |
|
"loss": 0.3584, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 1.581073283323716, |
|
"grad_norm": 0.413370518767082, |
|
"learning_rate": 1.5144671731366537e-05, |
|
"loss": 0.3627, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.5839584535487594, |
|
"grad_norm": 0.3855856313069642, |
|
"learning_rate": 1.5123062405907689e-05, |
|
"loss": 0.3576, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 1.5868436237738026, |
|
"grad_norm": 0.40248976719527124, |
|
"learning_rate": 1.510142059578103e-05, |
|
"loss": 0.371, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.5897287939988458, |
|
"grad_norm": 0.3896121946823667, |
|
"learning_rate": 1.5079746438214452e-05, |
|
"loss": 0.3569, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 1.5926139642238892, |
|
"grad_norm": 0.3601348358274979, |
|
"learning_rate": 1.5058040070640947e-05, |
|
"loss": 0.3689, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.5954991344489324, |
|
"grad_norm": 0.35986769610870906, |
|
"learning_rate": 1.5036301630697747e-05, |
|
"loss": 0.3555, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 1.5983843046739756, |
|
"grad_norm": 0.36475176711784657, |
|
"learning_rate": 1.5014531256225459e-05, |
|
"loss": 0.3605, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.601269474899019, |
|
"grad_norm": 0.37930345881140104, |
|
"learning_rate": 1.499272908526717e-05, |
|
"loss": 0.3728, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.6041546451240625, |
|
"grad_norm": 0.38456807910427965, |
|
"learning_rate": 1.4970895256067593e-05, |
|
"loss": 0.3736, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.6070398153491054, |
|
"grad_norm": 0.4121207010530726, |
|
"learning_rate": 1.4949029907072179e-05, |
|
"loss": 0.3855, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 1.6099249855741489, |
|
"grad_norm": 0.36551396208539605, |
|
"learning_rate": 1.4927133176926245e-05, |
|
"loss": 0.3662, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.6128101557991923, |
|
"grad_norm": 0.3716986354984255, |
|
"learning_rate": 1.4905205204474088e-05, |
|
"loss": 0.3782, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 1.6156953260242353, |
|
"grad_norm": 0.6240855472300652, |
|
"learning_rate": 1.488324612875811e-05, |
|
"loss": 0.3464, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.6185804962492787, |
|
"grad_norm": 0.33838650586561203, |
|
"learning_rate": 1.4861256089017934e-05, |
|
"loss": 0.3571, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 1.621465666474322, |
|
"grad_norm": 0.3772401322980198, |
|
"learning_rate": 1.483923522468952e-05, |
|
"loss": 0.3582, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.6243508366993653, |
|
"grad_norm": 0.3686319357750962, |
|
"learning_rate": 1.4817183675404292e-05, |
|
"loss": 0.3705, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 1.6272360069244085, |
|
"grad_norm": 0.3968174151919166, |
|
"learning_rate": 1.4795101580988228e-05, |
|
"loss": 0.3806, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.630121177149452, |
|
"grad_norm": 0.3902887780136523, |
|
"learning_rate": 1.4772989081460997e-05, |
|
"loss": 0.3626, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.6330063473744951, |
|
"grad_norm": 0.3746341856550175, |
|
"learning_rate": 1.4750846317035071e-05, |
|
"loss": 0.3672, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.6358915175995383, |
|
"grad_norm": 0.3994993753443447, |
|
"learning_rate": 1.4728673428114808e-05, |
|
"loss": 0.3688, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 1.6387766878245817, |
|
"grad_norm": 0.37287704407061595, |
|
"learning_rate": 1.4706470555295605e-05, |
|
"loss": 0.3554, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.641661858049625, |
|
"grad_norm": 0.37701343446505425, |
|
"learning_rate": 1.4684237839362963e-05, |
|
"loss": 0.3654, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 1.6445470282746681, |
|
"grad_norm": 0.38168823604260177, |
|
"learning_rate": 1.4661975421291623e-05, |
|
"loss": 0.3586, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.6474321984997116, |
|
"grad_norm": 0.39975859055427077, |
|
"learning_rate": 1.4639683442244663e-05, |
|
"loss": 0.3716, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 1.6503173687247548, |
|
"grad_norm": 0.37090442126154205, |
|
"learning_rate": 1.4617362043572607e-05, |
|
"loss": 0.3588, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.653202538949798, |
|
"grad_norm": 0.3519117177291488, |
|
"learning_rate": 1.4595011366812512e-05, |
|
"loss": 0.3663, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 1.6560877091748414, |
|
"grad_norm": 0.3737325432685507, |
|
"learning_rate": 1.4572631553687103e-05, |
|
"loss": 0.3582, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.6589728793998846, |
|
"grad_norm": 0.38870437566944893, |
|
"learning_rate": 1.4550222746103835e-05, |
|
"loss": 0.351, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.6618580496249278, |
|
"grad_norm": 0.3617260019248044, |
|
"learning_rate": 1.4527785086154027e-05, |
|
"loss": 0.3696, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.6647432198499712, |
|
"grad_norm": 0.38332448310467415, |
|
"learning_rate": 1.4505318716111943e-05, |
|
"loss": 0.373, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 1.6676283900750144, |
|
"grad_norm": 0.3747344452694502, |
|
"learning_rate": 1.448282377843389e-05, |
|
"loss": 0.3664, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.6705135603000576, |
|
"grad_norm": 0.36202726723998613, |
|
"learning_rate": 1.4460300415757323e-05, |
|
"loss": 0.3766, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 1.673398730525101, |
|
"grad_norm": 0.37923473785013656, |
|
"learning_rate": 1.4437748770899936e-05, |
|
"loss": 0.3814, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.6762839007501442, |
|
"grad_norm": 0.3873960582212302, |
|
"learning_rate": 1.4415168986858754e-05, |
|
"loss": 0.3745, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 1.6791690709751874, |
|
"grad_norm": 0.4013398358287003, |
|
"learning_rate": 1.4392561206809232e-05, |
|
"loss": 0.3798, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.6820542412002308, |
|
"grad_norm": 0.40802241994691185, |
|
"learning_rate": 1.4369925574104344e-05, |
|
"loss": 0.3586, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 1.684939411425274, |
|
"grad_norm": 0.4010573684279247, |
|
"learning_rate": 1.4347262232273671e-05, |
|
"loss": 0.3578, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.6878245816503172, |
|
"grad_norm": 0.3609006547431779, |
|
"learning_rate": 1.4324571325022496e-05, |
|
"loss": 0.3592, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 1.6907097518753607, |
|
"grad_norm": 0.3682631162288877, |
|
"learning_rate": 1.4301852996230889e-05, |
|
"loss": 0.3611, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.693594922100404, |
|
"grad_norm": 0.39391349541100773, |
|
"learning_rate": 1.42791073899528e-05, |
|
"loss": 0.3549, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 1.696480092325447, |
|
"grad_norm": 0.44233833212347734, |
|
"learning_rate": 1.4256334650415135e-05, |
|
"loss": 0.3537, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.6993652625504905, |
|
"grad_norm": 0.3869468573057741, |
|
"learning_rate": 1.4233534922016859e-05, |
|
"loss": 0.37, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 1.702250432775534, |
|
"grad_norm": 0.37586357463502135, |
|
"learning_rate": 1.4210708349328062e-05, |
|
"loss": 0.3675, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.7051356030005769, |
|
"grad_norm": 0.3903279762819204, |
|
"learning_rate": 1.418785507708905e-05, |
|
"loss": 0.369, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 1.7080207732256203, |
|
"grad_norm": 0.3969910656440696, |
|
"learning_rate": 1.4164975250209432e-05, |
|
"loss": 0.3694, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.7109059434506637, |
|
"grad_norm": 0.3556902520239883, |
|
"learning_rate": 1.4142069013767192e-05, |
|
"loss": 0.3504, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 1.713791113675707, |
|
"grad_norm": 0.3583546127221291, |
|
"learning_rate": 1.4119136513007776e-05, |
|
"loss": 0.3681, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.71667628390075, |
|
"grad_norm": 0.3968433520239081, |
|
"learning_rate": 1.409617789334317e-05, |
|
"loss": 0.3752, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 1.7195614541257935, |
|
"grad_norm": 0.3898401819324146, |
|
"learning_rate": 1.4073193300350973e-05, |
|
"loss": 0.3698, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.7224466243508367, |
|
"grad_norm": 0.3698983985858203, |
|
"learning_rate": 1.405018287977348e-05, |
|
"loss": 0.3521, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 1.72533179457588, |
|
"grad_norm": 0.4816180292439013, |
|
"learning_rate": 1.402714677751675e-05, |
|
"loss": 0.3604, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.7282169648009233, |
|
"grad_norm": 0.4104869915376292, |
|
"learning_rate": 1.40040851396497e-05, |
|
"loss": 0.3799, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 1.7311021350259665, |
|
"grad_norm": 0.3917852021920299, |
|
"learning_rate": 1.3980998112403146e-05, |
|
"loss": 0.3584, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.7339873052510097, |
|
"grad_norm": 0.3910325407989454, |
|
"learning_rate": 1.3957885842168908e-05, |
|
"loss": 0.3579, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 1.7368724754760532, |
|
"grad_norm": 0.35526389545543485, |
|
"learning_rate": 1.3934748475498867e-05, |
|
"loss": 0.3588, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.7397576457010964, |
|
"grad_norm": 0.39546625910444144, |
|
"learning_rate": 1.3911586159104032e-05, |
|
"loss": 0.3476, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 1.7426428159261396, |
|
"grad_norm": 0.369055887907894, |
|
"learning_rate": 1.3888399039853618e-05, |
|
"loss": 0.352, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.745527986151183, |
|
"grad_norm": 0.35801953491056676, |
|
"learning_rate": 1.3865187264774114e-05, |
|
"loss": 0.3554, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 1.7484131563762262, |
|
"grad_norm": 0.3531901383810597, |
|
"learning_rate": 1.3841950981048342e-05, |
|
"loss": 0.3522, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.7512983266012694, |
|
"grad_norm": 0.39607234863789387, |
|
"learning_rate": 1.3818690336014543e-05, |
|
"loss": 0.3762, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 1.7541834968263128, |
|
"grad_norm": 0.38127586298657323, |
|
"learning_rate": 1.3795405477165406e-05, |
|
"loss": 0.3563, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.757068667051356, |
|
"grad_norm": 0.43662892822050353, |
|
"learning_rate": 1.3772096552147187e-05, |
|
"loss": 0.352, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 1.7599538372763992, |
|
"grad_norm": 0.3889378354940805, |
|
"learning_rate": 1.3748763708758717e-05, |
|
"loss": 0.3686, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.7628390075014426, |
|
"grad_norm": 0.3972446044944282, |
|
"learning_rate": 1.3725407094950506e-05, |
|
"loss": 0.3621, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 1.7657241777264858, |
|
"grad_norm": 0.40962998535223355, |
|
"learning_rate": 1.3702026858823781e-05, |
|
"loss": 0.3876, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.768609347951529, |
|
"grad_norm": 0.35961773766923205, |
|
"learning_rate": 1.3678623148629555e-05, |
|
"loss": 0.3736, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 1.7714945181765724, |
|
"grad_norm": 0.38471972852000913, |
|
"learning_rate": 1.3655196112767695e-05, |
|
"loss": 0.3491, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.7743796884016156, |
|
"grad_norm": 0.36648434186274914, |
|
"learning_rate": 1.363174589978596e-05, |
|
"loss": 0.3593, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 1.7772648586266588, |
|
"grad_norm": 0.3872125897463841, |
|
"learning_rate": 1.3608272658379087e-05, |
|
"loss": 0.3646, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.7801500288517023, |
|
"grad_norm": 0.3740044921465578, |
|
"learning_rate": 1.358477653738782e-05, |
|
"loss": 0.3517, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 1.7830351990767457, |
|
"grad_norm": 0.38429954155990775, |
|
"learning_rate": 1.3561257685797992e-05, |
|
"loss": 0.3655, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.7859203693017887, |
|
"grad_norm": 0.3478740415125172, |
|
"learning_rate": 1.3537716252739552e-05, |
|
"loss": 0.3675, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 1.788805539526832, |
|
"grad_norm": 0.3597910283031001, |
|
"learning_rate": 1.3514152387485651e-05, |
|
"loss": 0.3489, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.7916907097518755, |
|
"grad_norm": 0.41193186738188275, |
|
"learning_rate": 1.3490566239451672e-05, |
|
"loss": 0.3818, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 1.7945758799769185, |
|
"grad_norm": 0.33623886878783116, |
|
"learning_rate": 1.3466957958194284e-05, |
|
"loss": 0.3618, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.797461050201962, |
|
"grad_norm": 0.35667893854991767, |
|
"learning_rate": 1.344332769341052e-05, |
|
"loss": 0.3558, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 1.8003462204270053, |
|
"grad_norm": 0.39258025755310366, |
|
"learning_rate": 1.3419675594936778e-05, |
|
"loss": 0.3477, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.8032313906520485, |
|
"grad_norm": 0.3782720017741446, |
|
"learning_rate": 1.339600181274793e-05, |
|
"loss": 0.3735, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.8061165608770917, |
|
"grad_norm": 0.4035057736179016, |
|
"learning_rate": 1.3372306496956324e-05, |
|
"loss": 0.3845, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.8090017311021351, |
|
"grad_norm": 0.3763574258043012, |
|
"learning_rate": 1.3348589797810854e-05, |
|
"loss": 0.3602, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 1.8118869013271783, |
|
"grad_norm": 0.35992628716248726, |
|
"learning_rate": 1.3324851865696e-05, |
|
"loss": 0.3792, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.8147720715522215, |
|
"grad_norm": 0.37804380856317177, |
|
"learning_rate": 1.3301092851130886e-05, |
|
"loss": 0.3657, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 1.817657241777265, |
|
"grad_norm": 0.3598537111947173, |
|
"learning_rate": 1.3277312904768314e-05, |
|
"loss": 0.3721, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.8205424120023082, |
|
"grad_norm": 0.34265734973552825, |
|
"learning_rate": 1.32535121773938e-05, |
|
"loss": 0.3673, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 1.8234275822273514, |
|
"grad_norm": 0.40992539604640754, |
|
"learning_rate": 1.3229690819924653e-05, |
|
"loss": 0.3738, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.8263127524523948, |
|
"grad_norm": 0.39170242560206425, |
|
"learning_rate": 1.3205848983408968e-05, |
|
"loss": 0.3725, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 1.829197922677438, |
|
"grad_norm": 0.4135698812295932, |
|
"learning_rate": 1.3181986819024717e-05, |
|
"loss": 0.3765, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.8320830929024812, |
|
"grad_norm": 0.39775337228906954, |
|
"learning_rate": 1.3158104478078753e-05, |
|
"loss": 0.3648, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 1.8349682631275246, |
|
"grad_norm": 0.4025320364227362, |
|
"learning_rate": 1.3134202112005876e-05, |
|
"loss": 0.3679, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.8378534333525678, |
|
"grad_norm": 0.39435133301085734, |
|
"learning_rate": 1.311027987236786e-05, |
|
"loss": 0.3766, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 1.840738603577611, |
|
"grad_norm": 0.3791471283767141, |
|
"learning_rate": 1.3086337910852487e-05, |
|
"loss": 0.3867, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.8436237738026544, |
|
"grad_norm": 0.37370201756820587, |
|
"learning_rate": 1.306237637927261e-05, |
|
"loss": 0.3669, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 1.8465089440276976, |
|
"grad_norm": 0.38003239897050844, |
|
"learning_rate": 1.3038395429565156e-05, |
|
"loss": 0.3597, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.8493941142527408, |
|
"grad_norm": 0.37745120795042336, |
|
"learning_rate": 1.3014395213790187e-05, |
|
"loss": 0.3666, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 1.8522792844777842, |
|
"grad_norm": 0.369849417274224, |
|
"learning_rate": 1.299037588412993e-05, |
|
"loss": 0.3731, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.8551644547028274, |
|
"grad_norm": 0.37108268900622904, |
|
"learning_rate": 1.2966337592887807e-05, |
|
"loss": 0.3619, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 1.8580496249278706, |
|
"grad_norm": 0.3603718565169327, |
|
"learning_rate": 1.2942280492487478e-05, |
|
"loss": 0.3711, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.860934795152914, |
|
"grad_norm": 0.38800493629769506, |
|
"learning_rate": 1.291820473547186e-05, |
|
"loss": 0.3672, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 1.8638199653779572, |
|
"grad_norm": 0.3778795059973765, |
|
"learning_rate": 1.2894110474502182e-05, |
|
"loss": 0.3665, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.8667051356030004, |
|
"grad_norm": 0.40908012961452245, |
|
"learning_rate": 1.2869997862356994e-05, |
|
"loss": 0.3639, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 1.8695903058280439, |
|
"grad_norm": 0.36111567245322745, |
|
"learning_rate": 1.2845867051931214e-05, |
|
"loss": 0.3679, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.8724754760530873, |
|
"grad_norm": 0.4126916975430262, |
|
"learning_rate": 1.2821718196235147e-05, |
|
"loss": 0.3577, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 1.8753606462781303, |
|
"grad_norm": 0.3896170917861394, |
|
"learning_rate": 1.2797551448393527e-05, |
|
"loss": 0.3533, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.8782458165031737, |
|
"grad_norm": 0.3918073404798387, |
|
"learning_rate": 1.2773366961644537e-05, |
|
"loss": 0.3693, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 1.881130986728217, |
|
"grad_norm": 0.36560888725527363, |
|
"learning_rate": 1.274916488933884e-05, |
|
"loss": 0.3698, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.88401615695326, |
|
"grad_norm": 0.35904154235729674, |
|
"learning_rate": 1.2724945384938608e-05, |
|
"loss": 0.3621, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 1.8869013271783035, |
|
"grad_norm": 0.3712234770214718, |
|
"learning_rate": 1.2700708602016545e-05, |
|
"loss": 0.3596, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.889786497403347, |
|
"grad_norm": 0.35515855306613603, |
|
"learning_rate": 1.2676454694254918e-05, |
|
"loss": 0.3436, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 1.8926716676283901, |
|
"grad_norm": 0.38038321952993087, |
|
"learning_rate": 1.265218381544458e-05, |
|
"loss": 0.3598, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.8955568378534333, |
|
"grad_norm": 0.38405790753936503, |
|
"learning_rate": 1.2627896119483998e-05, |
|
"loss": 0.3613, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 1.8984420080784767, |
|
"grad_norm": 0.35411934799722744, |
|
"learning_rate": 1.2603591760378266e-05, |
|
"loss": 0.3621, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.90132717830352, |
|
"grad_norm": 0.3833342471723807, |
|
"learning_rate": 1.2579270892238147e-05, |
|
"loss": 0.3688, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 1.9042123485285631, |
|
"grad_norm": 0.3783988493179844, |
|
"learning_rate": 1.2554933669279076e-05, |
|
"loss": 0.3566, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.9070975187536066, |
|
"grad_norm": 0.3849340817581559, |
|
"learning_rate": 1.2530580245820202e-05, |
|
"loss": 0.3664, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 1.9099826889786498, |
|
"grad_norm": 0.36371725434046853, |
|
"learning_rate": 1.250621077628339e-05, |
|
"loss": 0.3622, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.912867859203693, |
|
"grad_norm": 0.3741399204987393, |
|
"learning_rate": 1.2481825415192255e-05, |
|
"loss": 0.3381, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 1.9157530294287364, |
|
"grad_norm": 0.4669077205458527, |
|
"learning_rate": 1.245742431717118e-05, |
|
"loss": 0.3823, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.9186381996537796, |
|
"grad_norm": 0.35803705351389503, |
|
"learning_rate": 1.2433007636944331e-05, |
|
"loss": 0.351, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 1.9215233698788228, |
|
"grad_norm": 0.3701949796075695, |
|
"learning_rate": 1.240857552933468e-05, |
|
"loss": 0.3458, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.9244085401038662, |
|
"grad_norm": 0.3867704015823223, |
|
"learning_rate": 1.2384128149263023e-05, |
|
"loss": 0.372, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 1.9272937103289094, |
|
"grad_norm": 0.34734943471086316, |
|
"learning_rate": 1.2359665651746995e-05, |
|
"loss": 0.3587, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.9301788805539526, |
|
"grad_norm": 0.3456302062945545, |
|
"learning_rate": 1.2335188191900087e-05, |
|
"loss": 0.3639, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 1.933064050778996, |
|
"grad_norm": 0.37432101569615994, |
|
"learning_rate": 1.231069592493067e-05, |
|
"loss": 0.3567, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.9359492210040392, |
|
"grad_norm": 0.38495008542439035, |
|
"learning_rate": 1.2286189006140998e-05, |
|
"loss": 0.355, |
|
"step": 3355 |
|
}, |
|
{ |
|
"epoch": 1.9388343912290824, |
|
"grad_norm": 0.3603027711889298, |
|
"learning_rate": 1.2261667590926232e-05, |
|
"loss": 0.3643, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.9417195614541258, |
|
"grad_norm": 0.3801200430528795, |
|
"learning_rate": 1.223713183477346e-05, |
|
"loss": 0.3547, |
|
"step": 3365 |
|
}, |
|
{ |
|
"epoch": 1.944604731679169, |
|
"grad_norm": 0.3801873136595747, |
|
"learning_rate": 1.2212581893260694e-05, |
|
"loss": 0.3674, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.9474899019042122, |
|
"grad_norm": 0.3724561589479728, |
|
"learning_rate": 1.2188017922055901e-05, |
|
"loss": 0.3466, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 1.9503750721292556, |
|
"grad_norm": 0.38382927060326005, |
|
"learning_rate": 1.2163440076916005e-05, |
|
"loss": 0.3736, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.953260242354299, |
|
"grad_norm": 0.40285349364914064, |
|
"learning_rate": 1.2138848513685902e-05, |
|
"loss": 0.3518, |
|
"step": 3385 |
|
}, |
|
{ |
|
"epoch": 1.956145412579342, |
|
"grad_norm": 0.39374554584138266, |
|
"learning_rate": 1.2114243388297478e-05, |
|
"loss": 0.3553, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.9590305828043855, |
|
"grad_norm": 0.3733836670088024, |
|
"learning_rate": 1.2089624856768603e-05, |
|
"loss": 0.3782, |
|
"step": 3395 |
|
}, |
|
{ |
|
"epoch": 1.9619157530294289, |
|
"grad_norm": 0.3512295476922515, |
|
"learning_rate": 1.2064993075202172e-05, |
|
"loss": 0.352, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.9648009232544719, |
|
"grad_norm": 0.35755975140067886, |
|
"learning_rate": 1.2040348199785074e-05, |
|
"loss": 0.3614, |
|
"step": 3405 |
|
}, |
|
{ |
|
"epoch": 1.9676860934795153, |
|
"grad_norm": 0.3541512834606944, |
|
"learning_rate": 1.2015690386787245e-05, |
|
"loss": 0.359, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.9705712637045587, |
|
"grad_norm": 0.3774486665711377, |
|
"learning_rate": 1.1991019792560648e-05, |
|
"loss": 0.3658, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 1.9734564339296017, |
|
"grad_norm": 0.38262821666740215, |
|
"learning_rate": 1.1966336573538287e-05, |
|
"loss": 0.3763, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.976341604154645, |
|
"grad_norm": 0.3903065644133358, |
|
"learning_rate": 1.1941640886233224e-05, |
|
"loss": 0.3589, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 1.9792267743796885, |
|
"grad_norm": 0.3579664105437974, |
|
"learning_rate": 1.1916932887237571e-05, |
|
"loss": 0.3594, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.9821119446047317, |
|
"grad_norm": 0.385483963575225, |
|
"learning_rate": 1.1892212733221523e-05, |
|
"loss": 0.3738, |
|
"step": 3435 |
|
}, |
|
{ |
|
"epoch": 1.984997114829775, |
|
"grad_norm": 0.37602191182512, |
|
"learning_rate": 1.1867480580932336e-05, |
|
"loss": 0.3539, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.9878822850548183, |
|
"grad_norm": 0.40221670992777864, |
|
"learning_rate": 1.1842736587193349e-05, |
|
"loss": 0.3618, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 1.9907674552798615, |
|
"grad_norm": 0.3945792439685852, |
|
"learning_rate": 1.1817980908902981e-05, |
|
"loss": 0.3617, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.9936526255049047, |
|
"grad_norm": 0.3638645756888353, |
|
"learning_rate": 1.1793213703033755e-05, |
|
"loss": 0.3621, |
|
"step": 3455 |
|
}, |
|
{ |
|
"epoch": 1.9965377957299482, |
|
"grad_norm": 0.383149019555126, |
|
"learning_rate": 1.1768435126631276e-05, |
|
"loss": 0.3596, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.9994229659549914, |
|
"grad_norm": 0.380227166238423, |
|
"learning_rate": 1.1743645336813248e-05, |
|
"loss": 0.3638, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.4331684410572052, |
|
"eval_runtime": 0.5935, |
|
"eval_samples_per_second": 129.744, |
|
"eval_steps_per_second": 3.37, |
|
"step": 3466 |
|
}, |
|
{ |
|
"epoch": 2.0023081361800346, |
|
"grad_norm": 0.41074392868999404, |
|
"learning_rate": 1.1718844490768488e-05, |
|
"loss": 0.3104, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 2.005193306405078, |
|
"grad_norm": 0.3928008512339745, |
|
"learning_rate": 1.1694032745755906e-05, |
|
"loss": 0.261, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 2.008078476630121, |
|
"grad_norm": 0.3931528202345155, |
|
"learning_rate": 1.1669210259103535e-05, |
|
"loss": 0.287, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 2.0109636468551644, |
|
"grad_norm": 0.3869283477561703, |
|
"learning_rate": 1.164437718820751e-05, |
|
"loss": 0.2683, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 2.013848817080208, |
|
"grad_norm": 0.3991121860772099, |
|
"learning_rate": 1.1619533690531076e-05, |
|
"loss": 0.2695, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 2.016733987305251, |
|
"grad_norm": 0.4025523043283219, |
|
"learning_rate": 1.1594679923603606e-05, |
|
"loss": 0.2662, |
|
"step": 3495 |
|
}, |
|
{ |
|
"epoch": 2.019619157530294, |
|
"grad_norm": 0.4441663382272395, |
|
"learning_rate": 1.1569816045019575e-05, |
|
"loss": 0.2663, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.0225043277553376, |
|
"grad_norm": 0.4158755932657419, |
|
"learning_rate": 1.1544942212437586e-05, |
|
"loss": 0.2701, |
|
"step": 3505 |
|
}, |
|
{ |
|
"epoch": 2.025389497980381, |
|
"grad_norm": 0.4324043740872268, |
|
"learning_rate": 1.152005858357935e-05, |
|
"loss": 0.2751, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 2.028274668205424, |
|
"grad_norm": 0.41576677704649545, |
|
"learning_rate": 1.1495165316228704e-05, |
|
"loss": 0.2837, |
|
"step": 3515 |
|
}, |
|
{ |
|
"epoch": 2.0311598384304674, |
|
"grad_norm": 0.4261462623114962, |
|
"learning_rate": 1.1470262568230593e-05, |
|
"loss": 0.2778, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 2.034045008655511, |
|
"grad_norm": 0.4222222859454043, |
|
"learning_rate": 1.1445350497490077e-05, |
|
"loss": 0.2663, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 2.036930178880554, |
|
"grad_norm": 0.4149492406466183, |
|
"learning_rate": 1.1420429261971342e-05, |
|
"loss": 0.2615, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 2.0398153491055973, |
|
"grad_norm": 0.4325594095101094, |
|
"learning_rate": 1.139549901969667e-05, |
|
"loss": 0.276, |
|
"step": 3535 |
|
}, |
|
{ |
|
"epoch": 2.0427005193306407, |
|
"grad_norm": 0.4066105524550755, |
|
"learning_rate": 1.137055992874547e-05, |
|
"loss": 0.2749, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 2.0455856895556837, |
|
"grad_norm": 0.4091251188163908, |
|
"learning_rate": 1.1345612147253247e-05, |
|
"loss": 0.2662, |
|
"step": 3545 |
|
}, |
|
{ |
|
"epoch": 2.048470859780727, |
|
"grad_norm": 0.3948683403145309, |
|
"learning_rate": 1.1320655833410621e-05, |
|
"loss": 0.2813, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.0513560300057705, |
|
"grad_norm": 0.41481838952861994, |
|
"learning_rate": 1.1295691145462306e-05, |
|
"loss": 0.2863, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 2.0542412002308135, |
|
"grad_norm": 0.4032363652268982, |
|
"learning_rate": 1.1270718241706118e-05, |
|
"loss": 0.2565, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 2.057126370455857, |
|
"grad_norm": 0.48615644869007646, |
|
"learning_rate": 1.1245737280491973e-05, |
|
"loss": 0.2856, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 2.0600115406809003, |
|
"grad_norm": 0.4219271094611073, |
|
"learning_rate": 1.122074842022087e-05, |
|
"loss": 0.2719, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 2.0628967109059433, |
|
"grad_norm": 0.41257650256952855, |
|
"learning_rate": 1.1195751819343906e-05, |
|
"loss": 0.2745, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 2.0657818811309867, |
|
"grad_norm": 0.428723721941346, |
|
"learning_rate": 1.1170747636361245e-05, |
|
"loss": 0.2779, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 2.06866705135603, |
|
"grad_norm": 0.40354050541964437, |
|
"learning_rate": 1.1145736029821148e-05, |
|
"loss": 0.283, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 2.071552221581073, |
|
"grad_norm": 0.43472195003969, |
|
"learning_rate": 1.1120717158318928e-05, |
|
"loss": 0.281, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 2.0744373918061165, |
|
"grad_norm": 0.4123476736485213, |
|
"learning_rate": 1.1095691180495973e-05, |
|
"loss": 0.2798, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 2.07732256203116, |
|
"grad_norm": 0.4150389828010794, |
|
"learning_rate": 1.1070658255038733e-05, |
|
"loss": 0.2871, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.080207732256203, |
|
"grad_norm": 0.4712720490312681, |
|
"learning_rate": 1.1045618540677712e-05, |
|
"loss": 0.2815, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 2.0830929024812463, |
|
"grad_norm": 0.5081431777602221, |
|
"learning_rate": 1.1020572196186457e-05, |
|
"loss": 0.263, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 2.0859780727062898, |
|
"grad_norm": 0.4549015236527131, |
|
"learning_rate": 1.0995519380380557e-05, |
|
"loss": 0.2717, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 2.0888632429313327, |
|
"grad_norm": 0.5033902066650326, |
|
"learning_rate": 1.097046025211664e-05, |
|
"loss": 0.2813, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 2.091748413156376, |
|
"grad_norm": 0.40826808731777586, |
|
"learning_rate": 1.0945394970291354e-05, |
|
"loss": 0.2607, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 2.0946335833814196, |
|
"grad_norm": 0.4593185041491404, |
|
"learning_rate": 1.0920323693840368e-05, |
|
"loss": 0.28, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 2.0975187536064626, |
|
"grad_norm": 0.4374169481716498, |
|
"learning_rate": 1.0895246581737366e-05, |
|
"loss": 0.2968, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 2.100403923831506, |
|
"grad_norm": 0.4281118948367996, |
|
"learning_rate": 1.087016379299303e-05, |
|
"loss": 0.2877, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 2.1032890940565494, |
|
"grad_norm": 0.45375611915195996, |
|
"learning_rate": 1.0845075486654037e-05, |
|
"loss": 0.261, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 2.106174264281593, |
|
"grad_norm": 0.4267355049796937, |
|
"learning_rate": 1.0819981821802053e-05, |
|
"loss": 0.2786, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.109059434506636, |
|
"grad_norm": 0.4376492488121602, |
|
"learning_rate": 1.0794882957552722e-05, |
|
"loss": 0.2766, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 2.111944604731679, |
|
"grad_norm": 0.45029828413170214, |
|
"learning_rate": 1.0769779053054651e-05, |
|
"loss": 0.283, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 2.1148297749567226, |
|
"grad_norm": 0.43071600979778896, |
|
"learning_rate": 1.0744670267488417e-05, |
|
"loss": 0.2848, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 2.1177149451817656, |
|
"grad_norm": 0.3970843751114558, |
|
"learning_rate": 1.071955676006554e-05, |
|
"loss": 0.2762, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 2.120600115406809, |
|
"grad_norm": 0.38320271641559517, |
|
"learning_rate": 1.0694438690027475e-05, |
|
"loss": 0.2819, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 2.1234852856318525, |
|
"grad_norm": 0.4529791521783765, |
|
"learning_rate": 1.0669316216644623e-05, |
|
"loss": 0.266, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 2.1263704558568954, |
|
"grad_norm": 0.42862083344692714, |
|
"learning_rate": 1.0644189499215292e-05, |
|
"loss": 0.2765, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 2.129255626081939, |
|
"grad_norm": 0.43384197829762733, |
|
"learning_rate": 1.061905869706471e-05, |
|
"loss": 0.3061, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 2.1321407963069823, |
|
"grad_norm": 0.4224346600569041, |
|
"learning_rate": 1.0593923969544e-05, |
|
"loss": 0.3003, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 2.1350259665320253, |
|
"grad_norm": 0.4332701906364038, |
|
"learning_rate": 1.056878547602918e-05, |
|
"loss": 0.2765, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.1379111367570687, |
|
"grad_norm": 0.4393240656228905, |
|
"learning_rate": 1.0543643375920145e-05, |
|
"loss": 0.2879, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 2.140796306982112, |
|
"grad_norm": 0.4036822061189937, |
|
"learning_rate": 1.0518497828639658e-05, |
|
"loss": 0.27, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 2.143681477207155, |
|
"grad_norm": 0.427794972829589, |
|
"learning_rate": 1.0493348993632337e-05, |
|
"loss": 0.2749, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 2.1465666474321985, |
|
"grad_norm": 0.39124739436748485, |
|
"learning_rate": 1.046819703036366e-05, |
|
"loss": 0.2718, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 2.149451817657242, |
|
"grad_norm": 0.4099521410733268, |
|
"learning_rate": 1.0443042098318927e-05, |
|
"loss": 0.2733, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 2.152336987882285, |
|
"grad_norm": 0.4225255601400179, |
|
"learning_rate": 1.0417884357002268e-05, |
|
"loss": 0.2773, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 2.1552221581073283, |
|
"grad_norm": 0.43412449015519566, |
|
"learning_rate": 1.039272396593563e-05, |
|
"loss": 0.2544, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 2.1581073283323717, |
|
"grad_norm": 0.414617071495182, |
|
"learning_rate": 1.0367561084657752e-05, |
|
"loss": 0.2672, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 2.1609924985574147, |
|
"grad_norm": 0.4451465025534693, |
|
"learning_rate": 1.0342395872723173e-05, |
|
"loss": 0.2597, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 2.163877668782458, |
|
"grad_norm": 0.4052440261347642, |
|
"learning_rate": 1.03172284897012e-05, |
|
"loss": 0.2758, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.1667628390075016, |
|
"grad_norm": 0.4393651404130225, |
|
"learning_rate": 1.0292059095174923e-05, |
|
"loss": 0.281, |
|
"step": 3755 |
|
}, |
|
{ |
|
"epoch": 2.1696480092325445, |
|
"grad_norm": 0.43033068987713674, |
|
"learning_rate": 1.026688784874017e-05, |
|
"loss": 0.2839, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 2.172533179457588, |
|
"grad_norm": 0.4187797584558774, |
|
"learning_rate": 1.024171491000452e-05, |
|
"loss": 0.2955, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 2.1754183496826314, |
|
"grad_norm": 0.4306833753003216, |
|
"learning_rate": 1.021654043858628e-05, |
|
"loss": 0.268, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 2.1783035199076743, |
|
"grad_norm": 0.46354342504920265, |
|
"learning_rate": 1.0191364594113475e-05, |
|
"loss": 0.2753, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 2.1811886901327178, |
|
"grad_norm": 0.4667906762400645, |
|
"learning_rate": 1.0166187536222844e-05, |
|
"loss": 0.2826, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 2.184073860357761, |
|
"grad_norm": 0.4268396541209257, |
|
"learning_rate": 1.0141009424558803e-05, |
|
"loss": 0.2594, |
|
"step": 3785 |
|
}, |
|
{ |
|
"epoch": 2.1869590305828046, |
|
"grad_norm": 0.42773226782347973, |
|
"learning_rate": 1.0115830418772474e-05, |
|
"loss": 0.2815, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 2.1898442008078476, |
|
"grad_norm": 0.42670690759544905, |
|
"learning_rate": 1.0090650678520624e-05, |
|
"loss": 0.2884, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 2.192729371032891, |
|
"grad_norm": 0.43803219635914387, |
|
"learning_rate": 1.0065470363464694e-05, |
|
"loss": 0.2789, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.1956145412579344, |
|
"grad_norm": 0.4799532890386973, |
|
"learning_rate": 1.0040289633269764e-05, |
|
"loss": 0.2945, |
|
"step": 3805 |
|
}, |
|
{ |
|
"epoch": 2.1984997114829774, |
|
"grad_norm": 0.4207961621120341, |
|
"learning_rate": 1.0015108647603545e-05, |
|
"loss": 0.2665, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 2.201384881708021, |
|
"grad_norm": 0.43772623733868893, |
|
"learning_rate": 9.98992756613537e-06, |
|
"loss": 0.2791, |
|
"step": 3815 |
|
}, |
|
{ |
|
"epoch": 2.2042700519330642, |
|
"grad_norm": 0.4246990837045649, |
|
"learning_rate": 9.96474654853518e-06, |
|
"loss": 0.2702, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 2.207155222158107, |
|
"grad_norm": 0.4214383786492429, |
|
"learning_rate": 9.93956575447251e-06, |
|
"loss": 0.2724, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 2.2100403923831506, |
|
"grad_norm": 0.42698570251672047, |
|
"learning_rate": 9.914385343615476e-06, |
|
"loss": 0.2714, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 2.212925562608194, |
|
"grad_norm": 0.4504000608499869, |
|
"learning_rate": 9.889205475629763e-06, |
|
"loss": 0.2754, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 2.215810732833237, |
|
"grad_norm": 0.4323598292397766, |
|
"learning_rate": 9.864026310177621e-06, |
|
"loss": 0.2624, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 2.2186959030582805, |
|
"grad_norm": 0.40922887326049673, |
|
"learning_rate": 9.83884800691684e-06, |
|
"loss": 0.2633, |
|
"step": 3845 |
|
}, |
|
{ |
|
"epoch": 2.221581073283324, |
|
"grad_norm": 0.4357930415869159, |
|
"learning_rate": 9.813670725499745e-06, |
|
"loss": 0.2904, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.224466243508367, |
|
"grad_norm": 0.47764591829975994, |
|
"learning_rate": 9.788494625572177e-06, |
|
"loss": 0.2685, |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 2.2273514137334103, |
|
"grad_norm": 0.4306543758497723, |
|
"learning_rate": 9.763319866772486e-06, |
|
"loss": 0.2757, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 2.2302365839584537, |
|
"grad_norm": 0.451979701648703, |
|
"learning_rate": 9.738146608730527e-06, |
|
"loss": 0.281, |
|
"step": 3865 |
|
}, |
|
{ |
|
"epoch": 2.2331217541834967, |
|
"grad_norm": 0.39863721704621635, |
|
"learning_rate": 9.712975011066633e-06, |
|
"loss": 0.2651, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 2.23600692440854, |
|
"grad_norm": 0.4431702895497509, |
|
"learning_rate": 9.687805233390602e-06, |
|
"loss": 0.2875, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 2.2388920946335835, |
|
"grad_norm": 0.42463992002267936, |
|
"learning_rate": 9.662637435300702e-06, |
|
"loss": 0.2747, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 2.2417772648586265, |
|
"grad_norm": 0.4421016875554418, |
|
"learning_rate": 9.637471776382642e-06, |
|
"loss": 0.3014, |
|
"step": 3885 |
|
}, |
|
{ |
|
"epoch": 2.24466243508367, |
|
"grad_norm": 0.4309775900999432, |
|
"learning_rate": 9.612308416208573e-06, |
|
"loss": 0.3009, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 2.2475476053087133, |
|
"grad_norm": 0.38604226765745275, |
|
"learning_rate": 9.587147514336068e-06, |
|
"loss": 0.2784, |
|
"step": 3895 |
|
}, |
|
{ |
|
"epoch": 2.2504327755337563, |
|
"grad_norm": 0.4812969089447562, |
|
"learning_rate": 9.561989230307106e-06, |
|
"loss": 0.2768, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.2533179457587997, |
|
"grad_norm": 0.42601904264725454, |
|
"learning_rate": 9.536833723647073e-06, |
|
"loss": 0.2797, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 2.256203115983843, |
|
"grad_norm": 0.4357929404581439, |
|
"learning_rate": 9.51168115386374e-06, |
|
"loss": 0.2785, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 2.259088286208886, |
|
"grad_norm": 0.41501335099687836, |
|
"learning_rate": 9.486531680446266e-06, |
|
"loss": 0.2674, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 2.2619734564339296, |
|
"grad_norm": 0.4259900039138935, |
|
"learning_rate": 9.46138546286416e-06, |
|
"loss": 0.2659, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 2.264858626658973, |
|
"grad_norm": 0.4619671751542806, |
|
"learning_rate": 9.436242660566299e-06, |
|
"loss": 0.266, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 2.267743796884016, |
|
"grad_norm": 0.4284866042642329, |
|
"learning_rate": 9.411103432979895e-06, |
|
"loss": 0.277, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 2.2706289671090594, |
|
"grad_norm": 0.4405127132258486, |
|
"learning_rate": 9.385967939509503e-06, |
|
"loss": 0.2639, |
|
"step": 3935 |
|
}, |
|
{ |
|
"epoch": 2.273514137334103, |
|
"grad_norm": 0.41142311165199374, |
|
"learning_rate": 9.360836339535997e-06, |
|
"loss": 0.2898, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 2.2763993075591458, |
|
"grad_norm": 0.41387485994040385, |
|
"learning_rate": 9.335708792415554e-06, |
|
"loss": 0.2831, |
|
"step": 3945 |
|
}, |
|
{ |
|
"epoch": 2.279284477784189, |
|
"grad_norm": 0.4321573127781368, |
|
"learning_rate": 9.310585457478665e-06, |
|
"loss": 0.2667, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.2821696480092326, |
|
"grad_norm": 0.4184357007573235, |
|
"learning_rate": 9.285466494029103e-06, |
|
"loss": 0.2725, |
|
"step": 3955 |
|
}, |
|
{ |
|
"epoch": 2.2850548182342756, |
|
"grad_norm": 0.4338603810054309, |
|
"learning_rate": 9.260352061342928e-06, |
|
"loss": 0.2788, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 2.287939988459319, |
|
"grad_norm": 0.5970591619349905, |
|
"learning_rate": 9.235242318667473e-06, |
|
"loss": 0.2679, |
|
"step": 3965 |
|
}, |
|
{ |
|
"epoch": 2.2908251586843624, |
|
"grad_norm": 0.41404764849303466, |
|
"learning_rate": 9.21013742522032e-06, |
|
"loss": 0.2638, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 2.293710328909406, |
|
"grad_norm": 0.44951250598474296, |
|
"learning_rate": 9.185037540188319e-06, |
|
"loss": 0.2674, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 2.296595499134449, |
|
"grad_norm": 0.42037037135886823, |
|
"learning_rate": 9.159942822726539e-06, |
|
"loss": 0.2713, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 2.2994806693594922, |
|
"grad_norm": 0.42502356412209286, |
|
"learning_rate": 9.134853431957316e-06, |
|
"loss": 0.2648, |
|
"step": 3985 |
|
}, |
|
{ |
|
"epoch": 2.3023658395845357, |
|
"grad_norm": 0.4216737901866948, |
|
"learning_rate": 9.109769526969179e-06, |
|
"loss": 0.2724, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 2.3052510098095786, |
|
"grad_norm": 0.4167455267572832, |
|
"learning_rate": 9.084691266815888e-06, |
|
"loss": 0.2766, |
|
"step": 3995 |
|
}, |
|
{ |
|
"epoch": 2.308136180034622, |
|
"grad_norm": 0.45897408948064594, |
|
"learning_rate": 9.059618810515405e-06, |
|
"loss": 0.2781, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.3110213502596655, |
|
"grad_norm": 0.42036299610150296, |
|
"learning_rate": 9.034552317048895e-06, |
|
"loss": 0.2707, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 2.3139065204847085, |
|
"grad_norm": 0.4277783623427705, |
|
"learning_rate": 9.009491945359711e-06, |
|
"loss": 0.2832, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 2.316791690709752, |
|
"grad_norm": 0.421315487309371, |
|
"learning_rate": 8.984437854352384e-06, |
|
"loss": 0.2754, |
|
"step": 4015 |
|
}, |
|
{ |
|
"epoch": 2.3196768609347953, |
|
"grad_norm": 0.4657256225278301, |
|
"learning_rate": 8.959390202891625e-06, |
|
"loss": 0.2912, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 2.3225620311598383, |
|
"grad_norm": 0.4272698374442868, |
|
"learning_rate": 8.93434914980131e-06, |
|
"loss": 0.2662, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 2.3254472013848817, |
|
"grad_norm": 0.4198417527704976, |
|
"learning_rate": 8.909314853863483e-06, |
|
"loss": 0.2727, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 2.328332371609925, |
|
"grad_norm": 0.5124921390099292, |
|
"learning_rate": 8.884287473817332e-06, |
|
"loss": 0.2708, |
|
"step": 4035 |
|
}, |
|
{ |
|
"epoch": 2.331217541834968, |
|
"grad_norm": 0.42269744019953115, |
|
"learning_rate": 8.859267168358198e-06, |
|
"loss": 0.2745, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 2.3341027120600115, |
|
"grad_norm": 0.42318242166255127, |
|
"learning_rate": 8.834254096136561e-06, |
|
"loss": 0.2949, |
|
"step": 4045 |
|
}, |
|
{ |
|
"epoch": 2.336987882285055, |
|
"grad_norm": 0.419851745283628, |
|
"learning_rate": 8.809248415757028e-06, |
|
"loss": 0.2795, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.339873052510098, |
|
"grad_norm": 0.44233002428351004, |
|
"learning_rate": 8.784250285777361e-06, |
|
"loss": 0.2719, |
|
"step": 4055 |
|
}, |
|
{ |
|
"epoch": 2.3427582227351413, |
|
"grad_norm": 0.396083210912282, |
|
"learning_rate": 8.759259864707415e-06, |
|
"loss": 0.2697, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 2.3456433929601848, |
|
"grad_norm": 0.47094149055802703, |
|
"learning_rate": 8.73427731100818e-06, |
|
"loss": 0.2724, |
|
"step": 4065 |
|
}, |
|
{ |
|
"epoch": 2.3485285631852277, |
|
"grad_norm": 0.42603022428645243, |
|
"learning_rate": 8.70930278309076e-06, |
|
"loss": 0.2654, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 2.351413733410271, |
|
"grad_norm": 0.4379509456011344, |
|
"learning_rate": 8.684336439315362e-06, |
|
"loss": 0.2786, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 2.3542989036353146, |
|
"grad_norm": 0.426972682822357, |
|
"learning_rate": 8.659378437990305e-06, |
|
"loss": 0.2852, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.357184073860358, |
|
"grad_norm": 0.42210417306175224, |
|
"learning_rate": 8.634428937371008e-06, |
|
"loss": 0.2777, |
|
"step": 4085 |
|
}, |
|
{ |
|
"epoch": 2.360069244085401, |
|
"grad_norm": 0.4490344314229441, |
|
"learning_rate": 8.609488095658987e-06, |
|
"loss": 0.2732, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 2.3629544143104444, |
|
"grad_norm": 0.44262847306870384, |
|
"learning_rate": 8.584556071000845e-06, |
|
"loss": 0.2724, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 2.365839584535488, |
|
"grad_norm": 0.43681619099011737, |
|
"learning_rate": 8.559633021487298e-06, |
|
"loss": 0.291, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.368724754760531, |
|
"grad_norm": 0.669470605388525, |
|
"learning_rate": 8.53471910515213e-06, |
|
"loss": 0.29, |
|
"step": 4105 |
|
}, |
|
{ |
|
"epoch": 2.371609924985574, |
|
"grad_norm": 0.4872795075157814, |
|
"learning_rate": 8.509814479971226e-06, |
|
"loss": 0.293, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 2.3744950952106176, |
|
"grad_norm": 0.4330590098492678, |
|
"learning_rate": 8.484919303861555e-06, |
|
"loss": 0.2794, |
|
"step": 4115 |
|
}, |
|
{ |
|
"epoch": 2.3773802654356606, |
|
"grad_norm": 0.42534287206042853, |
|
"learning_rate": 8.460033734680158e-06, |
|
"loss": 0.2766, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.380265435660704, |
|
"grad_norm": 0.43174048146055444, |
|
"learning_rate": 8.43515793022318e-06, |
|
"loss": 0.2797, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 2.3831506058857475, |
|
"grad_norm": 0.4429583272831574, |
|
"learning_rate": 8.410292048224838e-06, |
|
"loss": 0.269, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 2.3860357761107904, |
|
"grad_norm": 0.41491168931306777, |
|
"learning_rate": 8.38543624635643e-06, |
|
"loss": 0.2771, |
|
"step": 4135 |
|
}, |
|
{ |
|
"epoch": 2.388920946335834, |
|
"grad_norm": 0.4490843763025565, |
|
"learning_rate": 8.360590682225345e-06, |
|
"loss": 0.2775, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.3918061165608773, |
|
"grad_norm": 0.4162339442858775, |
|
"learning_rate": 8.335755513374044e-06, |
|
"loss": 0.2725, |
|
"step": 4145 |
|
}, |
|
{ |
|
"epoch": 2.3946912867859202, |
|
"grad_norm": 0.3966357439538777, |
|
"learning_rate": 8.310930897279088e-06, |
|
"loss": 0.2824, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.3975764570109637, |
|
"grad_norm": 0.43243463473106125, |
|
"learning_rate": 8.286116991350113e-06, |
|
"loss": 0.273, |
|
"step": 4155 |
|
}, |
|
{ |
|
"epoch": 2.400461627236007, |
|
"grad_norm": 0.40628353100856796, |
|
"learning_rate": 8.261313952928853e-06, |
|
"loss": 0.2697, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 2.40334679746105, |
|
"grad_norm": 0.41821368197595044, |
|
"learning_rate": 8.236521939288116e-06, |
|
"loss": 0.2722, |
|
"step": 4165 |
|
}, |
|
{ |
|
"epoch": 2.4062319676860935, |
|
"grad_norm": 0.43315603245507267, |
|
"learning_rate": 8.211741107630817e-06, |
|
"loss": 0.2811, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 2.409117137911137, |
|
"grad_norm": 0.4192983792279987, |
|
"learning_rate": 8.186971615088967e-06, |
|
"loss": 0.27, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 2.41200230813618, |
|
"grad_norm": 0.4228869690317952, |
|
"learning_rate": 8.162213618722679e-06, |
|
"loss": 0.2936, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 2.4148874783612233, |
|
"grad_norm": 0.4264461059873532, |
|
"learning_rate": 8.137467275519156e-06, |
|
"loss": 0.255, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 2.4177726485862667, |
|
"grad_norm": 0.42183959270214744, |
|
"learning_rate": 8.112732742391722e-06, |
|
"loss": 0.265, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 2.4206578188113097, |
|
"grad_norm": 0.440409389917108, |
|
"learning_rate": 8.088010176178816e-06, |
|
"loss": 0.2895, |
|
"step": 4195 |
|
}, |
|
{ |
|
"epoch": 2.423542989036353, |
|
"grad_norm": 0.4190921915499105, |
|
"learning_rate": 8.063299733642991e-06, |
|
"loss": 0.268, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.4264281592613965, |
|
"grad_norm": 0.4370598207669186, |
|
"learning_rate": 8.03860157146993e-06, |
|
"loss": 0.2764, |
|
"step": 4205 |
|
}, |
|
{ |
|
"epoch": 2.4293133294864395, |
|
"grad_norm": 0.4607794338360012, |
|
"learning_rate": 8.013915846267439e-06, |
|
"loss": 0.2863, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 2.432198499711483, |
|
"grad_norm": 0.4378000785386685, |
|
"learning_rate": 7.98924271456447e-06, |
|
"loss": 0.2747, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 2.4350836699365264, |
|
"grad_norm": 0.4598949217946755, |
|
"learning_rate": 7.964582332810122e-06, |
|
"loss": 0.2756, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 2.4379688401615693, |
|
"grad_norm": 0.4305567337108496, |
|
"learning_rate": 7.939934857372646e-06, |
|
"loss": 0.2734, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 2.4408540103866128, |
|
"grad_norm": 0.42896614513275355, |
|
"learning_rate": 7.915300444538458e-06, |
|
"loss": 0.2704, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 2.443739180611656, |
|
"grad_norm": 0.4104247108771218, |
|
"learning_rate": 7.89067925051114e-06, |
|
"loss": 0.2683, |
|
"step": 4235 |
|
}, |
|
{ |
|
"epoch": 2.446624350836699, |
|
"grad_norm": 0.42265110955709223, |
|
"learning_rate": 7.866071431410458e-06, |
|
"loss": 0.2834, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 2.4495095210617426, |
|
"grad_norm": 0.4025030251156565, |
|
"learning_rate": 7.841477143271374e-06, |
|
"loss": 0.2719, |
|
"step": 4245 |
|
}, |
|
{ |
|
"epoch": 2.452394691286786, |
|
"grad_norm": 0.4347245013374062, |
|
"learning_rate": 7.816896542043048e-06, |
|
"loss": 0.2849, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.455279861511829, |
|
"grad_norm": 0.3970293249450069, |
|
"learning_rate": 7.792329783587853e-06, |
|
"loss": 0.281, |
|
"step": 4255 |
|
}, |
|
{ |
|
"epoch": 2.4581650317368724, |
|
"grad_norm": 0.5089714627565546, |
|
"learning_rate": 7.767777023680384e-06, |
|
"loss": 0.2886, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 2.461050201961916, |
|
"grad_norm": 0.42036661292632566, |
|
"learning_rate": 7.743238418006476e-06, |
|
"loss": 0.274, |
|
"step": 4265 |
|
}, |
|
{ |
|
"epoch": 2.463935372186959, |
|
"grad_norm": 0.41995706488380274, |
|
"learning_rate": 7.71871412216222e-06, |
|
"loss": 0.2834, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 2.466820542412002, |
|
"grad_norm": 0.405500972649185, |
|
"learning_rate": 7.694204291652962e-06, |
|
"loss": 0.2723, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 2.4697057126370456, |
|
"grad_norm": 0.47474988784176503, |
|
"learning_rate": 7.669709081892324e-06, |
|
"loss": 0.276, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 2.472590882862089, |
|
"grad_norm": 0.4208990452736956, |
|
"learning_rate": 7.645228648201225e-06, |
|
"loss": 0.2686, |
|
"step": 4285 |
|
}, |
|
{ |
|
"epoch": 2.475476053087132, |
|
"grad_norm": 0.5125940516643855, |
|
"learning_rate": 7.62076314580689e-06, |
|
"loss": 0.2791, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 2.4783612233121755, |
|
"grad_norm": 0.43397963005803125, |
|
"learning_rate": 7.596312729841868e-06, |
|
"loss": 0.2714, |
|
"step": 4295 |
|
}, |
|
{ |
|
"epoch": 2.481246393537219, |
|
"grad_norm": 0.43916382261638937, |
|
"learning_rate": 7.571877555343039e-06, |
|
"loss": 0.2953, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.484131563762262, |
|
"grad_norm": 0.42968851926015045, |
|
"learning_rate": 7.547457777250649e-06, |
|
"loss": 0.2915, |
|
"step": 4305 |
|
}, |
|
{ |
|
"epoch": 2.4870167339873053, |
|
"grad_norm": 0.4334658833975296, |
|
"learning_rate": 7.523053550407308e-06, |
|
"loss": 0.2781, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 2.4899019042123487, |
|
"grad_norm": 0.442518451619476, |
|
"learning_rate": 7.498665029557028e-06, |
|
"loss": 0.2795, |
|
"step": 4315 |
|
}, |
|
{ |
|
"epoch": 2.4927870744373917, |
|
"grad_norm": 0.4381590289442612, |
|
"learning_rate": 7.474292369344226e-06, |
|
"loss": 0.2641, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 2.495672244662435, |
|
"grad_norm": 0.44001237345403327, |
|
"learning_rate": 7.449935724312745e-06, |
|
"loss": 0.2688, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 2.4985574148874785, |
|
"grad_norm": 0.43261375909491934, |
|
"learning_rate": 7.42559524890488e-06, |
|
"loss": 0.2791, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 2.5014425851125215, |
|
"grad_norm": 0.41821164862411353, |
|
"learning_rate": 7.401271097460401e-06, |
|
"loss": 0.2753, |
|
"step": 4335 |
|
}, |
|
{ |
|
"epoch": 2.504327755337565, |
|
"grad_norm": 0.4284156665016015, |
|
"learning_rate": 7.376963424215568e-06, |
|
"loss": 0.2916, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 2.5072129255626083, |
|
"grad_norm": 0.42595494161348285, |
|
"learning_rate": 7.352672383302153e-06, |
|
"loss": 0.2587, |
|
"step": 4345 |
|
}, |
|
{ |
|
"epoch": 2.5100980957876513, |
|
"grad_norm": 0.4234233587645987, |
|
"learning_rate": 7.328398128746463e-06, |
|
"loss": 0.2723, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.5129832660126947, |
|
"grad_norm": 0.4206786678316223, |
|
"learning_rate": 7.304140814468365e-06, |
|
"loss": 0.2737, |
|
"step": 4355 |
|
}, |
|
{ |
|
"epoch": 2.515868436237738, |
|
"grad_norm": 0.4421492786532555, |
|
"learning_rate": 7.27990059428032e-06, |
|
"loss": 0.2871, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 2.5187536064627816, |
|
"grad_norm": 0.4071794979099346, |
|
"learning_rate": 7.255677621886388e-06, |
|
"loss": 0.2669, |
|
"step": 4365 |
|
}, |
|
{ |
|
"epoch": 2.5216387766878245, |
|
"grad_norm": 0.4085099532665915, |
|
"learning_rate": 7.23147205088126e-06, |
|
"loss": 0.2749, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 2.524523946912868, |
|
"grad_norm": 0.4540968719545943, |
|
"learning_rate": 7.207284034749294e-06, |
|
"loss": 0.2784, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 2.5274091171379114, |
|
"grad_norm": 0.43868950738012735, |
|
"learning_rate": 7.183113726863529e-06, |
|
"loss": 0.2817, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 2.5302942873629544, |
|
"grad_norm": 0.5603667780494723, |
|
"learning_rate": 7.15896128048473e-06, |
|
"loss": 0.2779, |
|
"step": 4385 |
|
}, |
|
{ |
|
"epoch": 2.533179457587998, |
|
"grad_norm": 0.4643684780575487, |
|
"learning_rate": 7.134826848760392e-06, |
|
"loss": 0.2704, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 2.536064627813041, |
|
"grad_norm": 0.4311580857729655, |
|
"learning_rate": 7.110710584723785e-06, |
|
"loss": 0.2773, |
|
"step": 4395 |
|
}, |
|
{ |
|
"epoch": 2.538949798038084, |
|
"grad_norm": 0.41665610893096905, |
|
"learning_rate": 7.086612641292985e-06, |
|
"loss": 0.2697, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.5418349682631276, |
|
"grad_norm": 0.4448150891591054, |
|
"learning_rate": 7.062533171269887e-06, |
|
"loss": 0.2679, |
|
"step": 4405 |
|
}, |
|
{ |
|
"epoch": 2.544720138488171, |
|
"grad_norm": 0.4485899811392423, |
|
"learning_rate": 7.038472327339268e-06, |
|
"loss": 0.2624, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 2.547605308713214, |
|
"grad_norm": 0.4694083142624704, |
|
"learning_rate": 7.0144302620677804e-06, |
|
"loss": 0.2671, |
|
"step": 4415 |
|
}, |
|
{ |
|
"epoch": 2.5504904789382574, |
|
"grad_norm": 0.44085510653638454, |
|
"learning_rate": 6.990407127903015e-06, |
|
"loss": 0.2741, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 2.553375649163301, |
|
"grad_norm": 0.4331688809785418, |
|
"learning_rate": 6.966403077172521e-06, |
|
"loss": 0.2784, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 2.556260819388344, |
|
"grad_norm": 0.42326748215143545, |
|
"learning_rate": 6.942418262082833e-06, |
|
"loss": 0.2803, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 2.5591459896133872, |
|
"grad_norm": 0.4579702510573031, |
|
"learning_rate": 6.91845283471853e-06, |
|
"loss": 0.2629, |
|
"step": 4435 |
|
}, |
|
{ |
|
"epoch": 2.5620311598384307, |
|
"grad_norm": 0.41574752352195643, |
|
"learning_rate": 6.894506947041246e-06, |
|
"loss": 0.2606, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 2.5649163300634736, |
|
"grad_norm": 0.4365002129279704, |
|
"learning_rate": 6.870580750888722e-06, |
|
"loss": 0.2724, |
|
"step": 4445 |
|
}, |
|
{ |
|
"epoch": 2.567801500288517, |
|
"grad_norm": 0.4399394455174853, |
|
"learning_rate": 6.846674397973825e-06, |
|
"loss": 0.2649, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 2.5706866705135605, |
|
"grad_norm": 0.447371975699018, |
|
"learning_rate": 6.822788039883621e-06, |
|
"loss": 0.2684, |
|
"step": 4455 |
|
}, |
|
{ |
|
"epoch": 2.5735718407386035, |
|
"grad_norm": 0.3961735893626953, |
|
"learning_rate": 6.7989218280783686e-06, |
|
"loss": 0.2587, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 2.576457010963647, |
|
"grad_norm": 0.44985375940450545, |
|
"learning_rate": 6.775075913890597e-06, |
|
"loss": 0.2613, |
|
"step": 4465 |
|
}, |
|
{ |
|
"epoch": 2.5793421811886903, |
|
"grad_norm": 0.4488817173284117, |
|
"learning_rate": 6.751250448524123e-06, |
|
"loss": 0.2814, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 2.5822273514137333, |
|
"grad_norm": 0.5139233580575341, |
|
"learning_rate": 6.727445583053096e-06, |
|
"loss": 0.2707, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 2.5851125216387767, |
|
"grad_norm": 0.4742477912373411, |
|
"learning_rate": 6.703661468421065e-06, |
|
"loss": 0.2971, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 2.58799769186382, |
|
"grad_norm": 0.4354163078529058, |
|
"learning_rate": 6.679898255439974e-06, |
|
"loss": 0.2796, |
|
"step": 4485 |
|
}, |
|
{ |
|
"epoch": 2.590882862088863, |
|
"grad_norm": 0.42132555668081595, |
|
"learning_rate": 6.656156094789253e-06, |
|
"loss": 0.2844, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 2.5937680323139065, |
|
"grad_norm": 0.43721072545918765, |
|
"learning_rate": 6.632435137014823e-06, |
|
"loss": 0.2778, |
|
"step": 4495 |
|
}, |
|
{ |
|
"epoch": 2.59665320253895, |
|
"grad_norm": 0.405910083382628, |
|
"learning_rate": 6.608735532528178e-06, |
|
"loss": 0.2977, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.599538372763993, |
|
"grad_norm": 0.41260341077774265, |
|
"learning_rate": 6.585057431605406e-06, |
|
"loss": 0.2686, |
|
"step": 4505 |
|
}, |
|
{ |
|
"epoch": 2.6024235429890363, |
|
"grad_norm": 0.44802578482097516, |
|
"learning_rate": 6.561400984386243e-06, |
|
"loss": 0.2809, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 2.6053087132140798, |
|
"grad_norm": 0.4321051913017041, |
|
"learning_rate": 6.537766340873125e-06, |
|
"loss": 0.2842, |
|
"step": 4515 |
|
}, |
|
{ |
|
"epoch": 2.6081938834391227, |
|
"grad_norm": 0.4775646961550856, |
|
"learning_rate": 6.514153650930228e-06, |
|
"loss": 0.2702, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 2.611079053664166, |
|
"grad_norm": 0.4330851918875865, |
|
"learning_rate": 6.4905630642825245e-06, |
|
"loss": 0.261, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 2.6139642238892096, |
|
"grad_norm": 0.46362229239200453, |
|
"learning_rate": 6.466994730514842e-06, |
|
"loss": 0.2815, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 2.6168493941142525, |
|
"grad_norm": 0.41520002516181664, |
|
"learning_rate": 6.443448799070895e-06, |
|
"loss": 0.2786, |
|
"step": 4535 |
|
}, |
|
{ |
|
"epoch": 2.619734564339296, |
|
"grad_norm": 0.45005348838742215, |
|
"learning_rate": 6.419925419252353e-06, |
|
"loss": 0.2758, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 2.6226197345643394, |
|
"grad_norm": 0.56725726849741, |
|
"learning_rate": 6.39642474021788e-06, |
|
"loss": 0.2641, |
|
"step": 4545 |
|
}, |
|
{ |
|
"epoch": 2.6255049047893824, |
|
"grad_norm": 0.49336912135631245, |
|
"learning_rate": 6.3729469109822115e-06, |
|
"loss": 0.2803, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 2.628390075014426, |
|
"grad_norm": 0.4407248177296093, |
|
"learning_rate": 6.349492080415187e-06, |
|
"loss": 0.2849, |
|
"step": 4555 |
|
}, |
|
{ |
|
"epoch": 2.631275245239469, |
|
"grad_norm": 0.41465469511645586, |
|
"learning_rate": 6.326060397240814e-06, |
|
"loss": 0.275, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 2.634160415464512, |
|
"grad_norm": 0.4463897975975723, |
|
"learning_rate": 6.3026520100363276e-06, |
|
"loss": 0.2763, |
|
"step": 4565 |
|
}, |
|
{ |
|
"epoch": 2.6370455856895556, |
|
"grad_norm": 0.4179248454832217, |
|
"learning_rate": 6.279267067231242e-06, |
|
"loss": 0.2759, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 2.639930755914599, |
|
"grad_norm": 0.4482183921648813, |
|
"learning_rate": 6.255905717106421e-06, |
|
"loss": 0.2809, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 2.642815926139642, |
|
"grad_norm": 0.45808207591686173, |
|
"learning_rate": 6.232568107793127e-06, |
|
"loss": 0.2891, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 2.6457010963646854, |
|
"grad_norm": 0.43079637132281384, |
|
"learning_rate": 6.209254387272085e-06, |
|
"loss": 0.2846, |
|
"step": 4585 |
|
}, |
|
{ |
|
"epoch": 2.648586266589729, |
|
"grad_norm": 0.43896911522538273, |
|
"learning_rate": 6.185964703372538e-06, |
|
"loss": 0.2709, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 2.651471436814772, |
|
"grad_norm": 0.41937040243151874, |
|
"learning_rate": 6.162699203771324e-06, |
|
"loss": 0.2698, |
|
"step": 4595 |
|
}, |
|
{ |
|
"epoch": 2.6543566070398152, |
|
"grad_norm": 0.436960743544877, |
|
"learning_rate": 6.139458035991932e-06, |
|
"loss": 0.2571, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.6572417772648587, |
|
"grad_norm": 0.43802501221211554, |
|
"learning_rate": 6.116241347403564e-06, |
|
"loss": 0.2845, |
|
"step": 4605 |
|
}, |
|
{ |
|
"epoch": 2.6601269474899016, |
|
"grad_norm": 0.4586510827028596, |
|
"learning_rate": 6.093049285220198e-06, |
|
"loss": 0.2715, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 2.663012117714945, |
|
"grad_norm": 0.4206625935899696, |
|
"learning_rate": 6.069881996499664e-06, |
|
"loss": 0.2725, |
|
"step": 4615 |
|
}, |
|
{ |
|
"epoch": 2.6658972879399885, |
|
"grad_norm": 0.4183330676839759, |
|
"learning_rate": 6.046739628142705e-06, |
|
"loss": 0.2732, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 2.668782458165032, |
|
"grad_norm": 0.4380908491163701, |
|
"learning_rate": 6.023622326892051e-06, |
|
"loss": 0.2763, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 2.671667628390075, |
|
"grad_norm": 0.44598178634221547, |
|
"learning_rate": 6.000530239331481e-06, |
|
"loss": 0.2794, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 2.6745527986151183, |
|
"grad_norm": 0.43851972482947554, |
|
"learning_rate": 5.977463511884898e-06, |
|
"loss": 0.2679, |
|
"step": 4635 |
|
}, |
|
{ |
|
"epoch": 2.6774379688401617, |
|
"grad_norm": 0.4173251500861273, |
|
"learning_rate": 5.954422290815396e-06, |
|
"loss": 0.2757, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 2.6803231390652047, |
|
"grad_norm": 0.42797667265485484, |
|
"learning_rate": 5.931406722224344e-06, |
|
"loss": 0.2673, |
|
"step": 4645 |
|
}, |
|
{ |
|
"epoch": 2.683208309290248, |
|
"grad_norm": 0.41916940809600545, |
|
"learning_rate": 5.908416952050453e-06, |
|
"loss": 0.2587, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.6860934795152915, |
|
"grad_norm": 0.40342454442056763, |
|
"learning_rate": 5.885453126068836e-06, |
|
"loss": 0.2575, |
|
"step": 4655 |
|
}, |
|
{ |
|
"epoch": 2.6889786497403345, |
|
"grad_norm": 0.42304042640556994, |
|
"learning_rate": 5.862515389890111e-06, |
|
"loss": 0.2743, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 2.691863819965378, |
|
"grad_norm": 0.4459071254483481, |
|
"learning_rate": 5.839603888959455e-06, |
|
"loss": 0.2764, |
|
"step": 4665 |
|
}, |
|
{ |
|
"epoch": 2.6947489901904214, |
|
"grad_norm": 0.41434533982836086, |
|
"learning_rate": 5.8167187685557045e-06, |
|
"loss": 0.2631, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 2.6976341604154648, |
|
"grad_norm": 0.44266182668844456, |
|
"learning_rate": 5.7938601737904e-06, |
|
"loss": 0.2709, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 2.7005193306405078, |
|
"grad_norm": 0.4225234227671778, |
|
"learning_rate": 5.771028249606906e-06, |
|
"loss": 0.2736, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 2.703404500865551, |
|
"grad_norm": 0.43317051916217664, |
|
"learning_rate": 5.74822314077946e-06, |
|
"loss": 0.2812, |
|
"step": 4685 |
|
}, |
|
{ |
|
"epoch": 2.7062896710905946, |
|
"grad_norm": 0.4224513310713421, |
|
"learning_rate": 5.725444991912266e-06, |
|
"loss": 0.2657, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 2.7091748413156376, |
|
"grad_norm": 0.4314258773445562, |
|
"learning_rate": 5.702693947438589e-06, |
|
"loss": 0.271, |
|
"step": 4695 |
|
}, |
|
{ |
|
"epoch": 2.712060011540681, |
|
"grad_norm": 0.4422321485260852, |
|
"learning_rate": 5.679970151619823e-06, |
|
"loss": 0.2711, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.7149451817657244, |
|
"grad_norm": 0.44035975780616426, |
|
"learning_rate": 5.6572737485445804e-06, |
|
"loss": 0.2874, |
|
"step": 4705 |
|
}, |
|
{ |
|
"epoch": 2.7178303519907674, |
|
"grad_norm": 0.44924301832440483, |
|
"learning_rate": 5.634604882127771e-06, |
|
"loss": 0.2703, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 2.720715522215811, |
|
"grad_norm": 0.4107307992041668, |
|
"learning_rate": 5.611963696109726e-06, |
|
"loss": 0.2729, |
|
"step": 4715 |
|
}, |
|
{ |
|
"epoch": 2.7236006924408542, |
|
"grad_norm": 0.4372366602675938, |
|
"learning_rate": 5.58935033405523e-06, |
|
"loss": 0.276, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 2.726485862665897, |
|
"grad_norm": 0.45334235025453323, |
|
"learning_rate": 5.5667649393526535e-06, |
|
"loss": 0.2801, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 2.7293710328909406, |
|
"grad_norm": 0.4201853295737695, |
|
"learning_rate": 5.544207655213021e-06, |
|
"loss": 0.2831, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 2.732256203115984, |
|
"grad_norm": 0.44159486930994, |
|
"learning_rate": 5.521678624669121e-06, |
|
"loss": 0.2644, |
|
"step": 4735 |
|
}, |
|
{ |
|
"epoch": 2.735141373341027, |
|
"grad_norm": 0.45477978259519153, |
|
"learning_rate": 5.499177990574591e-06, |
|
"loss": 0.2856, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 2.7380265435660704, |
|
"grad_norm": 0.4329706475468657, |
|
"learning_rate": 5.4767058956029955e-06, |
|
"loss": 0.2757, |
|
"step": 4745 |
|
}, |
|
{ |
|
"epoch": 2.740911713791114, |
|
"grad_norm": 0.46147021358014734, |
|
"learning_rate": 5.454262482246954e-06, |
|
"loss": 0.2816, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.743796884016157, |
|
"grad_norm": 0.4661777717367274, |
|
"learning_rate": 5.431847892817208e-06, |
|
"loss": 0.2715, |
|
"step": 4755 |
|
}, |
|
{ |
|
"epoch": 2.7466820542412003, |
|
"grad_norm": 0.42606918880864486, |
|
"learning_rate": 5.40946226944173e-06, |
|
"loss": 0.2785, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 2.7495672244662437, |
|
"grad_norm": 0.44527079904346323, |
|
"learning_rate": 5.387105754064829e-06, |
|
"loss": 0.2776, |
|
"step": 4765 |
|
}, |
|
{ |
|
"epoch": 2.7524523946912867, |
|
"grad_norm": 0.4377246196123211, |
|
"learning_rate": 5.364778488446247e-06, |
|
"loss": 0.2753, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 2.75533756491633, |
|
"grad_norm": 0.44386684339819055, |
|
"learning_rate": 5.342480614160247e-06, |
|
"loss": 0.2859, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 2.7582227351413735, |
|
"grad_norm": 0.4235638405047958, |
|
"learning_rate": 5.320212272594725e-06, |
|
"loss": 0.2811, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 2.7611079053664165, |
|
"grad_norm": 0.45140518863698265, |
|
"learning_rate": 5.297973604950321e-06, |
|
"loss": 0.2704, |
|
"step": 4785 |
|
}, |
|
{ |
|
"epoch": 2.76399307559146, |
|
"grad_norm": 0.4079388286223738, |
|
"learning_rate": 5.275764752239523e-06, |
|
"loss": 0.2625, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 2.7668782458165033, |
|
"grad_norm": 0.41869006995290337, |
|
"learning_rate": 5.2535858552857474e-06, |
|
"loss": 0.2603, |
|
"step": 4795 |
|
}, |
|
{ |
|
"epoch": 2.7697634160415463, |
|
"grad_norm": 0.4333081525779454, |
|
"learning_rate": 5.231437054722477e-06, |
|
"loss": 0.2908, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.7726485862665897, |
|
"grad_norm": 0.46442418499425026, |
|
"learning_rate": 5.209318490992355e-06, |
|
"loss": 0.2711, |
|
"step": 4805 |
|
}, |
|
{ |
|
"epoch": 2.775533756491633, |
|
"grad_norm": 0.42235720398392995, |
|
"learning_rate": 5.187230304346304e-06, |
|
"loss": 0.2874, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 2.778418926716676, |
|
"grad_norm": 0.42546995948748306, |
|
"learning_rate": 5.1651726348426146e-06, |
|
"loss": 0.2845, |
|
"step": 4815 |
|
}, |
|
{ |
|
"epoch": 2.7813040969417195, |
|
"grad_norm": 0.4458920282497016, |
|
"learning_rate": 5.143145622346089e-06, |
|
"loss": 0.2656, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 2.784189267166763, |
|
"grad_norm": 0.4140358163182573, |
|
"learning_rate": 5.121149406527123e-06, |
|
"loss": 0.2674, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 2.787074437391806, |
|
"grad_norm": 0.4300310193339041, |
|
"learning_rate": 5.099184126860837e-06, |
|
"loss": 0.266, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 2.7899596076168494, |
|
"grad_norm": 0.8717864481697682, |
|
"learning_rate": 5.077249922626197e-06, |
|
"loss": 0.2794, |
|
"step": 4835 |
|
}, |
|
{ |
|
"epoch": 2.792844777841893, |
|
"grad_norm": 0.43179815522266113, |
|
"learning_rate": 5.055346932905125e-06, |
|
"loss": 0.2821, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 2.7957299480669358, |
|
"grad_norm": 0.438127032880629, |
|
"learning_rate": 5.033475296581606e-06, |
|
"loss": 0.2749, |
|
"step": 4845 |
|
}, |
|
{ |
|
"epoch": 2.798615118291979, |
|
"grad_norm": 0.42678461004761115, |
|
"learning_rate": 5.011635152340816e-06, |
|
"loss": 0.2724, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 2.8015002885170226, |
|
"grad_norm": 0.4403224341518811, |
|
"learning_rate": 4.9898266386682534e-06, |
|
"loss": 0.2848, |
|
"step": 4855 |
|
}, |
|
{ |
|
"epoch": 2.8043854587420656, |
|
"grad_norm": 0.4551147345010523, |
|
"learning_rate": 4.968049893848854e-06, |
|
"loss": 0.2637, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 2.807270628967109, |
|
"grad_norm": 0.43423817098138445, |
|
"learning_rate": 4.9463050559660985e-06, |
|
"loss": 0.268, |
|
"step": 4865 |
|
}, |
|
{ |
|
"epoch": 2.8101557991921524, |
|
"grad_norm": 0.42455505455856984, |
|
"learning_rate": 4.924592262901154e-06, |
|
"loss": 0.2681, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 2.8130409694171954, |
|
"grad_norm": 0.4530758791142782, |
|
"learning_rate": 4.9029116523320035e-06, |
|
"loss": 0.268, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 2.815926139642239, |
|
"grad_norm": 0.4249843125062271, |
|
"learning_rate": 4.881263361732552e-06, |
|
"loss": 0.2753, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 2.8188113098672822, |
|
"grad_norm": 0.4472656624417374, |
|
"learning_rate": 4.859647528371784e-06, |
|
"loss": 0.283, |
|
"step": 4885 |
|
}, |
|
{ |
|
"epoch": 2.821696480092325, |
|
"grad_norm": 0.42578297771011603, |
|
"learning_rate": 4.838064289312862e-06, |
|
"loss": 0.2898, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 2.8245816503173686, |
|
"grad_norm": 0.4436993117564527, |
|
"learning_rate": 4.816513781412282e-06, |
|
"loss": 0.2701, |
|
"step": 4895 |
|
}, |
|
{ |
|
"epoch": 2.827466820542412, |
|
"grad_norm": 0.4621541772278322, |
|
"learning_rate": 4.7949961413189885e-06, |
|
"loss": 0.2771, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.830351990767455, |
|
"grad_norm": 0.4266188005044788, |
|
"learning_rate": 4.77351150547352e-06, |
|
"loss": 0.2803, |
|
"step": 4905 |
|
}, |
|
{ |
|
"epoch": 2.8332371609924984, |
|
"grad_norm": 0.45663638676525325, |
|
"learning_rate": 4.752060010107145e-06, |
|
"loss": 0.2826, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 2.836122331217542, |
|
"grad_norm": 0.42586386429655265, |
|
"learning_rate": 4.730641791240981e-06, |
|
"loss": 0.2633, |
|
"step": 4915 |
|
}, |
|
{ |
|
"epoch": 2.839007501442585, |
|
"grad_norm": 0.42216429186560445, |
|
"learning_rate": 4.709256984685147e-06, |
|
"loss": 0.2835, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 2.8418926716676283, |
|
"grad_norm": 0.45151376072343974, |
|
"learning_rate": 4.687905726037902e-06, |
|
"loss": 0.2716, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 2.8447778418926717, |
|
"grad_norm": 0.40227119407723355, |
|
"learning_rate": 4.666588150684786e-06, |
|
"loss": 0.2744, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 2.847663012117715, |
|
"grad_norm": 0.41851949935896066, |
|
"learning_rate": 4.6453043937977496e-06, |
|
"loss": 0.2771, |
|
"step": 4935 |
|
}, |
|
{ |
|
"epoch": 2.850548182342758, |
|
"grad_norm": 0.40933746114854325, |
|
"learning_rate": 4.624054590334304e-06, |
|
"loss": 0.2689, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 2.8534333525678015, |
|
"grad_norm": 0.47564242491449826, |
|
"learning_rate": 4.6028388750366756e-06, |
|
"loss": 0.2809, |
|
"step": 4945 |
|
}, |
|
{ |
|
"epoch": 2.856318522792845, |
|
"grad_norm": 0.43828857688753076, |
|
"learning_rate": 4.581657382430932e-06, |
|
"loss": 0.2853, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 2.859203693017888, |
|
"grad_norm": 0.43574357681542614, |
|
"learning_rate": 4.56051024682615e-06, |
|
"loss": 0.2756, |
|
"step": 4955 |
|
}, |
|
{ |
|
"epoch": 2.8620888632429313, |
|
"grad_norm": 0.42537161903700743, |
|
"learning_rate": 4.53939760231354e-06, |
|
"loss": 0.2874, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 2.8649740334679747, |
|
"grad_norm": 0.4597603651479367, |
|
"learning_rate": 4.518319582765623e-06, |
|
"loss": 0.2892, |
|
"step": 4965 |
|
}, |
|
{ |
|
"epoch": 2.8678592036930177, |
|
"grad_norm": 0.4446590811574243, |
|
"learning_rate": 4.497276321835357e-06, |
|
"loss": 0.2712, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 2.870744373918061, |
|
"grad_norm": 0.46299522122029557, |
|
"learning_rate": 4.476267952955295e-06, |
|
"loss": 0.3009, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 2.8736295441431046, |
|
"grad_norm": 0.4385843033290836, |
|
"learning_rate": 4.455294609336771e-06, |
|
"loss": 0.2755, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 2.876514714368148, |
|
"grad_norm": 0.44649836304692625, |
|
"learning_rate": 4.434356423968999e-06, |
|
"loss": 0.2837, |
|
"step": 4985 |
|
}, |
|
{ |
|
"epoch": 2.879399884593191, |
|
"grad_norm": 0.4656079138932178, |
|
"learning_rate": 4.413453529618274e-06, |
|
"loss": 0.2896, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 2.8822850548182344, |
|
"grad_norm": 0.40090614598820945, |
|
"learning_rate": 4.3925860588271015e-06, |
|
"loss": 0.3039, |
|
"step": 4995 |
|
}, |
|
{ |
|
"epoch": 2.885170225043278, |
|
"grad_norm": 0.429166127365773, |
|
"learning_rate": 4.371754143913398e-06, |
|
"loss": 0.2732, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.888055395268321, |
|
"grad_norm": 0.4028565372934939, |
|
"learning_rate": 4.350957916969598e-06, |
|
"loss": 0.2629, |
|
"step": 5005 |
|
}, |
|
{ |
|
"epoch": 2.890940565493364, |
|
"grad_norm": 0.4626974059523127, |
|
"learning_rate": 4.330197509861851e-06, |
|
"loss": 0.2748, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 2.8938257357184076, |
|
"grad_norm": 0.4168915007613455, |
|
"learning_rate": 4.3094730542291875e-06, |
|
"loss": 0.2851, |
|
"step": 5015 |
|
}, |
|
{ |
|
"epoch": 2.8967109059434506, |
|
"grad_norm": 0.4404850147613879, |
|
"learning_rate": 4.288784681482657e-06, |
|
"loss": 0.2633, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 2.899596076168494, |
|
"grad_norm": 0.4368263858822652, |
|
"learning_rate": 4.268132522804532e-06, |
|
"loss": 0.2795, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 2.9024812463935374, |
|
"grad_norm": 0.4518971041077726, |
|
"learning_rate": 4.247516709147437e-06, |
|
"loss": 0.2779, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 2.9053664166185804, |
|
"grad_norm": 0.42168757805620943, |
|
"learning_rate": 4.2269373712335535e-06, |
|
"loss": 0.2781, |
|
"step": 5035 |
|
}, |
|
{ |
|
"epoch": 2.908251586843624, |
|
"grad_norm": 0.44301611235467, |
|
"learning_rate": 4.206394639553766e-06, |
|
"loss": 0.2844, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 2.9111367570686673, |
|
"grad_norm": 0.43608207255196907, |
|
"learning_rate": 4.185888644366841e-06, |
|
"loss": 0.2764, |
|
"step": 5045 |
|
}, |
|
{ |
|
"epoch": 2.9140219272937102, |
|
"grad_norm": 0.43614919217568904, |
|
"learning_rate": 4.165419515698613e-06, |
|
"loss": 0.2686, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 2.9169070975187537, |
|
"grad_norm": 0.45163629978863207, |
|
"learning_rate": 4.14498738334115e-06, |
|
"loss": 0.2685, |
|
"step": 5055 |
|
}, |
|
{ |
|
"epoch": 2.919792267743797, |
|
"grad_norm": 0.42566284160748424, |
|
"learning_rate": 4.1245923768519235e-06, |
|
"loss": 0.2624, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 2.92267743796884, |
|
"grad_norm": 0.42580290639250135, |
|
"learning_rate": 4.104234625552991e-06, |
|
"loss": 0.2656, |
|
"step": 5065 |
|
}, |
|
{ |
|
"epoch": 2.9255626081938835, |
|
"grad_norm": 0.4508732247385924, |
|
"learning_rate": 4.083914258530202e-06, |
|
"loss": 0.2829, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 2.928447778418927, |
|
"grad_norm": 0.4230373908678959, |
|
"learning_rate": 4.063631404632336e-06, |
|
"loss": 0.283, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 2.93133294864397, |
|
"grad_norm": 0.4519630975638044, |
|
"learning_rate": 4.043386192470309e-06, |
|
"loss": 0.299, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 2.9342181188690133, |
|
"grad_norm": 0.46105467021109736, |
|
"learning_rate": 4.0231787504163684e-06, |
|
"loss": 0.2878, |
|
"step": 5085 |
|
}, |
|
{ |
|
"epoch": 2.9371032890940567, |
|
"grad_norm": 0.4511914211925812, |
|
"learning_rate": 4.00300920660325e-06, |
|
"loss": 0.2778, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 2.9399884593190997, |
|
"grad_norm": 0.44702115253367053, |
|
"learning_rate": 3.982877688923396e-06, |
|
"loss": 0.2662, |
|
"step": 5095 |
|
}, |
|
{ |
|
"epoch": 2.942873629544143, |
|
"grad_norm": 0.427417788950817, |
|
"learning_rate": 3.962784325028119e-06, |
|
"loss": 0.2713, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.9457587997691865, |
|
"grad_norm": 0.43897355082883444, |
|
"learning_rate": 3.942729242326814e-06, |
|
"loss": 0.2743, |
|
"step": 5105 |
|
}, |
|
{ |
|
"epoch": 2.9486439699942295, |
|
"grad_norm": 0.4670512082769263, |
|
"learning_rate": 3.9227125679861286e-06, |
|
"loss": 0.284, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 2.951529140219273, |
|
"grad_norm": 0.4664395393065826, |
|
"learning_rate": 3.902734428929172e-06, |
|
"loss": 0.2769, |
|
"step": 5115 |
|
}, |
|
{ |
|
"epoch": 2.9544143104443163, |
|
"grad_norm": 0.4670672506794604, |
|
"learning_rate": 3.882794951834711e-06, |
|
"loss": 0.2754, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 2.9572994806693593, |
|
"grad_norm": 0.43499585946337327, |
|
"learning_rate": 3.862894263136361e-06, |
|
"loss": 0.2688, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 2.9601846508944027, |
|
"grad_norm": 0.4471133717151604, |
|
"learning_rate": 3.8430324890217805e-06, |
|
"loss": 0.2685, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 2.963069821119446, |
|
"grad_norm": 0.4382618072040372, |
|
"learning_rate": 3.823209755431873e-06, |
|
"loss": 0.2809, |
|
"step": 5135 |
|
}, |
|
{ |
|
"epoch": 2.965954991344489, |
|
"grad_norm": 0.4241285275096586, |
|
"learning_rate": 3.8034261880600034e-06, |
|
"loss": 0.27, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 2.9688401615695326, |
|
"grad_norm": 0.4091644110971949, |
|
"learning_rate": 3.78368191235118e-06, |
|
"loss": 0.2799, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 2.971725331794576, |
|
"grad_norm": 0.43102734170478396, |
|
"learning_rate": 3.76397705350127e-06, |
|
"loss": 0.2797, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 2.974610502019619, |
|
"grad_norm": 0.4370911603217546, |
|
"learning_rate": 3.7443117364561964e-06, |
|
"loss": 0.2701, |
|
"step": 5155 |
|
}, |
|
{ |
|
"epoch": 2.9774956722446624, |
|
"grad_norm": 0.42916373087001025, |
|
"learning_rate": 3.7246860859111655e-06, |
|
"loss": 0.2734, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 2.980380842469706, |
|
"grad_norm": 0.4275858080889107, |
|
"learning_rate": 3.705100226309858e-06, |
|
"loss": 0.2677, |
|
"step": 5165 |
|
}, |
|
{ |
|
"epoch": 2.983266012694749, |
|
"grad_norm": 0.4310089026075201, |
|
"learning_rate": 3.6855542818436397e-06, |
|
"loss": 0.2726, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 2.986151182919792, |
|
"grad_norm": 0.44889673891857773, |
|
"learning_rate": 3.6660483764507916e-06, |
|
"loss": 0.265, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 2.9890363531448356, |
|
"grad_norm": 0.4597978279604376, |
|
"learning_rate": 3.6465826338157007e-06, |
|
"loss": 0.2857, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 2.9919215233698786, |
|
"grad_norm": 0.4672195167157573, |
|
"learning_rate": 3.6271571773680893e-06, |
|
"loss": 0.2765, |
|
"step": 5185 |
|
}, |
|
{ |
|
"epoch": 2.994806693594922, |
|
"grad_norm": 0.4409412220124302, |
|
"learning_rate": 3.6077721302822355e-06, |
|
"loss": 0.2826, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 2.9976918638199654, |
|
"grad_norm": 0.4448322757550867, |
|
"learning_rate": 3.588427615476189e-06, |
|
"loss": 0.2951, |
|
"step": 5195 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.4704112410545349, |
|
"eval_runtime": 0.5938, |
|
"eval_samples_per_second": 129.667, |
|
"eval_steps_per_second": 3.368, |
|
"step": 5199 |
|
}, |
|
{ |
|
"epoch": 3.000577034045009, |
|
"grad_norm": 0.478834624205184, |
|
"learning_rate": 3.5691237556109794e-06, |
|
"loss": 0.2548, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 3.003462204270052, |
|
"grad_norm": 0.45264093142712225, |
|
"learning_rate": 3.549860673089852e-06, |
|
"loss": 0.2178, |
|
"step": 5205 |
|
}, |
|
{ |
|
"epoch": 3.0063473744950953, |
|
"grad_norm": 0.42695857914733454, |
|
"learning_rate": 3.5306384900574943e-06, |
|
"loss": 0.2145, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 3.0092325447201387, |
|
"grad_norm": 0.4252664702307335, |
|
"learning_rate": 3.5114573283992536e-06, |
|
"loss": 0.2017, |
|
"step": 5215 |
|
}, |
|
{ |
|
"epoch": 3.0121177149451817, |
|
"grad_norm": 0.4362013593591915, |
|
"learning_rate": 3.492317309740362e-06, |
|
"loss": 0.202, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 3.015002885170225, |
|
"grad_norm": 0.7918296216376329, |
|
"learning_rate": 3.473218555445166e-06, |
|
"loss": 0.2023, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 3.0178880553952685, |
|
"grad_norm": 0.4425689050623567, |
|
"learning_rate": 3.4541611866163748e-06, |
|
"loss": 0.2156, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 3.0207732256203115, |
|
"grad_norm": 0.434243728200696, |
|
"learning_rate": 3.435145324094259e-06, |
|
"loss": 0.1967, |
|
"step": 5235 |
|
}, |
|
{ |
|
"epoch": 3.023658395845355, |
|
"grad_norm": 0.48550762220985233, |
|
"learning_rate": 3.4161710884559186e-06, |
|
"loss": 0.228, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 3.0265435660703983, |
|
"grad_norm": 0.4451374888019577, |
|
"learning_rate": 3.3972386000144975e-06, |
|
"loss": 0.1994, |
|
"step": 5245 |
|
}, |
|
{ |
|
"epoch": 3.0294287362954413, |
|
"grad_norm": 0.4479982505677569, |
|
"learning_rate": 3.378347978818425e-06, |
|
"loss": 0.2064, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 3.0323139065204847, |
|
"grad_norm": 0.48420281819894295, |
|
"learning_rate": 3.359499344650651e-06, |
|
"loss": 0.2104, |
|
"step": 5255 |
|
}, |
|
{ |
|
"epoch": 3.035199076745528, |
|
"grad_norm": 0.4674463960460507, |
|
"learning_rate": 3.3406928170278997e-06, |
|
"loss": 0.2107, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 3.038084246970571, |
|
"grad_norm": 0.42110517886246845, |
|
"learning_rate": 3.3219285151999037e-06, |
|
"loss": 0.1936, |
|
"step": 5265 |
|
}, |
|
{ |
|
"epoch": 3.0409694171956145, |
|
"grad_norm": 0.4325902687189473, |
|
"learning_rate": 3.3032065581486393e-06, |
|
"loss": 0.2107, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 3.043854587420658, |
|
"grad_norm": 0.45814670201275987, |
|
"learning_rate": 3.2845270645875838e-06, |
|
"loss": 0.2073, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 3.046739757645701, |
|
"grad_norm": 0.4673351294536787, |
|
"learning_rate": 3.26589015296096e-06, |
|
"loss": 0.2141, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 3.0496249278707444, |
|
"grad_norm": 0.43997040330941384, |
|
"learning_rate": 3.2472959414429883e-06, |
|
"loss": 0.2108, |
|
"step": 5285 |
|
}, |
|
{ |
|
"epoch": 3.0525100980957878, |
|
"grad_norm": 0.4862585478605358, |
|
"learning_rate": 3.228744547937125e-06, |
|
"loss": 0.2089, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 3.0553952683208307, |
|
"grad_norm": 0.4501337212448104, |
|
"learning_rate": 3.2102360900753237e-06, |
|
"loss": 0.216, |
|
"step": 5295 |
|
}, |
|
{ |
|
"epoch": 3.058280438545874, |
|
"grad_norm": 0.43785695040034356, |
|
"learning_rate": 3.1917706852173003e-06, |
|
"loss": 0.1999, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 3.0611656087709176, |
|
"grad_norm": 0.45799695752548214, |
|
"learning_rate": 3.173348450449759e-06, |
|
"loss": 0.2034, |
|
"step": 5305 |
|
}, |
|
{ |
|
"epoch": 3.0640507789959606, |
|
"grad_norm": 0.4896057098908631, |
|
"learning_rate": 3.1549695025856873e-06, |
|
"loss": 0.2138, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 3.066935949221004, |
|
"grad_norm": 0.43978051760808934, |
|
"learning_rate": 3.1366339581635785e-06, |
|
"loss": 0.211, |
|
"step": 5315 |
|
}, |
|
{ |
|
"epoch": 3.0698211194460474, |
|
"grad_norm": 0.5408651817364915, |
|
"learning_rate": 3.118341933446726e-06, |
|
"loss": 0.2189, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 3.0727062896710904, |
|
"grad_norm": 0.47167963172935967, |
|
"learning_rate": 3.100093544422459e-06, |
|
"loss": 0.2087, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 3.075591459896134, |
|
"grad_norm": 0.45222235059141447, |
|
"learning_rate": 3.0818889068014167e-06, |
|
"loss": 0.2086, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 3.0784766301211772, |
|
"grad_norm": 0.46762858344658953, |
|
"learning_rate": 3.0637281360168348e-06, |
|
"loss": 0.2058, |
|
"step": 5335 |
|
}, |
|
{ |
|
"epoch": 3.0813618003462206, |
|
"grad_norm": 0.4982055068360655, |
|
"learning_rate": 3.045611347223776e-06, |
|
"loss": 0.2168, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 3.0842469705712636, |
|
"grad_norm": 0.44825046777882716, |
|
"learning_rate": 3.0275386552984232e-06, |
|
"loss": 0.2089, |
|
"step": 5345 |
|
}, |
|
{ |
|
"epoch": 3.087132140796307, |
|
"grad_norm": 0.4780597142010933, |
|
"learning_rate": 3.00951017483735e-06, |
|
"loss": 0.2, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 3.0900173110213505, |
|
"grad_norm": 0.4749892463930495, |
|
"learning_rate": 2.991526020156795e-06, |
|
"loss": 0.2082, |
|
"step": 5355 |
|
}, |
|
{ |
|
"epoch": 3.0929024812463934, |
|
"grad_norm": 0.5316835135478458, |
|
"learning_rate": 2.9735863052919256e-06, |
|
"loss": 0.2117, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 3.095787651471437, |
|
"grad_norm": 0.48592697259547013, |
|
"learning_rate": 2.955691143996119e-06, |
|
"loss": 0.2189, |
|
"step": 5365 |
|
}, |
|
{ |
|
"epoch": 3.0986728216964803, |
|
"grad_norm": 0.478325002856811, |
|
"learning_rate": 2.9378406497402577e-06, |
|
"loss": 0.2217, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 3.1015579919215233, |
|
"grad_norm": 0.47784013307657136, |
|
"learning_rate": 2.9200349357119805e-06, |
|
"loss": 0.2047, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 3.1044431621465667, |
|
"grad_norm": 0.4366920553845523, |
|
"learning_rate": 2.902274114814995e-06, |
|
"loss": 0.2052, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 3.10732833237161, |
|
"grad_norm": 0.48726638763350977, |
|
"learning_rate": 2.884558299668333e-06, |
|
"loss": 0.2135, |
|
"step": 5385 |
|
}, |
|
{ |
|
"epoch": 3.110213502596653, |
|
"grad_norm": 0.4352029364038837, |
|
"learning_rate": 2.866887602605667e-06, |
|
"loss": 0.2146, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 3.1130986728216965, |
|
"grad_norm": 0.4618849547883958, |
|
"learning_rate": 2.8492621356745677e-06, |
|
"loss": 0.2007, |
|
"step": 5395 |
|
}, |
|
{ |
|
"epoch": 3.11598384304674, |
|
"grad_norm": 0.5680788070054426, |
|
"learning_rate": 2.831682010635811e-06, |
|
"loss": 0.2254, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 3.118869013271783, |
|
"grad_norm": 0.4478067921317212, |
|
"learning_rate": 2.8141473389626706e-06, |
|
"loss": 0.2025, |
|
"step": 5405 |
|
}, |
|
{ |
|
"epoch": 3.1217541834968263, |
|
"grad_norm": 0.4383072651557812, |
|
"learning_rate": 2.7966582318402046e-06, |
|
"loss": 0.2038, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 3.1246393537218697, |
|
"grad_norm": 0.4469251744492116, |
|
"learning_rate": 2.7792148001645505e-06, |
|
"loss": 0.2034, |
|
"step": 5415 |
|
}, |
|
{ |
|
"epoch": 3.1275245239469127, |
|
"grad_norm": 0.45796076894000676, |
|
"learning_rate": 2.7618171545422156e-06, |
|
"loss": 0.2237, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 3.130409694171956, |
|
"grad_norm": 0.4732594923700523, |
|
"learning_rate": 2.744465405289406e-06, |
|
"loss": 0.2115, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 3.1332948643969996, |
|
"grad_norm": 0.47883717903500816, |
|
"learning_rate": 2.7271596624312845e-06, |
|
"loss": 0.2176, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 3.1361800346220425, |
|
"grad_norm": 0.47490798721155425, |
|
"learning_rate": 2.709900035701297e-06, |
|
"loss": 0.1962, |
|
"step": 5435 |
|
}, |
|
{ |
|
"epoch": 3.139065204847086, |
|
"grad_norm": 0.47556110953605957, |
|
"learning_rate": 2.6926866345404846e-06, |
|
"loss": 0.2122, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 3.1419503750721294, |
|
"grad_norm": 0.4674071776337381, |
|
"learning_rate": 2.6755195680967607e-06, |
|
"loss": 0.1992, |
|
"step": 5445 |
|
}, |
|
{ |
|
"epoch": 3.1448355452971724, |
|
"grad_norm": 0.474411421746897, |
|
"learning_rate": 2.658398945224253e-06, |
|
"loss": 0.1924, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 3.1477207155222158, |
|
"grad_norm": 0.47606141844860833, |
|
"learning_rate": 2.6413248744825837e-06, |
|
"loss": 0.2076, |
|
"step": 5455 |
|
}, |
|
{ |
|
"epoch": 3.150605885747259, |
|
"grad_norm": 0.45905433780073146, |
|
"learning_rate": 2.624297464136204e-06, |
|
"loss": 0.2142, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 3.153491055972302, |
|
"grad_norm": 0.536638485210775, |
|
"learning_rate": 2.6073168221536906e-06, |
|
"loss": 0.219, |
|
"step": 5465 |
|
}, |
|
{ |
|
"epoch": 3.1563762261973456, |
|
"grad_norm": 0.45678787981866464, |
|
"learning_rate": 2.5903830562070675e-06, |
|
"loss": 0.2036, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 3.159261396422389, |
|
"grad_norm": 0.4496673360192395, |
|
"learning_rate": 2.57349627367113e-06, |
|
"loss": 0.2092, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 3.162146566647432, |
|
"grad_norm": 0.4612630830131145, |
|
"learning_rate": 2.5566565816227585e-06, |
|
"loss": 0.204, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 3.1650317368724754, |
|
"grad_norm": 0.45145251818392435, |
|
"learning_rate": 2.5398640868402304e-06, |
|
"loss": 0.2056, |
|
"step": 5485 |
|
}, |
|
{ |
|
"epoch": 3.167916907097519, |
|
"grad_norm": 0.49129615711916425, |
|
"learning_rate": 2.523118895802553e-06, |
|
"loss": 0.2069, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 3.170802077322562, |
|
"grad_norm": 0.5139622757706684, |
|
"learning_rate": 2.506421114688794e-06, |
|
"loss": 0.221, |
|
"step": 5495 |
|
}, |
|
{ |
|
"epoch": 3.1736872475476052, |
|
"grad_norm": 0.48052040604024665, |
|
"learning_rate": 2.4897708493773997e-06, |
|
"loss": 0.1973, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.1765724177726486, |
|
"grad_norm": 0.49706524838406957, |
|
"learning_rate": 2.4731682054455174e-06, |
|
"loss": 0.2053, |
|
"step": 5505 |
|
}, |
|
{ |
|
"epoch": 3.179457587997692, |
|
"grad_norm": 0.5133679294908896, |
|
"learning_rate": 2.456613288168337e-06, |
|
"loss": 0.2035, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 3.182342758222735, |
|
"grad_norm": 0.483475787236041, |
|
"learning_rate": 2.440106202518423e-06, |
|
"loss": 0.2082, |
|
"step": 5515 |
|
}, |
|
{ |
|
"epoch": 3.1852279284477785, |
|
"grad_norm": 0.4815903117086154, |
|
"learning_rate": 2.423647053165046e-06, |
|
"loss": 0.2046, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 3.188113098672822, |
|
"grad_norm": 0.4753159403205944, |
|
"learning_rate": 2.4072359444735117e-06, |
|
"loss": 0.2116, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 3.190998268897865, |
|
"grad_norm": 0.5207748356365552, |
|
"learning_rate": 2.390872980504516e-06, |
|
"loss": 0.2078, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 3.1938834391229083, |
|
"grad_norm": 0.500796915089432, |
|
"learning_rate": 2.374558265013469e-06, |
|
"loss": 0.2013, |
|
"step": 5535 |
|
}, |
|
{ |
|
"epoch": 3.1967686093479517, |
|
"grad_norm": 0.5192772858747745, |
|
"learning_rate": 2.358291901449842e-06, |
|
"loss": 0.2003, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 3.1996537795729947, |
|
"grad_norm": 0.4809730211239497, |
|
"learning_rate": 2.342073992956517e-06, |
|
"loss": 0.2098, |
|
"step": 5545 |
|
}, |
|
{ |
|
"epoch": 3.202538949798038, |
|
"grad_norm": 0.4627046426816422, |
|
"learning_rate": 2.3259046423691333e-06, |
|
"loss": 0.2032, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 3.2054241200230815, |
|
"grad_norm": 0.5339618244022528, |
|
"learning_rate": 2.309783952215421e-06, |
|
"loss": 0.2075, |
|
"step": 5555 |
|
}, |
|
{ |
|
"epoch": 3.2083092902481245, |
|
"grad_norm": 0.5160587961491079, |
|
"learning_rate": 2.293712024714564e-06, |
|
"loss": 0.2147, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 3.211194460473168, |
|
"grad_norm": 0.5541552433226901, |
|
"learning_rate": 2.2776889617765495e-06, |
|
"loss": 0.2367, |
|
"step": 5565 |
|
}, |
|
{ |
|
"epoch": 3.2140796306982113, |
|
"grad_norm": 0.45574118435347694, |
|
"learning_rate": 2.2617148650015275e-06, |
|
"loss": 0.2104, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 3.2169648009232543, |
|
"grad_norm": 0.4714574839149813, |
|
"learning_rate": 2.2457898356791496e-06, |
|
"loss": 0.2154, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 3.2198499711482977, |
|
"grad_norm": 0.5065703016927723, |
|
"learning_rate": 2.2299139747879385e-06, |
|
"loss": 0.2045, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 3.222735141373341, |
|
"grad_norm": 0.46572520384835114, |
|
"learning_rate": 2.214087382994653e-06, |
|
"loss": 0.2368, |
|
"step": 5585 |
|
}, |
|
{ |
|
"epoch": 3.225620311598384, |
|
"grad_norm": 0.4780383032384353, |
|
"learning_rate": 2.198310160653636e-06, |
|
"loss": 0.2041, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 3.2285054818234276, |
|
"grad_norm": 0.48268579681924456, |
|
"learning_rate": 2.1825824078061898e-06, |
|
"loss": 0.2057, |
|
"step": 5595 |
|
}, |
|
{ |
|
"epoch": 3.231390652048471, |
|
"grad_norm": 0.48646053407264, |
|
"learning_rate": 2.1669042241799378e-06, |
|
"loss": 0.199, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 3.234275822273514, |
|
"grad_norm": 0.49576817208299834, |
|
"learning_rate": 2.1512757091881874e-06, |
|
"loss": 0.1971, |
|
"step": 5605 |
|
}, |
|
{ |
|
"epoch": 3.2371609924985574, |
|
"grad_norm": 0.4841025834202475, |
|
"learning_rate": 2.1356969619293023e-06, |
|
"loss": 0.2227, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 3.240046162723601, |
|
"grad_norm": 0.5512557293571464, |
|
"learning_rate": 2.1201680811860815e-06, |
|
"loss": 0.1993, |
|
"step": 5615 |
|
}, |
|
{ |
|
"epoch": 3.2429313329486438, |
|
"grad_norm": 0.49443124657159826, |
|
"learning_rate": 2.1046891654251266e-06, |
|
"loss": 0.207, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 3.245816503173687, |
|
"grad_norm": 0.502803434696454, |
|
"learning_rate": 2.089260312796213e-06, |
|
"loss": 0.2037, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 3.2487016733987306, |
|
"grad_norm": 0.4761464874997464, |
|
"learning_rate": 2.0738816211316716e-06, |
|
"loss": 0.1969, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 3.251586843623774, |
|
"grad_norm": 0.5250059943014331, |
|
"learning_rate": 2.0585531879457743e-06, |
|
"loss": 0.1998, |
|
"step": 5635 |
|
}, |
|
{ |
|
"epoch": 3.254472013848817, |
|
"grad_norm": 0.4534253902795417, |
|
"learning_rate": 2.043275110434113e-06, |
|
"loss": 0.2057, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 3.2573571840738604, |
|
"grad_norm": 0.456942792070371, |
|
"learning_rate": 2.0280474854729727e-06, |
|
"loss": 0.2014, |
|
"step": 5645 |
|
}, |
|
{ |
|
"epoch": 3.260242354298904, |
|
"grad_norm": 0.49132441725377424, |
|
"learning_rate": 2.0128704096187258e-06, |
|
"loss": 0.214, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 3.263127524523947, |
|
"grad_norm": 0.43559057805097934, |
|
"learning_rate": 1.9977439791072296e-06, |
|
"loss": 0.2025, |
|
"step": 5655 |
|
}, |
|
{ |
|
"epoch": 3.2660126947489903, |
|
"grad_norm": 0.5032263108222483, |
|
"learning_rate": 1.9826682898531923e-06, |
|
"loss": 0.2036, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 3.2688978649740337, |
|
"grad_norm": 0.5166100007288694, |
|
"learning_rate": 1.967643437449591e-06, |
|
"loss": 0.2039, |
|
"step": 5665 |
|
}, |
|
{ |
|
"epoch": 3.2717830351990767, |
|
"grad_norm": 0.5625840783677031, |
|
"learning_rate": 1.9526695171670428e-06, |
|
"loss": 0.2186, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 3.27466820542412, |
|
"grad_norm": 0.4771204637584591, |
|
"learning_rate": 1.937746623953218e-06, |
|
"loss": 0.2105, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 3.2775533756491635, |
|
"grad_norm": 0.5120534992728352, |
|
"learning_rate": 1.9228748524322283e-06, |
|
"loss": 0.1962, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 3.2804385458742065, |
|
"grad_norm": 0.5018775730767316, |
|
"learning_rate": 1.908054296904023e-06, |
|
"loss": 0.2109, |
|
"step": 5685 |
|
}, |
|
{ |
|
"epoch": 3.28332371609925, |
|
"grad_norm": 0.4876912669877195, |
|
"learning_rate": 1.8932850513438139e-06, |
|
"loss": 0.2126, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 3.2862088863242933, |
|
"grad_norm": 0.4691715888051167, |
|
"learning_rate": 1.8785672094014484e-06, |
|
"loss": 0.2016, |
|
"step": 5695 |
|
}, |
|
{ |
|
"epoch": 3.2890940565493363, |
|
"grad_norm": 0.48546733091545335, |
|
"learning_rate": 1.8639008644008317e-06, |
|
"loss": 0.2139, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 3.2919792267743797, |
|
"grad_norm": 0.48053507834963044, |
|
"learning_rate": 1.84928610933934e-06, |
|
"loss": 0.2069, |
|
"step": 5705 |
|
}, |
|
{ |
|
"epoch": 3.294864396999423, |
|
"grad_norm": 0.4978213968888964, |
|
"learning_rate": 1.8347230368872227e-06, |
|
"loss": 0.2125, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 3.297749567224466, |
|
"grad_norm": 0.4619698699264187, |
|
"learning_rate": 1.8202117393870122e-06, |
|
"loss": 0.2048, |
|
"step": 5715 |
|
}, |
|
{ |
|
"epoch": 3.3006347374495095, |
|
"grad_norm": 0.4439799217375983, |
|
"learning_rate": 1.805752308852945e-06, |
|
"loss": 0.2076, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 3.303519907674553, |
|
"grad_norm": 0.5050476605960446, |
|
"learning_rate": 1.7913448369703801e-06, |
|
"loss": 0.2148, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 3.306405077899596, |
|
"grad_norm": 0.4449280284195996, |
|
"learning_rate": 1.776989415095206e-06, |
|
"loss": 0.1961, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 3.3092902481246393, |
|
"grad_norm": 0.5023493469028406, |
|
"learning_rate": 1.7626861342532764e-06, |
|
"loss": 0.2108, |
|
"step": 5735 |
|
}, |
|
{ |
|
"epoch": 3.3121754183496828, |
|
"grad_norm": 0.4899997511129661, |
|
"learning_rate": 1.748435085139818e-06, |
|
"loss": 0.2041, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 3.3150605885747257, |
|
"grad_norm": 0.620987392864056, |
|
"learning_rate": 1.7342363581188716e-06, |
|
"loss": 0.1967, |
|
"step": 5745 |
|
}, |
|
{ |
|
"epoch": 3.317945758799769, |
|
"grad_norm": 0.48947716401563934, |
|
"learning_rate": 1.720090043222704e-06, |
|
"loss": 0.2093, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 3.3208309290248126, |
|
"grad_norm": 0.49193608057425164, |
|
"learning_rate": 1.705996230151239e-06, |
|
"loss": 0.2143, |
|
"step": 5755 |
|
}, |
|
{ |
|
"epoch": 3.3237160992498556, |
|
"grad_norm": 0.5410390908943622, |
|
"learning_rate": 1.6919550082715108e-06, |
|
"loss": 0.2101, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 3.326601269474899, |
|
"grad_norm": 0.44748685099184915, |
|
"learning_rate": 1.6779664666170626e-06, |
|
"loss": 0.2154, |
|
"step": 5765 |
|
}, |
|
{ |
|
"epoch": 3.3294864396999424, |
|
"grad_norm": 0.49202824127233535, |
|
"learning_rate": 1.6640306938874052e-06, |
|
"loss": 0.2084, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 3.3323716099249854, |
|
"grad_norm": 0.5129313111006824, |
|
"learning_rate": 1.650147778447444e-06, |
|
"loss": 0.2043, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 3.335256780150029, |
|
"grad_norm": 0.5011687713242771, |
|
"learning_rate": 1.6363178083269381e-06, |
|
"loss": 0.2137, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 3.338141950375072, |
|
"grad_norm": 0.5125645791853729, |
|
"learning_rate": 1.6225408712199097e-06, |
|
"loss": 0.213, |
|
"step": 5785 |
|
}, |
|
{ |
|
"epoch": 3.341027120600115, |
|
"grad_norm": 0.496426542842325, |
|
"learning_rate": 1.608817054484113e-06, |
|
"loss": 0.2036, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 3.3439122908251586, |
|
"grad_norm": 0.573443010809008, |
|
"learning_rate": 1.5951464451404775e-06, |
|
"loss": 0.1975, |
|
"step": 5795 |
|
}, |
|
{ |
|
"epoch": 3.346797461050202, |
|
"grad_norm": 0.4777011846082858, |
|
"learning_rate": 1.5815291298725387e-06, |
|
"loss": 0.2278, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 3.349682631275245, |
|
"grad_norm": 0.4994285452680278, |
|
"learning_rate": 1.5679651950259178e-06, |
|
"loss": 0.2019, |
|
"step": 5805 |
|
}, |
|
{ |
|
"epoch": 3.3525678015002884, |
|
"grad_norm": 0.47305655838061333, |
|
"learning_rate": 1.5544547266077425e-06, |
|
"loss": 0.2121, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 3.355452971725332, |
|
"grad_norm": 0.5117964943614841, |
|
"learning_rate": 1.54099781028613e-06, |
|
"loss": 0.2117, |
|
"step": 5815 |
|
}, |
|
{ |
|
"epoch": 3.358338141950375, |
|
"grad_norm": 0.47969243336386125, |
|
"learning_rate": 1.52759453138962e-06, |
|
"loss": 0.2103, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 3.3612233121754183, |
|
"grad_norm": 0.46863801053654247, |
|
"learning_rate": 1.514244974906649e-06, |
|
"loss": 0.2162, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 3.3641084824004617, |
|
"grad_norm": 0.4937087047605848, |
|
"learning_rate": 1.5009492254850056e-06, |
|
"loss": 0.2074, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 3.366993652625505, |
|
"grad_norm": 0.5349150058728949, |
|
"learning_rate": 1.4877073674313004e-06, |
|
"loss": 0.2092, |
|
"step": 5835 |
|
}, |
|
{ |
|
"epoch": 3.369878822850548, |
|
"grad_norm": 0.509590831524668, |
|
"learning_rate": 1.4745194847104184e-06, |
|
"loss": 0.2258, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 3.3727639930755915, |
|
"grad_norm": 0.5411363420436703, |
|
"learning_rate": 1.461385660944994e-06, |
|
"loss": 0.2026, |
|
"step": 5845 |
|
}, |
|
{ |
|
"epoch": 3.375649163300635, |
|
"grad_norm": 0.4869231603073319, |
|
"learning_rate": 1.4483059794148869e-06, |
|
"loss": 0.212, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 3.378534333525678, |
|
"grad_norm": 0.5066221127114735, |
|
"learning_rate": 1.4352805230566458e-06, |
|
"loss": 0.2066, |
|
"step": 5855 |
|
}, |
|
{ |
|
"epoch": 3.3814195037507213, |
|
"grad_norm": 0.49848266645770745, |
|
"learning_rate": 1.4223093744629802e-06, |
|
"loss": 0.2039, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 3.3843046739757647, |
|
"grad_norm": 0.46674023509056656, |
|
"learning_rate": 1.409392615882248e-06, |
|
"loss": 0.2056, |
|
"step": 5865 |
|
}, |
|
{ |
|
"epoch": 3.3871898442008077, |
|
"grad_norm": 0.47153344056400004, |
|
"learning_rate": 1.3965303292179211e-06, |
|
"loss": 0.2035, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 3.390075014425851, |
|
"grad_norm": 0.5737791203551417, |
|
"learning_rate": 1.3837225960280777e-06, |
|
"loss": 0.2092, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 3.3929601846508946, |
|
"grad_norm": 0.5430927208050875, |
|
"learning_rate": 1.370969497524872e-06, |
|
"loss": 0.2102, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 3.3958453548759375, |
|
"grad_norm": 0.4648636735503783, |
|
"learning_rate": 1.3582711145740378e-06, |
|
"loss": 0.2036, |
|
"step": 5885 |
|
}, |
|
{ |
|
"epoch": 3.398730525100981, |
|
"grad_norm": 0.4809453402273537, |
|
"learning_rate": 1.3456275276943543e-06, |
|
"loss": 0.206, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 3.4016156953260244, |
|
"grad_norm": 0.5512582459841553, |
|
"learning_rate": 1.3330388170571496e-06, |
|
"loss": 0.2057, |
|
"step": 5895 |
|
}, |
|
{ |
|
"epoch": 3.4045008655510673, |
|
"grad_norm": 0.5125832192345321, |
|
"learning_rate": 1.3205050624857895e-06, |
|
"loss": 0.2065, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 3.4073860357761108, |
|
"grad_norm": 0.5558586586402942, |
|
"learning_rate": 1.3080263434551743e-06, |
|
"loss": 0.2166, |
|
"step": 5905 |
|
}, |
|
{ |
|
"epoch": 3.410271206001154, |
|
"grad_norm": 0.48236963618343165, |
|
"learning_rate": 1.295602739091224e-06, |
|
"loss": 0.2054, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 3.413156376226197, |
|
"grad_norm": 0.47170396426714695, |
|
"learning_rate": 1.2832343281703852e-06, |
|
"loss": 0.2102, |
|
"step": 5915 |
|
}, |
|
{ |
|
"epoch": 3.4160415464512406, |
|
"grad_norm": 0.4953986618513792, |
|
"learning_rate": 1.2709211891191331e-06, |
|
"loss": 0.2262, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 3.418926716676284, |
|
"grad_norm": 0.5094102635524805, |
|
"learning_rate": 1.2586634000134735e-06, |
|
"loss": 0.2052, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 3.4218118869013274, |
|
"grad_norm": 0.46465406913045626, |
|
"learning_rate": 1.2464610385784381e-06, |
|
"loss": 0.2166, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 3.4246970571263704, |
|
"grad_norm": 0.537941105319963, |
|
"learning_rate": 1.2343141821876015e-06, |
|
"loss": 0.2052, |
|
"step": 5935 |
|
}, |
|
{ |
|
"epoch": 3.427582227351414, |
|
"grad_norm": 0.4708366082196696, |
|
"learning_rate": 1.2222229078625935e-06, |
|
"loss": 0.2045, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 3.4304673975764572, |
|
"grad_norm": 0.4910988413622275, |
|
"learning_rate": 1.2101872922725976e-06, |
|
"loss": 0.2131, |
|
"step": 5945 |
|
}, |
|
{ |
|
"epoch": 3.4333525678015, |
|
"grad_norm": 0.4726816646220706, |
|
"learning_rate": 1.198207411733877e-06, |
|
"loss": 0.2126, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 3.4362377380265436, |
|
"grad_norm": 0.499819420315185, |
|
"learning_rate": 1.1862833422092878e-06, |
|
"loss": 0.2053, |
|
"step": 5955 |
|
}, |
|
{ |
|
"epoch": 3.439122908251587, |
|
"grad_norm": 0.45811978260654357, |
|
"learning_rate": 1.1744151593077925e-06, |
|
"loss": 0.2166, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 3.44200807847663, |
|
"grad_norm": 0.49132678636515253, |
|
"learning_rate": 1.1626029382839798e-06, |
|
"loss": 0.2093, |
|
"step": 5965 |
|
}, |
|
{ |
|
"epoch": 3.4448932487016735, |
|
"grad_norm": 0.5178705009636367, |
|
"learning_rate": 1.1508467540376e-06, |
|
"loss": 0.1978, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 3.447778418926717, |
|
"grad_norm": 0.4771433582042318, |
|
"learning_rate": 1.1391466811130813e-06, |
|
"loss": 0.2055, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 3.45066358915176, |
|
"grad_norm": 0.4849667450545781, |
|
"learning_rate": 1.127502793699049e-06, |
|
"loss": 0.2114, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 3.4535487593768033, |
|
"grad_norm": 0.5323744822003759, |
|
"learning_rate": 1.1159151656278677e-06, |
|
"loss": 0.2246, |
|
"step": 5985 |
|
}, |
|
{ |
|
"epoch": 3.4564339296018467, |
|
"grad_norm": 0.517295087718039, |
|
"learning_rate": 1.104383870375172e-06, |
|
"loss": 0.2033, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 3.4593190998268897, |
|
"grad_norm": 0.49882873868199235, |
|
"learning_rate": 1.0929089810593974e-06, |
|
"loss": 0.2072, |
|
"step": 5995 |
|
}, |
|
{ |
|
"epoch": 3.462204270051933, |
|
"grad_norm": 0.5007022327120706, |
|
"learning_rate": 1.0814905704413093e-06, |
|
"loss": 0.2257, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.4650894402769765, |
|
"grad_norm": 0.46291584199269026, |
|
"learning_rate": 1.070128710923556e-06, |
|
"loss": 0.2086, |
|
"step": 6005 |
|
}, |
|
{ |
|
"epoch": 3.4679746105020195, |
|
"grad_norm": 0.5269534068567228, |
|
"learning_rate": 1.0588234745502012e-06, |
|
"loss": 0.2112, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 3.470859780727063, |
|
"grad_norm": 0.4910407597306869, |
|
"learning_rate": 1.047574933006268e-06, |
|
"loss": 0.2177, |
|
"step": 6015 |
|
}, |
|
{ |
|
"epoch": 3.4737449509521063, |
|
"grad_norm": 0.47260522012547096, |
|
"learning_rate": 1.0363831576172834e-06, |
|
"loss": 0.2001, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 3.4766301211771493, |
|
"grad_norm": 0.46807651107457027, |
|
"learning_rate": 1.0252482193488345e-06, |
|
"loss": 0.1936, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 3.4795152914021927, |
|
"grad_norm": 0.4496096381304417, |
|
"learning_rate": 1.0141701888061018e-06, |
|
"loss": 0.2051, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 3.482400461627236, |
|
"grad_norm": 0.48460781075602494, |
|
"learning_rate": 1.0031491362334289e-06, |
|
"loss": 0.2025, |
|
"step": 6035 |
|
}, |
|
{ |
|
"epoch": 3.485285631852279, |
|
"grad_norm": 0.5182489885112946, |
|
"learning_rate": 9.92185131513862e-07, |
|
"loss": 0.2167, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 3.4881708020773226, |
|
"grad_norm": 0.48427730291008736, |
|
"learning_rate": 9.812782441687263e-07, |
|
"loss": 0.2024, |
|
"step": 6045 |
|
}, |
|
{ |
|
"epoch": 3.491055972302366, |
|
"grad_norm": 0.5074747506126092, |
|
"learning_rate": 9.704285433571637e-07, |
|
"loss": 0.2143, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 3.493941142527409, |
|
"grad_norm": 0.4838143459849086, |
|
"learning_rate": 9.596360978757025e-07, |
|
"loss": 0.2139, |
|
"step": 6055 |
|
}, |
|
{ |
|
"epoch": 3.4968263127524524, |
|
"grad_norm": 0.47554491034770197, |
|
"learning_rate": 9.489009761578294e-07, |
|
"loss": 0.206, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 3.499711482977496, |
|
"grad_norm": 0.5171439253070426, |
|
"learning_rate": 9.382232462735463e-07, |
|
"loss": 0.2044, |
|
"step": 6065 |
|
}, |
|
{ |
|
"epoch": 3.5025966532025388, |
|
"grad_norm": 0.5115632720888774, |
|
"learning_rate": 9.276029759289384e-07, |
|
"loss": 0.2195, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 3.505481823427582, |
|
"grad_norm": 0.5128873018700219, |
|
"learning_rate": 9.17040232465748e-07, |
|
"loss": 0.208, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 3.5083669936526256, |
|
"grad_norm": 0.47047254608971883, |
|
"learning_rate": 9.065350828609509e-07, |
|
"loss": 0.2166, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 3.5112521638776686, |
|
"grad_norm": 0.47331390143331864, |
|
"learning_rate": 8.960875937263214e-07, |
|
"loss": 0.1926, |
|
"step": 6085 |
|
}, |
|
{ |
|
"epoch": 3.514137334102712, |
|
"grad_norm": 0.5526849091013493, |
|
"learning_rate": 8.856978313080244e-07, |
|
"loss": 0.208, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 3.5170225043277554, |
|
"grad_norm": 0.5056598583710792, |
|
"learning_rate": 8.753658614861782e-07, |
|
"loss": 0.202, |
|
"step": 6095 |
|
}, |
|
{ |
|
"epoch": 3.5199076745527984, |
|
"grad_norm": 0.45972967026892514, |
|
"learning_rate": 8.650917497744548e-07, |
|
"loss": 0.2052, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 3.522792844777842, |
|
"grad_norm": 0.9905171948522371, |
|
"learning_rate": 8.548755613196491e-07, |
|
"loss": 0.1975, |
|
"step": 6105 |
|
}, |
|
{ |
|
"epoch": 3.5256780150028852, |
|
"grad_norm": 0.487725697200175, |
|
"learning_rate": 8.447173609012693e-07, |
|
"loss": 0.2003, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 3.5285631852279282, |
|
"grad_norm": 0.436842348667491, |
|
"learning_rate": 8.346172129311436e-07, |
|
"loss": 0.2002, |
|
"step": 6115 |
|
}, |
|
{ |
|
"epoch": 3.5314483554529716, |
|
"grad_norm": 0.5324360594518225, |
|
"learning_rate": 8.245751814529812e-07, |
|
"loss": 0.2042, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 3.534333525678015, |
|
"grad_norm": 0.49501211496626846, |
|
"learning_rate": 8.145913301419894e-07, |
|
"loss": 0.2046, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 3.537218695903058, |
|
"grad_norm": 0.5636032763069877, |
|
"learning_rate": 8.046657223044618e-07, |
|
"loss": 0.2065, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 3.5401038661281015, |
|
"grad_norm": 0.47764396959998434, |
|
"learning_rate": 7.947984208773796e-07, |
|
"loss": 0.201, |
|
"step": 6135 |
|
}, |
|
{ |
|
"epoch": 3.542989036353145, |
|
"grad_norm": 0.47110105270453406, |
|
"learning_rate": 7.849894884280085e-07, |
|
"loss": 0.1939, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 3.545874206578188, |
|
"grad_norm": 0.5089791473431078, |
|
"learning_rate": 7.752389871535027e-07, |
|
"loss": 0.2237, |
|
"step": 6145 |
|
}, |
|
{ |
|
"epoch": 3.5487593768032313, |
|
"grad_norm": 0.4693514320901827, |
|
"learning_rate": 7.65546978880517e-07, |
|
"loss": 0.1918, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 3.5516445470282747, |
|
"grad_norm": 0.5339146402486223, |
|
"learning_rate": 7.55913525064802e-07, |
|
"loss": 0.2076, |
|
"step": 6155 |
|
}, |
|
{ |
|
"epoch": 3.5545297172533177, |
|
"grad_norm": 0.510340675728564, |
|
"learning_rate": 7.463386867908318e-07, |
|
"loss": 0.1947, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 3.557414887478361, |
|
"grad_norm": 0.5063108850560543, |
|
"learning_rate": 7.368225247713978e-07, |
|
"loss": 0.2293, |
|
"step": 6165 |
|
}, |
|
{ |
|
"epoch": 3.5603000577034045, |
|
"grad_norm": 0.5151597458874256, |
|
"learning_rate": 7.273650993472414e-07, |
|
"loss": 0.2027, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 3.563185227928448, |
|
"grad_norm": 0.6646624866313264, |
|
"learning_rate": 7.179664704866551e-07, |
|
"loss": 0.1955, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 3.566070398153491, |
|
"grad_norm": 0.4766136250823664, |
|
"learning_rate": 7.086266977851141e-07, |
|
"loss": 0.2012, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 3.5689555683785343, |
|
"grad_norm": 0.4840543281228228, |
|
"learning_rate": 6.993458404648945e-07, |
|
"loss": 0.2069, |
|
"step": 6185 |
|
}, |
|
{ |
|
"epoch": 3.5718407386035778, |
|
"grad_norm": 0.49635401265932044, |
|
"learning_rate": 6.901239573746987e-07, |
|
"loss": 0.2054, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 3.5747259088286207, |
|
"grad_norm": 0.5178071858503266, |
|
"learning_rate": 6.809611069892785e-07, |
|
"loss": 0.2114, |
|
"step": 6195 |
|
}, |
|
{ |
|
"epoch": 3.577611079053664, |
|
"grad_norm": 0.4805032184640311, |
|
"learning_rate": 6.718573474090673e-07, |
|
"loss": 0.2088, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 3.5804962492787076, |
|
"grad_norm": 0.5312256937793656, |
|
"learning_rate": 6.628127363598125e-07, |
|
"loss": 0.218, |
|
"step": 6205 |
|
}, |
|
{ |
|
"epoch": 3.583381419503751, |
|
"grad_norm": 0.5038683897704207, |
|
"learning_rate": 6.538273311922105e-07, |
|
"loss": 0.2036, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 3.586266589728794, |
|
"grad_norm": 0.48973488364704354, |
|
"learning_rate": 6.449011888815359e-07, |
|
"loss": 0.2059, |
|
"step": 6215 |
|
}, |
|
{ |
|
"epoch": 3.5891517599538374, |
|
"grad_norm": 0.46502727902053664, |
|
"learning_rate": 6.360343660272905e-07, |
|
"loss": 0.2032, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 3.592036930178881, |
|
"grad_norm": 0.4693993644882787, |
|
"learning_rate": 6.272269188528346e-07, |
|
"loss": 0.1993, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 3.594922100403924, |
|
"grad_norm": 0.4759028304516473, |
|
"learning_rate": 6.184789032050408e-07, |
|
"loss": 0.1937, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 3.597807270628967, |
|
"grad_norm": 0.5255955515623099, |
|
"learning_rate": 6.097903745539258e-07, |
|
"loss": 0.2051, |
|
"step": 6235 |
|
}, |
|
{ |
|
"epoch": 3.6006924408540106, |
|
"grad_norm": 0.48476442425010263, |
|
"learning_rate": 6.011613879923161e-07, |
|
"loss": 0.1946, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 3.6035776110790536, |
|
"grad_norm": 0.5048238073660125, |
|
"learning_rate": 5.925919982354844e-07, |
|
"loss": 0.2122, |
|
"step": 6245 |
|
}, |
|
{ |
|
"epoch": 3.606462781304097, |
|
"grad_norm": 0.5137209328897905, |
|
"learning_rate": 5.840822596208073e-07, |
|
"loss": 0.2078, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 3.6093479515291405, |
|
"grad_norm": 0.46932455507505844, |
|
"learning_rate": 5.756322261074232e-07, |
|
"loss": 0.2105, |
|
"step": 6255 |
|
}, |
|
{ |
|
"epoch": 3.6122331217541834, |
|
"grad_norm": 0.47438897533582397, |
|
"learning_rate": 5.672419512758909e-07, |
|
"loss": 0.2046, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 3.615118291979227, |
|
"grad_norm": 0.49163449448043284, |
|
"learning_rate": 5.589114883278423e-07, |
|
"loss": 0.2106, |
|
"step": 6265 |
|
}, |
|
{ |
|
"epoch": 3.6180034622042703, |
|
"grad_norm": 0.4948571949148204, |
|
"learning_rate": 5.506408900856508e-07, |
|
"loss": 0.2019, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 3.6208886324293132, |
|
"grad_norm": 0.5239387280536255, |
|
"learning_rate": 5.424302089920974e-07, |
|
"loss": 0.2152, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 3.6237738026543567, |
|
"grad_norm": 0.4645502539433645, |
|
"learning_rate": 5.342794971100374e-07, |
|
"loss": 0.2013, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 3.6266589728794, |
|
"grad_norm": 0.5644959774991677, |
|
"learning_rate": 5.261888061220643e-07, |
|
"loss": 0.2022, |
|
"step": 6285 |
|
}, |
|
{ |
|
"epoch": 3.629544143104443, |
|
"grad_norm": 0.48442583058736516, |
|
"learning_rate": 5.181581873301944e-07, |
|
"loss": 0.2036, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 3.6324293133294865, |
|
"grad_norm": 0.515528269609854, |
|
"learning_rate": 5.101876916555271e-07, |
|
"loss": 0.204, |
|
"step": 6295 |
|
}, |
|
{ |
|
"epoch": 3.63531448355453, |
|
"grad_norm": 0.5338898797051952, |
|
"learning_rate": 5.022773696379313e-07, |
|
"loss": 0.2033, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 3.638199653779573, |
|
"grad_norm": 0.4794106672832799, |
|
"learning_rate": 4.944272714357223e-07, |
|
"loss": 0.2061, |
|
"step": 6305 |
|
}, |
|
{ |
|
"epoch": 3.6410848240046163, |
|
"grad_norm": 0.49326404127777695, |
|
"learning_rate": 4.866374468253487e-07, |
|
"loss": 0.1928, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 3.6439699942296597, |
|
"grad_norm": 0.5247706559589024, |
|
"learning_rate": 4.789079452010648e-07, |
|
"loss": 0.2058, |
|
"step": 6315 |
|
}, |
|
{ |
|
"epoch": 3.6468551644547027, |
|
"grad_norm": 0.4781901107121829, |
|
"learning_rate": 4.7123881557462834e-07, |
|
"loss": 0.2045, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 3.649740334679746, |
|
"grad_norm": 0.4743243681177699, |
|
"learning_rate": 4.6363010657498597e-07, |
|
"loss": 0.2118, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 3.6526255049047895, |
|
"grad_norm": 0.5180788089096849, |
|
"learning_rate": 4.560818664479671e-07, |
|
"loss": 0.1981, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 3.6555106751298325, |
|
"grad_norm": 0.479204470483921, |
|
"learning_rate": 4.485941430559726e-07, |
|
"loss": 0.2242, |
|
"step": 6335 |
|
}, |
|
{ |
|
"epoch": 3.658395845354876, |
|
"grad_norm": 0.49576155614359946, |
|
"learning_rate": 4.411669838776733e-07, |
|
"loss": 0.1973, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 3.6612810155799194, |
|
"grad_norm": 0.48614229875693404, |
|
"learning_rate": 4.338004360077164e-07, |
|
"loss": 0.1984, |
|
"step": 6345 |
|
}, |
|
{ |
|
"epoch": 3.6641661858049623, |
|
"grad_norm": 0.46848988610190684, |
|
"learning_rate": 4.2649454615641737e-07, |
|
"loss": 0.2052, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 3.6670513560300058, |
|
"grad_norm": 0.5277331889272244, |
|
"learning_rate": 4.1924936064946633e-07, |
|
"loss": 0.1966, |
|
"step": 6355 |
|
}, |
|
{ |
|
"epoch": 3.669936526255049, |
|
"grad_norm": 0.48392147766138405, |
|
"learning_rate": 4.120649254276321e-07, |
|
"loss": 0.2009, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 3.672821696480092, |
|
"grad_norm": 0.48495642382601156, |
|
"learning_rate": 4.04941286046483e-07, |
|
"loss": 0.2158, |
|
"step": 6365 |
|
}, |
|
{ |
|
"epoch": 3.6757068667051356, |
|
"grad_norm": 0.49853651987090997, |
|
"learning_rate": 3.9787848767607997e-07, |
|
"loss": 0.2097, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 3.678592036930179, |
|
"grad_norm": 0.49229974849216307, |
|
"learning_rate": 3.908765751007038e-07, |
|
"loss": 0.2174, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 3.681477207155222, |
|
"grad_norm": 0.49794648252416773, |
|
"learning_rate": 3.839355927185662e-07, |
|
"loss": 0.2119, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 3.6843623773802654, |
|
"grad_norm": 0.7006970650191741, |
|
"learning_rate": 3.770555845415258e-07, |
|
"loss": 0.2038, |
|
"step": 6385 |
|
}, |
|
{ |
|
"epoch": 3.687247547605309, |
|
"grad_norm": 0.5062300259095932, |
|
"learning_rate": 3.7023659419481275e-07, |
|
"loss": 0.2026, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 3.690132717830352, |
|
"grad_norm": 0.49987127156630407, |
|
"learning_rate": 3.6347866491675323e-07, |
|
"loss": 0.2016, |
|
"step": 6395 |
|
}, |
|
{ |
|
"epoch": 3.693017888055395, |
|
"grad_norm": 0.5019423482974231, |
|
"learning_rate": 3.5678183955849323e-07, |
|
"loss": 0.2123, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 3.6959030582804386, |
|
"grad_norm": 0.49420552946041524, |
|
"learning_rate": 3.5014616058372306e-07, |
|
"loss": 0.2066, |
|
"step": 6405 |
|
}, |
|
{ |
|
"epoch": 3.6987882285054816, |
|
"grad_norm": 0.483613460902034, |
|
"learning_rate": 3.435716700684133e-07, |
|
"loss": 0.2142, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 3.701673398730525, |
|
"grad_norm": 0.8317874119600014, |
|
"learning_rate": 3.3705840970054916e-07, |
|
"loss": 0.1973, |
|
"step": 6415 |
|
}, |
|
{ |
|
"epoch": 3.7045585689555685, |
|
"grad_norm": 0.4583040079124961, |
|
"learning_rate": 3.3060642077986313e-07, |
|
"loss": 0.2027, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 3.7074437391806114, |
|
"grad_norm": 0.4698006325080622, |
|
"learning_rate": 3.242157442175686e-07, |
|
"loss": 0.1926, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 3.710328909405655, |
|
"grad_norm": 0.4935876897031322, |
|
"learning_rate": 3.1788642053610984e-07, |
|
"loss": 0.1982, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 3.7132140796306983, |
|
"grad_norm": 0.4871065386845346, |
|
"learning_rate": 3.1161848986890117e-07, |
|
"loss": 0.201, |
|
"step": 6435 |
|
}, |
|
{ |
|
"epoch": 3.7160992498557412, |
|
"grad_norm": 0.5134006486764362, |
|
"learning_rate": 3.054119919600673e-07, |
|
"loss": 0.2103, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 3.7189844200807847, |
|
"grad_norm": 0.5247799897213314, |
|
"learning_rate": 2.9926696616420227e-07, |
|
"loss": 0.2158, |
|
"step": 6445 |
|
}, |
|
{ |
|
"epoch": 3.721869590305828, |
|
"grad_norm": 0.45147630286318213, |
|
"learning_rate": 2.9318345144610627e-07, |
|
"loss": 0.2007, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 3.724754760530871, |
|
"grad_norm": 0.480778845212167, |
|
"learning_rate": 2.8716148638055166e-07, |
|
"loss": 0.216, |
|
"step": 6455 |
|
}, |
|
{ |
|
"epoch": 3.7276399307559145, |
|
"grad_norm": 0.49023422691624635, |
|
"learning_rate": 2.8120110915202945e-07, |
|
"loss": 0.2131, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 3.730525100980958, |
|
"grad_norm": 0.5078622584490374, |
|
"learning_rate": 2.7530235755450886e-07, |
|
"loss": 0.2085, |
|
"step": 6465 |
|
}, |
|
{ |
|
"epoch": 3.733410271206001, |
|
"grad_norm": 0.527238182528253, |
|
"learning_rate": 2.6946526899120693e-07, |
|
"loss": 0.2076, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 3.7362954414310443, |
|
"grad_norm": 0.4829671940741909, |
|
"learning_rate": 2.6368988047433373e-07, |
|
"loss": 0.1978, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 3.7391806116560877, |
|
"grad_norm": 0.5127022353864252, |
|
"learning_rate": 2.5797622862487104e-07, |
|
"loss": 0.2249, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 3.742065781881131, |
|
"grad_norm": 0.4378947388730543, |
|
"learning_rate": 2.523243496723382e-07, |
|
"loss": 0.1999, |
|
"step": 6485 |
|
}, |
|
{ |
|
"epoch": 3.744950952106174, |
|
"grad_norm": 0.4730083265761743, |
|
"learning_rate": 2.467342794545613e-07, |
|
"loss": 0.2036, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 3.7478361223312175, |
|
"grad_norm": 0.5083579379173573, |
|
"learning_rate": 2.412060534174421e-07, |
|
"loss": 0.2134, |
|
"step": 6495 |
|
}, |
|
{ |
|
"epoch": 3.750721292556261, |
|
"grad_norm": 0.5014960077674261, |
|
"learning_rate": 2.35739706614736e-07, |
|
"loss": 0.2066, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.753606462781304, |
|
"grad_norm": 0.505196404447519, |
|
"learning_rate": 2.3033527370783459e-07, |
|
"loss": 0.2118, |
|
"step": 6505 |
|
}, |
|
{ |
|
"epoch": 3.7564916330063474, |
|
"grad_norm": 0.4720849386750261, |
|
"learning_rate": 2.2499278896553678e-07, |
|
"loss": 0.2176, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 3.759376803231391, |
|
"grad_norm": 0.49946759486920755, |
|
"learning_rate": 2.1971228626384233e-07, |
|
"loss": 0.2073, |
|
"step": 6515 |
|
}, |
|
{ |
|
"epoch": 3.762261973456434, |
|
"grad_norm": 0.4836314820421101, |
|
"learning_rate": 2.1449379908572431e-07, |
|
"loss": 0.2079, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 3.765147143681477, |
|
"grad_norm": 0.4848667800466061, |
|
"learning_rate": 2.0933736052092812e-07, |
|
"loss": 0.2244, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 3.7680323139065206, |
|
"grad_norm": 0.4735713481776197, |
|
"learning_rate": 2.0424300326575497e-07, |
|
"loss": 0.2018, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 3.770917484131564, |
|
"grad_norm": 0.5176491575721175, |
|
"learning_rate": 1.992107596228554e-07, |
|
"loss": 0.2109, |
|
"step": 6535 |
|
}, |
|
{ |
|
"epoch": 3.773802654356607, |
|
"grad_norm": 0.49048701673925643, |
|
"learning_rate": 1.942406615010295e-07, |
|
"loss": 0.2048, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 3.7766878245816504, |
|
"grad_norm": 0.4951411517869068, |
|
"learning_rate": 1.8933274041501915e-07, |
|
"loss": 0.1982, |
|
"step": 6545 |
|
}, |
|
{ |
|
"epoch": 3.779572994806694, |
|
"grad_norm": 0.4615884792028935, |
|
"learning_rate": 1.8448702748530723e-07, |
|
"loss": 0.2105, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 3.782458165031737, |
|
"grad_norm": 0.503114785095133, |
|
"learning_rate": 1.7970355343792434e-07, |
|
"loss": 0.1995, |
|
"step": 6555 |
|
}, |
|
{ |
|
"epoch": 3.7853433352567802, |
|
"grad_norm": 0.4531549414892555, |
|
"learning_rate": 1.7498234860425567e-07, |
|
"loss": 0.1954, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 3.7882285054818237, |
|
"grad_norm": 0.4476621701726881, |
|
"learning_rate": 1.7032344292084112e-07, |
|
"loss": 0.207, |
|
"step": 6565 |
|
}, |
|
{ |
|
"epoch": 3.7911136757068666, |
|
"grad_norm": 0.5301338039777519, |
|
"learning_rate": 1.6572686592919107e-07, |
|
"loss": 0.2047, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 3.79399884593191, |
|
"grad_norm": 0.5215830928997757, |
|
"learning_rate": 1.6119264677559977e-07, |
|
"loss": 0.2164, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 3.7968840161569535, |
|
"grad_norm": 0.4581052464950232, |
|
"learning_rate": 1.5672081421095552e-07, |
|
"loss": 0.1954, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 3.7997691863819965, |
|
"grad_norm": 0.4658151305069496, |
|
"learning_rate": 1.5231139659056426e-07, |
|
"loss": 0.2065, |
|
"step": 6585 |
|
}, |
|
{ |
|
"epoch": 3.80265435660704, |
|
"grad_norm": 0.5087191746251587, |
|
"learning_rate": 1.4796442187396397e-07, |
|
"loss": 0.2027, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 3.8055395268320833, |
|
"grad_norm": 0.5505757142185078, |
|
"learning_rate": 1.4367991762475497e-07, |
|
"loss": 0.2095, |
|
"step": 6595 |
|
}, |
|
{ |
|
"epoch": 3.8084246970571263, |
|
"grad_norm": 0.5414562907573153, |
|
"learning_rate": 1.394579110104144e-07, |
|
"loss": 0.2143, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 3.8113098672821697, |
|
"grad_norm": 0.46524927624850704, |
|
"learning_rate": 1.352984288021353e-07, |
|
"loss": 0.1978, |
|
"step": 6605 |
|
}, |
|
{ |
|
"epoch": 3.814195037507213, |
|
"grad_norm": 0.4628457981686407, |
|
"learning_rate": 1.3120149737464893e-07, |
|
"loss": 0.201, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 3.817080207732256, |
|
"grad_norm": 0.4966243391627818, |
|
"learning_rate": 1.2716714270606057e-07, |
|
"loss": 0.2002, |
|
"step": 6615 |
|
}, |
|
{ |
|
"epoch": 3.8199653779572995, |
|
"grad_norm": 0.46771609095809985, |
|
"learning_rate": 1.2319539037768614e-07, |
|
"loss": 0.2009, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 3.822850548182343, |
|
"grad_norm": 0.47874741702870294, |
|
"learning_rate": 1.192862655738858e-07, |
|
"loss": 0.2091, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 3.825735718407386, |
|
"grad_norm": 0.48329839687108117, |
|
"learning_rate": 1.1543979308190844e-07, |
|
"loss": 0.2005, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 3.8286208886324293, |
|
"grad_norm": 0.4891511461415484, |
|
"learning_rate": 1.1165599729173193e-07, |
|
"loss": 0.2088, |
|
"step": 6635 |
|
}, |
|
{ |
|
"epoch": 3.8315060588574728, |
|
"grad_norm": 0.52591217962733, |
|
"learning_rate": 1.0793490219591085e-07, |
|
"loss": 0.2048, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 3.8343912290825157, |
|
"grad_norm": 0.49626455120247676, |
|
"learning_rate": 1.0427653138942118e-07, |
|
"loss": 0.2033, |
|
"step": 6645 |
|
}, |
|
{ |
|
"epoch": 3.837276399307559, |
|
"grad_norm": 0.4673741618418967, |
|
"learning_rate": 1.0068090806951369e-07, |
|
"loss": 0.221, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 3.8401615695326026, |
|
"grad_norm": 0.47678344134659506, |
|
"learning_rate": 9.714805503556413e-08, |
|
"loss": 0.2276, |
|
"step": 6655 |
|
}, |
|
{ |
|
"epoch": 3.8430467397576455, |
|
"grad_norm": 1.143709435388536, |
|
"learning_rate": 9.367799468893102e-08, |
|
"loss": 0.1927, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 3.845931909982689, |
|
"grad_norm": 0.46684620921540665, |
|
"learning_rate": 9.027074903281364e-08, |
|
"loss": 0.1986, |
|
"step": 6665 |
|
}, |
|
{ |
|
"epoch": 3.8488170802077324, |
|
"grad_norm": 0.5307872034727703, |
|
"learning_rate": 8.692633967210872e-08, |
|
"loss": 0.2188, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 3.8517022504327754, |
|
"grad_norm": 0.4735022804301449, |
|
"learning_rate": 8.364478781327956e-08, |
|
"loss": 0.2141, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 3.854587420657819, |
|
"grad_norm": 0.45935591835994294, |
|
"learning_rate": 8.042611426421598e-08, |
|
"loss": 0.2084, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 3.857472590882862, |
|
"grad_norm": 0.515095531011111, |
|
"learning_rate": 7.727033943410678e-08, |
|
"loss": 0.2099, |
|
"step": 6685 |
|
}, |
|
{ |
|
"epoch": 3.860357761107905, |
|
"grad_norm": 0.48719846805820766, |
|
"learning_rate": 7.417748333330533e-08, |
|
"loss": 0.2108, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 3.8632429313329486, |
|
"grad_norm": 0.4535374141566299, |
|
"learning_rate": 7.114756557320745e-08, |
|
"loss": 0.2101, |
|
"step": 6695 |
|
}, |
|
{ |
|
"epoch": 3.866128101557992, |
|
"grad_norm": 0.48820441041592805, |
|
"learning_rate": 6.818060536612381e-08, |
|
"loss": 0.1999, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 3.869013271783035, |
|
"grad_norm": 0.4752993276059485, |
|
"learning_rate": 6.527662152516323e-08, |
|
"loss": 0.1855, |
|
"step": 6705 |
|
}, |
|
{ |
|
"epoch": 3.8718984420080784, |
|
"grad_norm": 0.4986229869294503, |
|
"learning_rate": 6.243563246410622e-08, |
|
"loss": 0.2105, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 3.874783612233122, |
|
"grad_norm": 0.49185252551299946, |
|
"learning_rate": 5.965765619728947e-08, |
|
"loss": 0.2239, |
|
"step": 6715 |
|
}, |
|
{ |
|
"epoch": 3.877668782458165, |
|
"grad_norm": 0.5713339630980684, |
|
"learning_rate": 5.694271033950038e-08, |
|
"loss": 0.2193, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 3.8805539526832082, |
|
"grad_norm": 0.5344135726437659, |
|
"learning_rate": 5.429081210585274e-08, |
|
"loss": 0.2163, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 3.8834391229082517, |
|
"grad_norm": 0.46373171517071093, |
|
"learning_rate": 5.170197831168677e-08, |
|
"loss": 0.2072, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 3.8863242931332946, |
|
"grad_norm": 0.5108545524577744, |
|
"learning_rate": 4.917622537245925e-08, |
|
"loss": 0.1981, |
|
"step": 6735 |
|
}, |
|
{ |
|
"epoch": 3.889209463358338, |
|
"grad_norm": 0.5134285782488883, |
|
"learning_rate": 4.671356930363691e-08, |
|
"loss": 0.2115, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 3.8920946335833815, |
|
"grad_norm": 0.5112672014805328, |
|
"learning_rate": 4.431402572060095e-08, |
|
"loss": 0.2067, |
|
"step": 6745 |
|
}, |
|
{ |
|
"epoch": 3.8949798038084245, |
|
"grad_norm": 0.5093768129247295, |
|
"learning_rate": 4.197760983854271e-08, |
|
"loss": 0.2094, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 3.897864974033468, |
|
"grad_norm": 0.49789509988732483, |
|
"learning_rate": 3.970433647236926e-08, |
|
"loss": 0.2109, |
|
"step": 6755 |
|
}, |
|
{ |
|
"epoch": 3.9007501442585113, |
|
"grad_norm": 0.45027822200576423, |
|
"learning_rate": 3.749422003661018e-08, |
|
"loss": 0.1992, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 3.9036353144835543, |
|
"grad_norm": 0.4956800653304501, |
|
"learning_rate": 3.5347274545322055e-08, |
|
"loss": 0.2255, |
|
"step": 6765 |
|
}, |
|
{ |
|
"epoch": 3.9065204847085977, |
|
"grad_norm": 0.5131612404624672, |
|
"learning_rate": 3.3263513612006305e-08, |
|
"loss": 0.2123, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 3.909405654933641, |
|
"grad_norm": 0.4801277473215345, |
|
"learning_rate": 3.124295044951931e-08, |
|
"loss": 0.2064, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 3.912290825158684, |
|
"grad_norm": 0.47685091887520487, |
|
"learning_rate": 2.928559786998575e-08, |
|
"loss": 0.2026, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 3.9151759953837275, |
|
"grad_norm": 0.4948070608182228, |
|
"learning_rate": 2.7391468284722056e-08, |
|
"loss": 0.2015, |
|
"step": 6785 |
|
}, |
|
{ |
|
"epoch": 3.918061165608771, |
|
"grad_norm": 0.49666575468745433, |
|
"learning_rate": 2.5560573704157543e-08, |
|
"loss": 0.2127, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 3.9209463358338144, |
|
"grad_norm": 0.517584053580229, |
|
"learning_rate": 2.379292573775338e-08, |
|
"loss": 0.2103, |
|
"step": 6795 |
|
}, |
|
{ |
|
"epoch": 3.9238315060588573, |
|
"grad_norm": 0.4715466573891517, |
|
"learning_rate": 2.2088535593933756e-08, |
|
"loss": 0.2034, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 3.9267166762839008, |
|
"grad_norm": 0.48184559815395395, |
|
"learning_rate": 2.0447414080014826e-08, |
|
"loss": 0.2163, |
|
"step": 6805 |
|
}, |
|
{ |
|
"epoch": 3.929601846508944, |
|
"grad_norm": 0.47730934367308564, |
|
"learning_rate": 1.8869571602132543e-08, |
|
"loss": 0.2055, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 3.932487016733987, |
|
"grad_norm": 0.509248139581449, |
|
"learning_rate": 1.735501816517937e-08, |
|
"loss": 0.2001, |
|
"step": 6815 |
|
}, |
|
{ |
|
"epoch": 3.9353721869590306, |
|
"grad_norm": 0.5087728221195549, |
|
"learning_rate": 1.59037633727388e-08, |
|
"loss": 0.2079, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 3.938257357184074, |
|
"grad_norm": 0.4986147495940189, |
|
"learning_rate": 1.4515816427029816e-08, |
|
"loss": 0.2173, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 3.9411425274091174, |
|
"grad_norm": 0.5024656322502186, |
|
"learning_rate": 1.3191186128841404e-08, |
|
"loss": 0.1988, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 3.9440276976341604, |
|
"grad_norm": 0.49534516720622374, |
|
"learning_rate": 1.192988087748037e-08, |
|
"loss": 0.2081, |
|
"step": 6835 |
|
}, |
|
{ |
|
"epoch": 3.946912867859204, |
|
"grad_norm": 0.479255083905943, |
|
"learning_rate": 1.073190867071805e-08, |
|
"loss": 0.2017, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 3.9497980380842472, |
|
"grad_norm": 0.4632454498993253, |
|
"learning_rate": 9.597277104739233e-09, |
|
"loss": 0.2044, |
|
"step": 6845 |
|
}, |
|
{ |
|
"epoch": 3.95268320830929, |
|
"grad_norm": 0.5211397474253632, |
|
"learning_rate": 8.525993374095543e-09, |
|
"loss": 0.2058, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 3.9555683785343336, |
|
"grad_norm": 0.48574173728886655, |
|
"learning_rate": 7.518064271654357e-09, |
|
"loss": 0.215, |
|
"step": 6855 |
|
}, |
|
{ |
|
"epoch": 3.958453548759377, |
|
"grad_norm": 0.4668619180516313, |
|
"learning_rate": 6.573496188565509e-09, |
|
"loss": 0.1986, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 3.96133871898442, |
|
"grad_norm": 0.47768436258947716, |
|
"learning_rate": 5.6922951142079904e-09, |
|
"loss": 0.1948, |
|
"step": 6865 |
|
}, |
|
{ |
|
"epoch": 3.9642238892094634, |
|
"grad_norm": 0.5138599831276915, |
|
"learning_rate": 4.874466636164421e-09, |
|
"loss": 0.2113, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 3.967109059434507, |
|
"grad_norm": 0.5000185830706366, |
|
"learning_rate": 4.120015940177746e-09, |
|
"loss": 0.2256, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 3.96999422965955, |
|
"grad_norm": 0.4866192841385196, |
|
"learning_rate": 3.4289478101201536e-09, |
|
"loss": 0.2063, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 3.9728793998845933, |
|
"grad_norm": 0.47456211919715674, |
|
"learning_rate": 2.8012666279653155e-09, |
|
"loss": 0.2031, |
|
"step": 6885 |
|
}, |
|
{ |
|
"epoch": 3.9757645701096367, |
|
"grad_norm": 0.4973916489751191, |
|
"learning_rate": 2.2369763737573046e-09, |
|
"loss": 0.2013, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 3.9786497403346797, |
|
"grad_norm": 0.50633061407573, |
|
"learning_rate": 1.7360806255861673e-09, |
|
"loss": 0.2108, |
|
"step": 6895 |
|
}, |
|
{ |
|
"epoch": 3.981534910559723, |
|
"grad_norm": 0.4761645591299464, |
|
"learning_rate": 1.29858255956683e-09, |
|
"loss": 0.2145, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 3.9844200807847665, |
|
"grad_norm": 0.4667942094809914, |
|
"learning_rate": 9.244849498168951e-10, |
|
"loss": 0.1998, |
|
"step": 6905 |
|
}, |
|
{ |
|
"epoch": 3.9873052510098095, |
|
"grad_norm": 0.4758585527520601, |
|
"learning_rate": 6.137901684399871e-10, |
|
"loss": 0.2103, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 3.990190421234853, |
|
"grad_norm": 0.5058399856877976, |
|
"learning_rate": 3.665001855113204e-10, |
|
"loss": 0.2037, |
|
"step": 6915 |
|
}, |
|
{ |
|
"epoch": 3.9930755914598963, |
|
"grad_norm": 0.4840062336803948, |
|
"learning_rate": 1.8261656906437553e-10, |
|
"loss": 0.2114, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 3.9959607616849393, |
|
"grad_norm": 0.49250096214422046, |
|
"learning_rate": 6.21404850809082e-11, |
|
"loss": 0.2046, |
|
"step": 6925 |
|
}, |
|
{ |
|
"epoch": 3.9988459319099827, |
|
"grad_norm": 0.521991079354229, |
|
"learning_rate": 5.0726974842874035e-12, |
|
"loss": 0.2225, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.5301879048347473, |
|
"eval_runtime": 0.5871, |
|
"eval_samples_per_second": 131.147, |
|
"eval_steps_per_second": 3.406, |
|
"step": 6932 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 6932, |
|
"total_flos": 340525408321536.0, |
|
"train_loss": 0.3252074327502578, |
|
"train_runtime": 5343.4474, |
|
"train_samples_per_second": 41.505, |
|
"train_steps_per_second": 1.297 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 6932, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 694, |
|
"total_flos": 340525408321536.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|