|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1664, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.001201923076923077, |
|
"grad_norm": 354.53998230392546, |
|
"learning_rate": 2.5000000000000004e-07, |
|
"loss": 7.4318, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.002403846153846154, |
|
"grad_norm": 371.91393245440116, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 7.4355, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.003605769230769231, |
|
"grad_norm": 346.9801049377746, |
|
"learning_rate": 7.5e-07, |
|
"loss": 7.2172, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.004807692307692308, |
|
"grad_norm": 449.5218610086392, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 5.8268, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.006009615384615385, |
|
"grad_norm": 139.92367559356336, |
|
"learning_rate": 1.25e-06, |
|
"loss": 2.5854, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.007211538461538462, |
|
"grad_norm": 52.715150799729045, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.5361, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.008413461538461538, |
|
"grad_norm": 19.976853421159532, |
|
"learning_rate": 1.75e-06, |
|
"loss": 0.2878, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.009615384615384616, |
|
"grad_norm": 20.500149107050714, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.2495, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.010817307692307692, |
|
"grad_norm": 17.263902163149385, |
|
"learning_rate": 2.25e-06, |
|
"loss": 0.2177, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.01201923076923077, |
|
"grad_norm": 12.128764817788255, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.212, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.013221153846153846, |
|
"grad_norm": 8.417030179298662, |
|
"learning_rate": 2.7500000000000004e-06, |
|
"loss": 0.2035, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.014423076923076924, |
|
"grad_norm": 10.874846551654207, |
|
"learning_rate": 3e-06, |
|
"loss": 0.1999, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.015625, |
|
"grad_norm": 2.553082597942841, |
|
"learning_rate": 3.2500000000000002e-06, |
|
"loss": 0.1884, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.016826923076923076, |
|
"grad_norm": 8.091183712435873, |
|
"learning_rate": 3.5e-06, |
|
"loss": 0.1729, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.018028846153846152, |
|
"grad_norm": 6.473289695229128, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 0.1858, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.019230769230769232, |
|
"grad_norm": 10.845224583341055, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.1779, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.020432692307692308, |
|
"grad_norm": 7.588560990570617, |
|
"learning_rate": 4.25e-06, |
|
"loss": 0.1807, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.021634615384615384, |
|
"grad_norm": 3.2833536176531437, |
|
"learning_rate": 4.5e-06, |
|
"loss": 0.1741, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.02283653846153846, |
|
"grad_norm": 9.48172518986478, |
|
"learning_rate": 4.75e-06, |
|
"loss": 0.17, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.02403846153846154, |
|
"grad_norm": 10.178982049068438, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1603, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.025240384615384616, |
|
"grad_norm": 2.410777001535273, |
|
"learning_rate": 4.999981288993133e-06, |
|
"loss": 0.1772, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.026442307692307692, |
|
"grad_norm": 11.143463053654319, |
|
"learning_rate": 4.999925156252611e-06, |
|
"loss": 0.1799, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.027644230769230768, |
|
"grad_norm": 9.886918600788055, |
|
"learning_rate": 4.9998316026186755e-06, |
|
"loss": 0.1773, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.028846153846153848, |
|
"grad_norm": 7.527819471799396, |
|
"learning_rate": 4.999700629491713e-06, |
|
"loss": 0.176, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.030048076923076924, |
|
"grad_norm": 7.572713386840099, |
|
"learning_rate": 4.999532238832233e-06, |
|
"loss": 0.153, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03125, |
|
"grad_norm": 3.477539644705173, |
|
"learning_rate": 4.999326433160844e-06, |
|
"loss": 0.1588, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.03245192307692308, |
|
"grad_norm": 2.5927408403215266, |
|
"learning_rate": 4.999083215558211e-06, |
|
"loss": 0.1657, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.03365384615384615, |
|
"grad_norm": 1.8986386268295627, |
|
"learning_rate": 4.998802589665009e-06, |
|
"loss": 0.1624, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.03485576923076923, |
|
"grad_norm": 4.511287232603737, |
|
"learning_rate": 4.998484559681875e-06, |
|
"loss": 0.1604, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.036057692307692304, |
|
"grad_norm": 3.1615021035675586, |
|
"learning_rate": 4.998129130369338e-06, |
|
"loss": 0.1541, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.037259615384615384, |
|
"grad_norm": 2.3641598718509163, |
|
"learning_rate": 4.997736307047748e-06, |
|
"loss": 0.1609, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.038461538461538464, |
|
"grad_norm": 2.1050208268263018, |
|
"learning_rate": 4.997306095597203e-06, |
|
"loss": 0.1628, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.039663461538461536, |
|
"grad_norm": 4.670481996347925, |
|
"learning_rate": 4.996838502457453e-06, |
|
"loss": 0.1605, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.040865384615384616, |
|
"grad_norm": 2.812878853953604, |
|
"learning_rate": 4.99633353462781e-06, |
|
"loss": 0.1394, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.042067307692307696, |
|
"grad_norm": 3.3453547259139658, |
|
"learning_rate": 4.995791199667038e-06, |
|
"loss": 0.1353, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04326923076923077, |
|
"grad_norm": 9.508467033910211, |
|
"learning_rate": 4.9952115056932445e-06, |
|
"loss": 0.1464, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.04447115384615385, |
|
"grad_norm": 6.12221208629695, |
|
"learning_rate": 4.994594461383756e-06, |
|
"loss": 0.1534, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.04567307692307692, |
|
"grad_norm": 3.2936985183570644, |
|
"learning_rate": 4.993940075974988e-06, |
|
"loss": 0.1551, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.046875, |
|
"grad_norm": 4.147948524170783, |
|
"learning_rate": 4.993248359262308e-06, |
|
"loss": 0.1599, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.04807692307692308, |
|
"grad_norm": 6.328245841206747, |
|
"learning_rate": 4.99251932159989e-06, |
|
"loss": 0.1433, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04927884615384615, |
|
"grad_norm": 3.454386437171457, |
|
"learning_rate": 4.991752973900558e-06, |
|
"loss": 0.1589, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.05048076923076923, |
|
"grad_norm": 4.628897164295182, |
|
"learning_rate": 4.9909493276356184e-06, |
|
"loss": 0.16, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.051682692307692304, |
|
"grad_norm": 4.959693562958358, |
|
"learning_rate": 4.990108394834698e-06, |
|
"loss": 0.1504, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.052884615384615384, |
|
"grad_norm": 2.037506021404142, |
|
"learning_rate": 4.9892301880855565e-06, |
|
"loss": 0.1469, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.054086538461538464, |
|
"grad_norm": 3.2873483486362898, |
|
"learning_rate": 4.988314720533899e-06, |
|
"loss": 0.152, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.055288461538461536, |
|
"grad_norm": 3.5444104019705165, |
|
"learning_rate": 4.987362005883182e-06, |
|
"loss": 0.1396, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.056490384615384616, |
|
"grad_norm": 2.43315278564924, |
|
"learning_rate": 4.986372058394404e-06, |
|
"loss": 0.1365, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.057692307692307696, |
|
"grad_norm": 3.1953964952618015, |
|
"learning_rate": 4.985344892885899e-06, |
|
"loss": 0.158, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.05889423076923077, |
|
"grad_norm": 2.0524573584491814, |
|
"learning_rate": 4.984280524733107e-06, |
|
"loss": 0.1571, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.06009615384615385, |
|
"grad_norm": 6.255981390751074, |
|
"learning_rate": 4.983178969868346e-06, |
|
"loss": 0.1464, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06129807692307692, |
|
"grad_norm": 2.401646579094803, |
|
"learning_rate": 4.98204024478058e-06, |
|
"loss": 0.1417, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.0625, |
|
"grad_norm": 4.086961173737914, |
|
"learning_rate": 4.980864366515159e-06, |
|
"loss": 0.1541, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.06370192307692307, |
|
"grad_norm": 5.143484575050959, |
|
"learning_rate": 4.97965135267358e-06, |
|
"loss": 0.1499, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.06490384615384616, |
|
"grad_norm": 9.335258253064257, |
|
"learning_rate": 4.978401221413209e-06, |
|
"loss": 0.1684, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.06610576923076923, |
|
"grad_norm": 7.980472809228967, |
|
"learning_rate": 4.977113991447017e-06, |
|
"loss": 0.1663, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0673076923076923, |
|
"grad_norm": 5.567091210665335, |
|
"learning_rate": 4.9757896820433015e-06, |
|
"loss": 0.1496, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.06850961538461539, |
|
"grad_norm": 5.007400500292786, |
|
"learning_rate": 4.9744283130253905e-06, |
|
"loss": 0.1415, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.06971153846153846, |
|
"grad_norm": 2.720822334329411, |
|
"learning_rate": 4.973029904771353e-06, |
|
"loss": 0.1541, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.07091346153846154, |
|
"grad_norm": 6.0293181825471605, |
|
"learning_rate": 4.97159447821369e-06, |
|
"loss": 0.1334, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.07211538461538461, |
|
"grad_norm": 2.190960264135707, |
|
"learning_rate": 4.9701220548390215e-06, |
|
"loss": 0.1353, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0733173076923077, |
|
"grad_norm": 1.8394820554560345, |
|
"learning_rate": 4.968612656687768e-06, |
|
"loss": 0.1424, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.07451923076923077, |
|
"grad_norm": 1.890530118257683, |
|
"learning_rate": 4.967066306353816e-06, |
|
"loss": 0.161, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.07572115384615384, |
|
"grad_norm": 1.74782914530488, |
|
"learning_rate": 4.965483026984182e-06, |
|
"loss": 0.1391, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.07692307692307693, |
|
"grad_norm": 2.529223967308172, |
|
"learning_rate": 4.963862842278669e-06, |
|
"loss": 0.1509, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.078125, |
|
"grad_norm": 3.2375323783057546, |
|
"learning_rate": 4.962205776489506e-06, |
|
"loss": 0.1452, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07932692307692307, |
|
"grad_norm": 3.289988413600293, |
|
"learning_rate": 4.9605118544209874e-06, |
|
"loss": 0.1369, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.08052884615384616, |
|
"grad_norm": 2.533449561050558, |
|
"learning_rate": 4.958781101429104e-06, |
|
"loss": 0.157, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.08173076923076923, |
|
"grad_norm": 2.3698097270600846, |
|
"learning_rate": 4.9570135434211615e-06, |
|
"loss": 0.1586, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.0829326923076923, |
|
"grad_norm": 2.9294128800046835, |
|
"learning_rate": 4.95520920685539e-06, |
|
"loss": 0.1438, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.08413461538461539, |
|
"grad_norm": 3.199532338265642, |
|
"learning_rate": 4.953368118740555e-06, |
|
"loss": 0.1404, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08533653846153846, |
|
"grad_norm": 5.007008541932283, |
|
"learning_rate": 4.951490306635543e-06, |
|
"loss": 0.1595, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.08653846153846154, |
|
"grad_norm": 6.472389725628937, |
|
"learning_rate": 4.949575798648962e-06, |
|
"loss": 0.1589, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.08774038461538461, |
|
"grad_norm": 2.0940111362516998, |
|
"learning_rate": 4.947624623438707e-06, |
|
"loss": 0.1352, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.0889423076923077, |
|
"grad_norm": 2.6543426483078214, |
|
"learning_rate": 4.9456368102115414e-06, |
|
"loss": 0.1396, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.09014423076923077, |
|
"grad_norm": 4.492736133720869, |
|
"learning_rate": 4.943612388722654e-06, |
|
"loss": 0.1362, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09134615384615384, |
|
"grad_norm": 3.1413400916428698, |
|
"learning_rate": 4.941551389275217e-06, |
|
"loss": 0.1398, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.09254807692307693, |
|
"grad_norm": 7.121010742342612, |
|
"learning_rate": 4.9394538427199305e-06, |
|
"loss": 0.1612, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.09375, |
|
"grad_norm": 2.0494879939162773, |
|
"learning_rate": 4.937319780454559e-06, |
|
"loss": 0.1372, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.09495192307692307, |
|
"grad_norm": 3.41511576689734, |
|
"learning_rate": 4.935149234423468e-06, |
|
"loss": 0.1463, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.09615384615384616, |
|
"grad_norm": 4.070726562354956, |
|
"learning_rate": 4.9329422371171375e-06, |
|
"loss": 0.1534, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09735576923076923, |
|
"grad_norm": 4.8008312467816125, |
|
"learning_rate": 4.930698821571681e-06, |
|
"loss": 0.1603, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.0985576923076923, |
|
"grad_norm": 6.5840675047315225, |
|
"learning_rate": 4.928419021368349e-06, |
|
"loss": 0.1472, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.09975961538461539, |
|
"grad_norm": 4.906437852842057, |
|
"learning_rate": 4.926102870633029e-06, |
|
"loss": 0.1518, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.10096153846153846, |
|
"grad_norm": 2.2753956803841424, |
|
"learning_rate": 4.923750404035729e-06, |
|
"loss": 0.132, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.10216346153846154, |
|
"grad_norm": 10.360890258671878, |
|
"learning_rate": 4.921361656790065e-06, |
|
"loss": 0.1615, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.10336538461538461, |
|
"grad_norm": 3.781202441981427, |
|
"learning_rate": 4.918936664652729e-06, |
|
"loss": 0.1317, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.1045673076923077, |
|
"grad_norm": 4.612706315229004, |
|
"learning_rate": 4.9164754639229575e-06, |
|
"loss": 0.1556, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.10576923076923077, |
|
"grad_norm": 10.089023799727872, |
|
"learning_rate": 4.913978091441985e-06, |
|
"loss": 0.1366, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.10697115384615384, |
|
"grad_norm": 2.3844132422742215, |
|
"learning_rate": 4.911444584592495e-06, |
|
"loss": 0.1364, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.10817307692307693, |
|
"grad_norm": 8.01833294402442, |
|
"learning_rate": 4.908874981298058e-06, |
|
"loss": 0.1367, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.109375, |
|
"grad_norm": 2.6816022197266083, |
|
"learning_rate": 4.906269320022566e-06, |
|
"loss": 0.1357, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.11057692307692307, |
|
"grad_norm": 3.540312508006275, |
|
"learning_rate": 4.903627639769656e-06, |
|
"loss": 0.1485, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.11177884615384616, |
|
"grad_norm": 2.127391987641345, |
|
"learning_rate": 4.900949980082127e-06, |
|
"loss": 0.1491, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.11298076923076923, |
|
"grad_norm": 1.8381751149591552, |
|
"learning_rate": 4.898236381041343e-06, |
|
"loss": 0.1378, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.1141826923076923, |
|
"grad_norm": 2.4143842380581355, |
|
"learning_rate": 4.895486883266644e-06, |
|
"loss": 0.134, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.11538461538461539, |
|
"grad_norm": 2.33879580368458, |
|
"learning_rate": 4.892701527914725e-06, |
|
"loss": 0.1274, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.11658653846153846, |
|
"grad_norm": 2.677161136207072, |
|
"learning_rate": 4.88988035667903e-06, |
|
"loss": 0.1247, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.11778846153846154, |
|
"grad_norm": 2.224205454303052, |
|
"learning_rate": 4.88702341178912e-06, |
|
"loss": 0.1171, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.11899038461538461, |
|
"grad_norm": 2.331487573448718, |
|
"learning_rate": 4.88413073601005e-06, |
|
"loss": 0.1304, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.1201923076923077, |
|
"grad_norm": 3.904053698054214, |
|
"learning_rate": 4.8812023726417194e-06, |
|
"loss": 0.1441, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12139423076923077, |
|
"grad_norm": 1.9333636759461283, |
|
"learning_rate": 4.878238365518231e-06, |
|
"loss": 0.1473, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.12259615384615384, |
|
"grad_norm": 4.5752867405646205, |
|
"learning_rate": 4.87523875900723e-06, |
|
"loss": 0.1337, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.12379807692307693, |
|
"grad_norm": 1.857974634859215, |
|
"learning_rate": 4.872203598009244e-06, |
|
"loss": 0.127, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.125, |
|
"grad_norm": 3.153527922810332, |
|
"learning_rate": 4.869132927957007e-06, |
|
"loss": 0.1484, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.12620192307692307, |
|
"grad_norm": 2.2228179237011534, |
|
"learning_rate": 4.866026794814781e-06, |
|
"loss": 0.1306, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12740384615384615, |
|
"grad_norm": 1.7350718661408604, |
|
"learning_rate": 4.862885245077669e-06, |
|
"loss": 0.1352, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.12860576923076922, |
|
"grad_norm": 2.1132426954959924, |
|
"learning_rate": 4.859708325770919e-06, |
|
"loss": 0.1416, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.12980769230769232, |
|
"grad_norm": 1.8563726212012472, |
|
"learning_rate": 4.856496084449218e-06, |
|
"loss": 0.1461, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.1310096153846154, |
|
"grad_norm": 1.8179558309835169, |
|
"learning_rate": 4.85324856919598e-06, |
|
"loss": 0.1322, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.13221153846153846, |
|
"grad_norm": 4.497720485766678, |
|
"learning_rate": 4.849965828622632e-06, |
|
"loss": 0.1275, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.13341346153846154, |
|
"grad_norm": 3.0404012207264843, |
|
"learning_rate": 4.846647911867877e-06, |
|
"loss": 0.1436, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.1346153846153846, |
|
"grad_norm": 3.224075088217143, |
|
"learning_rate": 4.8432948685969646e-06, |
|
"loss": 0.1656, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.13581730769230768, |
|
"grad_norm": 3.2443798600258686, |
|
"learning_rate": 4.83990674900095e-06, |
|
"loss": 0.1393, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.13701923076923078, |
|
"grad_norm": 1.786558241709454, |
|
"learning_rate": 4.836483603795935e-06, |
|
"loss": 0.1263, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.13822115384615385, |
|
"grad_norm": 2.1509725801613513, |
|
"learning_rate": 4.8330254842223155e-06, |
|
"loss": 0.1409, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.13942307692307693, |
|
"grad_norm": 2.835049924036413, |
|
"learning_rate": 4.829532442044008e-06, |
|
"loss": 0.1319, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.140625, |
|
"grad_norm": 4.679921143965946, |
|
"learning_rate": 4.8260045295476846e-06, |
|
"loss": 0.1506, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.14182692307692307, |
|
"grad_norm": 1.9142698244457717, |
|
"learning_rate": 4.822441799541979e-06, |
|
"loss": 0.15, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.14302884615384615, |
|
"grad_norm": 8.216926584060278, |
|
"learning_rate": 4.818844305356705e-06, |
|
"loss": 0.1508, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.14423076923076922, |
|
"grad_norm": 1.5774872715864894, |
|
"learning_rate": 4.815212100842053e-06, |
|
"loss": 0.1365, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.14543269230769232, |
|
"grad_norm": 4.90886123617284, |
|
"learning_rate": 4.811545240367785e-06, |
|
"loss": 0.1488, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.1466346153846154, |
|
"grad_norm": 2.867193865140862, |
|
"learning_rate": 4.807843778822424e-06, |
|
"loss": 0.1403, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.14783653846153846, |
|
"grad_norm": 2.8742525824591123, |
|
"learning_rate": 4.804107771612427e-06, |
|
"loss": 0.1543, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.14903846153846154, |
|
"grad_norm": 2.3762533430208563, |
|
"learning_rate": 4.800337274661358e-06, |
|
"loss": 0.1375, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.1502403846153846, |
|
"grad_norm": 2.0839909923885447, |
|
"learning_rate": 4.796532344409055e-06, |
|
"loss": 0.1501, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.15144230769230768, |
|
"grad_norm": 4.198893033771618, |
|
"learning_rate": 4.7926930378107765e-06, |
|
"loss": 0.1323, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.15264423076923078, |
|
"grad_norm": 6.846739098146311, |
|
"learning_rate": 4.788819412336358e-06, |
|
"loss": 0.1399, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.15384615384615385, |
|
"grad_norm": 5.537919034600803, |
|
"learning_rate": 4.784911525969344e-06, |
|
"loss": 0.1233, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.15504807692307693, |
|
"grad_norm": 3.033292609600203, |
|
"learning_rate": 4.780969437206128e-06, |
|
"loss": 0.1478, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.15625, |
|
"grad_norm": 4.62635643989095, |
|
"learning_rate": 4.776993205055067e-06, |
|
"loss": 0.1465, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.15745192307692307, |
|
"grad_norm": 1.6930995723930686, |
|
"learning_rate": 4.772982889035609e-06, |
|
"loss": 0.134, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.15865384615384615, |
|
"grad_norm": 5.190694670180204, |
|
"learning_rate": 4.7689385491773934e-06, |
|
"loss": 0.1397, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.15985576923076922, |
|
"grad_norm": 3.0169437457346104, |
|
"learning_rate": 4.764860246019356e-06, |
|
"loss": 0.1462, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.16105769230769232, |
|
"grad_norm": 6.609894055693969, |
|
"learning_rate": 4.760748040608826e-06, |
|
"loss": 0.1349, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.1622596153846154, |
|
"grad_norm": 4.93984379875883, |
|
"learning_rate": 4.756601994500604e-06, |
|
"loss": 0.1336, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.16346153846153846, |
|
"grad_norm": 4.75518188154229, |
|
"learning_rate": 4.752422169756048e-06, |
|
"loss": 0.146, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.16466346153846154, |
|
"grad_norm": 2.086098294528403, |
|
"learning_rate": 4.748208628942143e-06, |
|
"loss": 0.1419, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.1658653846153846, |
|
"grad_norm": 5.396963944335693, |
|
"learning_rate": 4.7439614351305614e-06, |
|
"loss": 0.1432, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.16706730769230768, |
|
"grad_norm": 7.532501685521908, |
|
"learning_rate": 4.739680651896721e-06, |
|
"loss": 0.145, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.16826923076923078, |
|
"grad_norm": 2.336463554377762, |
|
"learning_rate": 4.7353663433188325e-06, |
|
"loss": 0.1475, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.16947115384615385, |
|
"grad_norm": 5.48654115007209, |
|
"learning_rate": 4.731018573976943e-06, |
|
"loss": 0.1544, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.17067307692307693, |
|
"grad_norm": 2.120931360446975, |
|
"learning_rate": 4.726637408951966e-06, |
|
"loss": 0.1286, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.171875, |
|
"grad_norm": 3.3875599595704498, |
|
"learning_rate": 4.7222229138247076e-06, |
|
"loss": 0.1383, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.17307692307692307, |
|
"grad_norm": 2.0989272460873796, |
|
"learning_rate": 4.717775154674888e-06, |
|
"loss": 0.1168, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.17427884615384615, |
|
"grad_norm": 3.817152405138102, |
|
"learning_rate": 4.713294198080149e-06, |
|
"loss": 0.1257, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.17548076923076922, |
|
"grad_norm": 2.293976238111847, |
|
"learning_rate": 4.708780111115058e-06, |
|
"loss": 0.1358, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.17668269230769232, |
|
"grad_norm": 2.0161046467731407, |
|
"learning_rate": 4.7042329613501035e-06, |
|
"loss": 0.1214, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.1778846153846154, |
|
"grad_norm": 2.3356279678505674, |
|
"learning_rate": 4.699652816850686e-06, |
|
"loss": 0.1296, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.17908653846153846, |
|
"grad_norm": 2.034038118147649, |
|
"learning_rate": 4.6950397461761e-06, |
|
"loss": 0.1163, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.18028846153846154, |
|
"grad_norm": 2.63792392669932, |
|
"learning_rate": 4.690393818378501e-06, |
|
"loss": 0.1269, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1814903846153846, |
|
"grad_norm": 2.75722633258936, |
|
"learning_rate": 4.685715103001879e-06, |
|
"loss": 0.1243, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.18269230769230768, |
|
"grad_norm": 2.0819705788021183, |
|
"learning_rate": 4.681003670081015e-06, |
|
"loss": 0.1304, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.18389423076923078, |
|
"grad_norm": 3.4298950454490176, |
|
"learning_rate": 4.676259590140431e-06, |
|
"loss": 0.1377, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.18509615384615385, |
|
"grad_norm": 2.471860576622299, |
|
"learning_rate": 4.671482934193337e-06, |
|
"loss": 0.1356, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.18629807692307693, |
|
"grad_norm": 4.8199475175470825, |
|
"learning_rate": 4.666673773740568e-06, |
|
"loss": 0.125, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.1875, |
|
"grad_norm": 3.178092400708656, |
|
"learning_rate": 4.66183218076951e-06, |
|
"loss": 0.1365, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.18870192307692307, |
|
"grad_norm": 5.888487682413386, |
|
"learning_rate": 4.656958227753028e-06, |
|
"loss": 0.1415, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.18990384615384615, |
|
"grad_norm": 1.7792863825981573, |
|
"learning_rate": 4.652051987648375e-06, |
|
"loss": 0.1416, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.19110576923076922, |
|
"grad_norm": 2.9722465375990836, |
|
"learning_rate": 4.647113533896106e-06, |
|
"loss": 0.1396, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.19230769230769232, |
|
"grad_norm": 2.4672094667500475, |
|
"learning_rate": 4.642142940418973e-06, |
|
"loss": 0.1248, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.1935096153846154, |
|
"grad_norm": 2.7077423897634914, |
|
"learning_rate": 4.637140281620825e-06, |
|
"loss": 0.1383, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.19471153846153846, |
|
"grad_norm": 4.807221287056447, |
|
"learning_rate": 4.632105632385488e-06, |
|
"loss": 0.1361, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.19591346153846154, |
|
"grad_norm": 5.728075789744543, |
|
"learning_rate": 4.627039068075647e-06, |
|
"loss": 0.1444, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.1971153846153846, |
|
"grad_norm": 4.398279898396926, |
|
"learning_rate": 4.621940664531718e-06, |
|
"loss": 0.1486, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.19831730769230768, |
|
"grad_norm": 2.610023546419106, |
|
"learning_rate": 4.6168104980707105e-06, |
|
"loss": 0.1263, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.19951923076923078, |
|
"grad_norm": 3.5181999115242943, |
|
"learning_rate": 4.61164864548509e-06, |
|
"loss": 0.1308, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.20072115384615385, |
|
"grad_norm": 2.1510452933939903, |
|
"learning_rate": 4.606455184041623e-06, |
|
"loss": 0.14, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.20192307692307693, |
|
"grad_norm": 3.244280076674407, |
|
"learning_rate": 4.6012301914802245e-06, |
|
"loss": 0.1211, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.203125, |
|
"grad_norm": 2.4060103217800726, |
|
"learning_rate": 4.595973746012791e-06, |
|
"loss": 0.1331, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.20432692307692307, |
|
"grad_norm": 5.706691276517432, |
|
"learning_rate": 4.590685926322032e-06, |
|
"loss": 0.1275, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.20552884615384615, |
|
"grad_norm": 1.982976110922252, |
|
"learning_rate": 4.585366811560293e-06, |
|
"loss": 0.1236, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.20673076923076922, |
|
"grad_norm": 4.23602021986134, |
|
"learning_rate": 4.580016481348367e-06, |
|
"loss": 0.1361, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.20793269230769232, |
|
"grad_norm": 2.211392940952842, |
|
"learning_rate": 4.574635015774308e-06, |
|
"loss": 0.1255, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.2091346153846154, |
|
"grad_norm": 6.442272520375928, |
|
"learning_rate": 4.569222495392227e-06, |
|
"loss": 0.1344, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.21033653846153846, |
|
"grad_norm": 3.8749351925382594, |
|
"learning_rate": 4.563779001221087e-06, |
|
"loss": 0.1501, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.21153846153846154, |
|
"grad_norm": 1.7448331765525071, |
|
"learning_rate": 4.558304614743496e-06, |
|
"loss": 0.1381, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.2127403846153846, |
|
"grad_norm": 5.668086585104286, |
|
"learning_rate": 4.5527994179044785e-06, |
|
"loss": 0.1306, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.21394230769230768, |
|
"grad_norm": 2.5525220134836677, |
|
"learning_rate": 4.547263493110257e-06, |
|
"loss": 0.1386, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.21514423076923078, |
|
"grad_norm": 4.733640695947825, |
|
"learning_rate": 4.54169692322701e-06, |
|
"loss": 0.131, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.21634615384615385, |
|
"grad_norm": 2.4560081882135965, |
|
"learning_rate": 4.536099791579643e-06, |
|
"loss": 0.1332, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.21754807692307693, |
|
"grad_norm": 3.30310384335084, |
|
"learning_rate": 4.530472181950528e-06, |
|
"loss": 0.1452, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.21875, |
|
"grad_norm": 3.96117046469673, |
|
"learning_rate": 4.524814178578261e-06, |
|
"loss": 0.1258, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.21995192307692307, |
|
"grad_norm": 2.1571934099507324, |
|
"learning_rate": 4.519125866156392e-06, |
|
"loss": 0.1268, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.22115384615384615, |
|
"grad_norm": 2.731599995456764, |
|
"learning_rate": 4.5134073298321655e-06, |
|
"loss": 0.1275, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.22235576923076922, |
|
"grad_norm": 6.257464871792732, |
|
"learning_rate": 4.5076586552052375e-06, |
|
"loss": 0.136, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.22355769230769232, |
|
"grad_norm": 2.1253597649510496, |
|
"learning_rate": 4.501879928326402e-06, |
|
"loss": 0.1097, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.2247596153846154, |
|
"grad_norm": 4.866268104213111, |
|
"learning_rate": 4.496071235696296e-06, |
|
"loss": 0.1172, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.22596153846153846, |
|
"grad_norm": 2.65071594422531, |
|
"learning_rate": 4.49023266426411e-06, |
|
"loss": 0.1167, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.22716346153846154, |
|
"grad_norm": 3.5486758869705266, |
|
"learning_rate": 4.484364301426285e-06, |
|
"loss": 0.1276, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.2283653846153846, |
|
"grad_norm": 6.057336639310383, |
|
"learning_rate": 4.478466235025203e-06, |
|
"loss": 0.1393, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.22956730769230768, |
|
"grad_norm": 3.204768478540457, |
|
"learning_rate": 4.472538553347871e-06, |
|
"loss": 0.1208, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.23076923076923078, |
|
"grad_norm": 3.509914333448296, |
|
"learning_rate": 4.466581345124605e-06, |
|
"loss": 0.138, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.23197115384615385, |
|
"grad_norm": 1.9711436203178656, |
|
"learning_rate": 4.460594699527695e-06, |
|
"loss": 0.1263, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.23317307692307693, |
|
"grad_norm": 3.331060762805983, |
|
"learning_rate": 4.454578706170075e-06, |
|
"loss": 0.1424, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.234375, |
|
"grad_norm": 2.467895906628356, |
|
"learning_rate": 4.448533455103979e-06, |
|
"loss": 0.1324, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.23557692307692307, |
|
"grad_norm": 1.5497019030552028, |
|
"learning_rate": 4.442459036819595e-06, |
|
"loss": 0.1319, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.23677884615384615, |
|
"grad_norm": 2.822888845766881, |
|
"learning_rate": 4.4363555422437095e-06, |
|
"loss": 0.1272, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.23798076923076922, |
|
"grad_norm": 2.5029600610225695, |
|
"learning_rate": 4.430223062738344e-06, |
|
"loss": 0.128, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.23918269230769232, |
|
"grad_norm": 1.9911030442643596, |
|
"learning_rate": 4.424061690099392e-06, |
|
"loss": 0.1365, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.2403846153846154, |
|
"grad_norm": 2.062089943745463, |
|
"learning_rate": 4.417871516555241e-06, |
|
"loss": 0.1287, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.24158653846153846, |
|
"grad_norm": 3.3929814258858784, |
|
"learning_rate": 4.411652634765398e-06, |
|
"loss": 0.1354, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.24278846153846154, |
|
"grad_norm": 2.47220919536628, |
|
"learning_rate": 4.4054051378190915e-06, |
|
"loss": 0.1243, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.2439903846153846, |
|
"grad_norm": 4.8206335139544265, |
|
"learning_rate": 4.39912911923389e-06, |
|
"loss": 0.1225, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.24519230769230768, |
|
"grad_norm": 6.242330191426054, |
|
"learning_rate": 4.392824672954295e-06, |
|
"loss": 0.1495, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.24639423076923078, |
|
"grad_norm": 4.388543194744337, |
|
"learning_rate": 4.386491893350334e-06, |
|
"loss": 0.1225, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.24759615384615385, |
|
"grad_norm": 3.190150517704449, |
|
"learning_rate": 4.380130875216156e-06, |
|
"loss": 0.1255, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.24879807692307693, |
|
"grad_norm": 2.213731831929862, |
|
"learning_rate": 4.373741713768605e-06, |
|
"loss": 0.1356, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 2.7046643388864546, |
|
"learning_rate": 4.367324504645793e-06, |
|
"loss": 0.1374, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.2512019230769231, |
|
"grad_norm": 2.1009699551445977, |
|
"learning_rate": 4.360879343905677e-06, |
|
"loss": 0.1332, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.25240384615384615, |
|
"grad_norm": 2.650338768654261, |
|
"learning_rate": 4.354406328024613e-06, |
|
"loss": 0.1314, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.2536057692307692, |
|
"grad_norm": 2.810331149813075, |
|
"learning_rate": 4.347905553895918e-06, |
|
"loss": 0.1295, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.2548076923076923, |
|
"grad_norm": 3.814782755239228, |
|
"learning_rate": 4.341377118828415e-06, |
|
"loss": 0.1193, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.25600961538461536, |
|
"grad_norm": 2.9998629650762405, |
|
"learning_rate": 4.33482112054498e-06, |
|
"loss": 0.131, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.25721153846153844, |
|
"grad_norm": 2.7743819788707365, |
|
"learning_rate": 4.3282376571810745e-06, |
|
"loss": 0.1262, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.25841346153846156, |
|
"grad_norm": 2.9124275491739255, |
|
"learning_rate": 4.32162682728328e-06, |
|
"loss": 0.1256, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.25961538461538464, |
|
"grad_norm": 2.1687486550981805, |
|
"learning_rate": 4.3149887298078275e-06, |
|
"loss": 0.1355, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.2608173076923077, |
|
"grad_norm": 3.46835598153599, |
|
"learning_rate": 4.308323464119103e-06, |
|
"loss": 0.1294, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.2620192307692308, |
|
"grad_norm": 3.1610918409226603, |
|
"learning_rate": 4.301631129988174e-06, |
|
"loss": 0.1179, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.26322115384615385, |
|
"grad_norm": 3.5692561412500914, |
|
"learning_rate": 4.294911827591288e-06, |
|
"loss": 0.1316, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.2644230769230769, |
|
"grad_norm": 3.1685646314642955, |
|
"learning_rate": 4.288165657508377e-06, |
|
"loss": 0.1287, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.265625, |
|
"grad_norm": 2.6180618695713695, |
|
"learning_rate": 4.281392720721546e-06, |
|
"loss": 0.1225, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.2668269230769231, |
|
"grad_norm": 1.8843206020979073, |
|
"learning_rate": 4.274593118613569e-06, |
|
"loss": 0.1116, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.26802884615384615, |
|
"grad_norm": 2.541701491013687, |
|
"learning_rate": 4.267766952966369e-06, |
|
"loss": 0.131, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.2692307692307692, |
|
"grad_norm": 2.7752887478918185, |
|
"learning_rate": 4.260914325959491e-06, |
|
"loss": 0.134, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.2704326923076923, |
|
"grad_norm": 2.2244522364780748, |
|
"learning_rate": 4.254035340168577e-06, |
|
"loss": 0.1331, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.27163461538461536, |
|
"grad_norm": 2.1577691411009186, |
|
"learning_rate": 4.247130098563825e-06, |
|
"loss": 0.1356, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.27283653846153844, |
|
"grad_norm": 2.495807078564746, |
|
"learning_rate": 4.2401987045084544e-06, |
|
"loss": 0.1285, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.27403846153846156, |
|
"grad_norm": 2.315378410961849, |
|
"learning_rate": 4.233241261757155e-06, |
|
"loss": 0.1314, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.27524038461538464, |
|
"grad_norm": 2.3360381121240485, |
|
"learning_rate": 4.226257874454535e-06, |
|
"loss": 0.1335, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.2764423076923077, |
|
"grad_norm": 4.342066939412811, |
|
"learning_rate": 4.219248647133559e-06, |
|
"loss": 0.1407, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.2776442307692308, |
|
"grad_norm": 2.3663888374606032, |
|
"learning_rate": 4.212213684713987e-06, |
|
"loss": 0.1224, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.27884615384615385, |
|
"grad_norm": 3.0614706455153553, |
|
"learning_rate": 4.205153092500805e-06, |
|
"loss": 0.1229, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.2800480769230769, |
|
"grad_norm": 2.380259494398439, |
|
"learning_rate": 4.198066976182644e-06, |
|
"loss": 0.1292, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.28125, |
|
"grad_norm": 4.013842010005791, |
|
"learning_rate": 4.1909554418302e-06, |
|
"loss": 0.134, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.2824519230769231, |
|
"grad_norm": 1.778945958084193, |
|
"learning_rate": 4.183818595894648e-06, |
|
"loss": 0.1428, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.28365384615384615, |
|
"grad_norm": 3.750377365680276, |
|
"learning_rate": 4.176656545206046e-06, |
|
"loss": 0.1291, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.2848557692307692, |
|
"grad_norm": 1.9066583171893872, |
|
"learning_rate": 4.169469396971739e-06, |
|
"loss": 0.1176, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.2860576923076923, |
|
"grad_norm": 3.7582224188634736, |
|
"learning_rate": 4.16225725877475e-06, |
|
"loss": 0.1249, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.28725961538461536, |
|
"grad_norm": 2.7825989989563564, |
|
"learning_rate": 4.155020238572174e-06, |
|
"loss": 0.1109, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.28846153846153844, |
|
"grad_norm": 4.879245102252371, |
|
"learning_rate": 4.147758444693557e-06, |
|
"loss": 0.1364, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.28966346153846156, |
|
"grad_norm": 3.2182991915950394, |
|
"learning_rate": 4.140471985839281e-06, |
|
"loss": 0.1271, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.29086538461538464, |
|
"grad_norm": 2.166479148262207, |
|
"learning_rate": 4.13316097107893e-06, |
|
"loss": 0.1213, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.2920673076923077, |
|
"grad_norm": 2.47776248902879, |
|
"learning_rate": 4.125825509849662e-06, |
|
"loss": 0.1193, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.2932692307692308, |
|
"grad_norm": 2.540451340281278, |
|
"learning_rate": 4.11846571195457e-06, |
|
"loss": 0.119, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.29447115384615385, |
|
"grad_norm": 3.2230059766589814, |
|
"learning_rate": 4.111081687561036e-06, |
|
"loss": 0.1276, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.2956730769230769, |
|
"grad_norm": 2.835333516397744, |
|
"learning_rate": 4.103673547199087e-06, |
|
"loss": 0.1241, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.296875, |
|
"grad_norm": 2.752629007829119, |
|
"learning_rate": 4.096241401759732e-06, |
|
"loss": 0.1239, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.2980769230769231, |
|
"grad_norm": 1.8919133248892268, |
|
"learning_rate": 4.0887853624933134e-06, |
|
"loss": 0.1239, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.29927884615384615, |
|
"grad_norm": 2.8561871397763774, |
|
"learning_rate": 4.081305541007832e-06, |
|
"loss": 0.1289, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.3004807692307692, |
|
"grad_norm": 1.6600940797126917, |
|
"learning_rate": 4.07380204926728e-06, |
|
"loss": 0.1384, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3016826923076923, |
|
"grad_norm": 2.404290817625276, |
|
"learning_rate": 4.066274999589967e-06, |
|
"loss": 0.1299, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.30288461538461536, |
|
"grad_norm": 1.9475394667243153, |
|
"learning_rate": 4.058724504646834e-06, |
|
"loss": 0.1259, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.30408653846153844, |
|
"grad_norm": 3.0051337393851143, |
|
"learning_rate": 4.051150677459772e-06, |
|
"loss": 0.1237, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.30528846153846156, |
|
"grad_norm": 2.1578955093603063, |
|
"learning_rate": 4.043553631399928e-06, |
|
"loss": 0.1202, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.30649038461538464, |
|
"grad_norm": 6.142783994800525, |
|
"learning_rate": 4.035933480186005e-06, |
|
"loss": 0.1347, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.3076923076923077, |
|
"grad_norm": 2.954830082548502, |
|
"learning_rate": 4.028290337882565e-06, |
|
"loss": 0.1437, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.3088942307692308, |
|
"grad_norm": 2.2019067150054386, |
|
"learning_rate": 4.020624318898319e-06, |
|
"loss": 0.1307, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.31009615384615385, |
|
"grad_norm": 4.54110621977567, |
|
"learning_rate": 4.012935537984414e-06, |
|
"loss": 0.1335, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.3112980769230769, |
|
"grad_norm": 2.85625320530939, |
|
"learning_rate": 4.005224110232715e-06, |
|
"loss": 0.1317, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.3125, |
|
"grad_norm": 2.3576527639858895, |
|
"learning_rate": 3.997490151074085e-06, |
|
"loss": 0.1284, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3137019230769231, |
|
"grad_norm": 2.4537651255404214, |
|
"learning_rate": 3.989733776276654e-06, |
|
"loss": 0.1211, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.31490384615384615, |
|
"grad_norm": 3.352379617409583, |
|
"learning_rate": 3.981955101944088e-06, |
|
"loss": 0.1223, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.3161057692307692, |
|
"grad_norm": 2.233759105251149, |
|
"learning_rate": 3.9741542445138505e-06, |
|
"loss": 0.1279, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.3173076923076923, |
|
"grad_norm": 3.26893899586464, |
|
"learning_rate": 3.966331320755457e-06, |
|
"loss": 0.1308, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.31850961538461536, |
|
"grad_norm": 1.9938930635011727, |
|
"learning_rate": 3.958486447768736e-06, |
|
"loss": 0.1191, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.31971153846153844, |
|
"grad_norm": 1.6739004551575976, |
|
"learning_rate": 3.95061974298206e-06, |
|
"loss": 0.1114, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.32091346153846156, |
|
"grad_norm": 2.1002098020462574, |
|
"learning_rate": 3.942731324150606e-06, |
|
"loss": 0.1229, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.32211538461538464, |
|
"grad_norm": 3.227784545692672, |
|
"learning_rate": 3.934821309354581e-06, |
|
"loss": 0.1282, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.3233173076923077, |
|
"grad_norm": 2.6927966633468134, |
|
"learning_rate": 3.926889816997457e-06, |
|
"loss": 0.1274, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.3245192307692308, |
|
"grad_norm": 3.841849853659577, |
|
"learning_rate": 3.9189369658042e-06, |
|
"loss": 0.1316, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.32572115384615385, |
|
"grad_norm": 2.6872062042849727, |
|
"learning_rate": 3.910962874819495e-06, |
|
"loss": 0.1275, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.3269230769230769, |
|
"grad_norm": 3.6657337480434946, |
|
"learning_rate": 3.9029676634059565e-06, |
|
"loss": 0.1254, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.328125, |
|
"grad_norm": 2.8137841340293352, |
|
"learning_rate": 3.894951451242351e-06, |
|
"loss": 0.1316, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.3293269230769231, |
|
"grad_norm": 1.5503149824535458, |
|
"learning_rate": 3.886914358321796e-06, |
|
"loss": 0.1199, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.33052884615384615, |
|
"grad_norm": 1.9124225846435765, |
|
"learning_rate": 3.8788565049499746e-06, |
|
"loss": 0.1144, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.3317307692307692, |
|
"grad_norm": 2.2194257928538974, |
|
"learning_rate": 3.8707780117433276e-06, |
|
"loss": 0.1203, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.3329326923076923, |
|
"grad_norm": 2.2430374522475556, |
|
"learning_rate": 3.8626789996272466e-06, |
|
"loss": 0.1254, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.33413461538461536, |
|
"grad_norm": 1.656547967694163, |
|
"learning_rate": 3.854559589834269e-06, |
|
"loss": 0.1155, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.33533653846153844, |
|
"grad_norm": 2.71535491729536, |
|
"learning_rate": 3.846419903902261e-06, |
|
"loss": 0.1248, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.33653846153846156, |
|
"grad_norm": 4.963796667259708, |
|
"learning_rate": 3.838260063672599e-06, |
|
"loss": 0.1201, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.33774038461538464, |
|
"grad_norm": 2.02605376529183, |
|
"learning_rate": 3.830080191288342e-06, |
|
"loss": 0.1264, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.3389423076923077, |
|
"grad_norm": 2.2760213880197124, |
|
"learning_rate": 3.82188040919241e-06, |
|
"loss": 0.1121, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.3401442307692308, |
|
"grad_norm": 2.458521927082506, |
|
"learning_rate": 3.813660840125747e-06, |
|
"loss": 0.1322, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.34134615384615385, |
|
"grad_norm": 4.210654399847963, |
|
"learning_rate": 3.805421607125482e-06, |
|
"loss": 0.128, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.3425480769230769, |
|
"grad_norm": 2.161926215111614, |
|
"learning_rate": 3.7971628335230932e-06, |
|
"loss": 0.13, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.34375, |
|
"grad_norm": 3.904255891368641, |
|
"learning_rate": 3.788884642942555e-06, |
|
"loss": 0.1317, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.3449519230769231, |
|
"grad_norm": 2.71934531169795, |
|
"learning_rate": 3.780587159298492e-06, |
|
"loss": 0.1359, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.34615384615384615, |
|
"grad_norm": 2.8064237134830274, |
|
"learning_rate": 3.7722705067943227e-06, |
|
"loss": 0.133, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.3473557692307692, |
|
"grad_norm": 2.5669808093942272, |
|
"learning_rate": 3.763934809920401e-06, |
|
"loss": 0.1312, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.3485576923076923, |
|
"grad_norm": 2.6878698838883883, |
|
"learning_rate": 3.755580193452153e-06, |
|
"loss": 0.126, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.34975961538461536, |
|
"grad_norm": 1.9940547887564615, |
|
"learning_rate": 3.747206782448207e-06, |
|
"loss": 0.1215, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.35096153846153844, |
|
"grad_norm": 2.4246119443294147, |
|
"learning_rate": 3.738814702248524e-06, |
|
"loss": 0.1259, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.35216346153846156, |
|
"grad_norm": 2.448624947878468, |
|
"learning_rate": 3.7304040784725183e-06, |
|
"loss": 0.1265, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.35336538461538464, |
|
"grad_norm": 2.6611405194352544, |
|
"learning_rate": 3.7219750370171843e-06, |
|
"loss": 0.1258, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.3545673076923077, |
|
"grad_norm": 3.9151580028753092, |
|
"learning_rate": 3.7135277040552014e-06, |
|
"loss": 0.1269, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.3557692307692308, |
|
"grad_norm": 1.902396245377977, |
|
"learning_rate": 3.7050622060330553e-06, |
|
"loss": 0.1269, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.35697115384615385, |
|
"grad_norm": 2.200109114807576, |
|
"learning_rate": 3.6965786696691386e-06, |
|
"loss": 0.1297, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.3581730769230769, |
|
"grad_norm": 2.640515221983352, |
|
"learning_rate": 3.688077221951857e-06, |
|
"loss": 0.1217, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.359375, |
|
"grad_norm": 2.9478456557798194, |
|
"learning_rate": 3.6795579901377277e-06, |
|
"loss": 0.1206, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.3605769230769231, |
|
"grad_norm": 4.499371410793944, |
|
"learning_rate": 3.671021101749476e-06, |
|
"loss": 0.1159, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.36177884615384615, |
|
"grad_norm": 3.2861013876529266, |
|
"learning_rate": 3.662466684574122e-06, |
|
"loss": 0.1147, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.3629807692307692, |
|
"grad_norm": 2.936797344536718, |
|
"learning_rate": 3.653894866661073e-06, |
|
"loss": 0.1218, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.3641826923076923, |
|
"grad_norm": 2.5284722183745347, |
|
"learning_rate": 3.645305776320205e-06, |
|
"loss": 0.1277, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.36538461538461536, |
|
"grad_norm": 2.0656418154561416, |
|
"learning_rate": 3.636699542119939e-06, |
|
"loss": 0.1226, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.36658653846153844, |
|
"grad_norm": 2.761257208121012, |
|
"learning_rate": 3.628076292885322e-06, |
|
"loss": 0.1176, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.36778846153846156, |
|
"grad_norm": 4.409264331933305, |
|
"learning_rate": 3.6194361576960944e-06, |
|
"loss": 0.1303, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.36899038461538464, |
|
"grad_norm": 2.2897088881849483, |
|
"learning_rate": 3.6107792658847597e-06, |
|
"loss": 0.1166, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.3701923076923077, |
|
"grad_norm": 2.556001831241419, |
|
"learning_rate": 3.602105747034646e-06, |
|
"loss": 0.1238, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.3713942307692308, |
|
"grad_norm": 2.3832438875718442, |
|
"learning_rate": 3.5934157309779714e-06, |
|
"loss": 0.1189, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.37259615384615385, |
|
"grad_norm": 2.256691965422808, |
|
"learning_rate": 3.5847093477938955e-06, |
|
"loss": 0.1324, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.3737980769230769, |
|
"grad_norm": 4.4764970926214325, |
|
"learning_rate": 3.5759867278065752e-06, |
|
"loss": 0.1266, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.375, |
|
"grad_norm": 2.8438597379920045, |
|
"learning_rate": 3.5672480015832117e-06, |
|
"loss": 0.1258, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.3762019230769231, |
|
"grad_norm": 2.5547304438348193, |
|
"learning_rate": 3.5584932999320986e-06, |
|
"loss": 0.1189, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.37740384615384615, |
|
"grad_norm": 3.861193208078938, |
|
"learning_rate": 3.549722753900662e-06, |
|
"loss": 0.12, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.3786057692307692, |
|
"grad_norm": 2.0271164351237076, |
|
"learning_rate": 3.5409364947734994e-06, |
|
"loss": 0.1034, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.3798076923076923, |
|
"grad_norm": 2.661574124686293, |
|
"learning_rate": 3.532134654070415e-06, |
|
"loss": 0.1179, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.38100961538461536, |
|
"grad_norm": 4.444020843792755, |
|
"learning_rate": 3.523317363544449e-06, |
|
"loss": 0.1383, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.38221153846153844, |
|
"grad_norm": 2.0898293018736145, |
|
"learning_rate": 3.5144847551799105e-06, |
|
"loss": 0.128, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.38341346153846156, |
|
"grad_norm": 6.381896171861657, |
|
"learning_rate": 3.5056369611903945e-06, |
|
"loss": 0.135, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.38461538461538464, |
|
"grad_norm": 3.3029527185373913, |
|
"learning_rate": 3.496774114016809e-06, |
|
"loss": 0.1367, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.3858173076923077, |
|
"grad_norm": 2.3200365246792094, |
|
"learning_rate": 3.487896346325389e-06, |
|
"loss": 0.1244, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.3870192307692308, |
|
"grad_norm": 3.598439324678028, |
|
"learning_rate": 3.4790037910057128e-06, |
|
"loss": 0.131, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.38822115384615385, |
|
"grad_norm": 1.4871335164149173, |
|
"learning_rate": 3.4700965811687106e-06, |
|
"loss": 0.1194, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.3894230769230769, |
|
"grad_norm": 2.4184023479090024, |
|
"learning_rate": 3.461174850144674e-06, |
|
"loss": 0.1213, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.390625, |
|
"grad_norm": 3.436257185320764, |
|
"learning_rate": 3.4522387314812606e-06, |
|
"loss": 0.1324, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.3918269230769231, |
|
"grad_norm": 1.8151625861479124, |
|
"learning_rate": 3.443288358941491e-06, |
|
"loss": 0.1108, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.39302884615384615, |
|
"grad_norm": 1.5261810547328365, |
|
"learning_rate": 3.4343238665017512e-06, |
|
"loss": 0.1105, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.3942307692307692, |
|
"grad_norm": 2.8091934186049063, |
|
"learning_rate": 3.425345388349787e-06, |
|
"loss": 0.1348, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.3954326923076923, |
|
"grad_norm": 2.002504867469609, |
|
"learning_rate": 3.4163530588826877e-06, |
|
"loss": 0.1075, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.39663461538461536, |
|
"grad_norm": 1.925848303593358, |
|
"learning_rate": 3.4073470127048867e-06, |
|
"loss": 0.121, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.39783653846153844, |
|
"grad_norm": 3.4486630510150134, |
|
"learning_rate": 3.3983273846261373e-06, |
|
"loss": 0.13, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.39903846153846156, |
|
"grad_norm": 2.29190337434423, |
|
"learning_rate": 3.3892943096594968e-06, |
|
"loss": 0.1175, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.40024038461538464, |
|
"grad_norm": 2.7382806950058574, |
|
"learning_rate": 3.3802479230193074e-06, |
|
"loss": 0.1355, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.4014423076923077, |
|
"grad_norm": 3.8969395559370286, |
|
"learning_rate": 3.371188360119173e-06, |
|
"loss": 0.1265, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.4026442307692308, |
|
"grad_norm": 2.0972867493422567, |
|
"learning_rate": 3.3621157565699265e-06, |
|
"loss": 0.1182, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.40384615384615385, |
|
"grad_norm": 3.7477223788217673, |
|
"learning_rate": 3.3530302481776062e-06, |
|
"loss": 0.1147, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.4050480769230769, |
|
"grad_norm": 2.585644020351654, |
|
"learning_rate": 3.343931970941421e-06, |
|
"loss": 0.1184, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.40625, |
|
"grad_norm": 2.6033563821440664, |
|
"learning_rate": 3.3348210610517117e-06, |
|
"loss": 0.1221, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.4074519230769231, |
|
"grad_norm": 3.1763777004125067, |
|
"learning_rate": 3.3256976548879183e-06, |
|
"loss": 0.1149, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.40865384615384615, |
|
"grad_norm": 2.7352894929472535, |
|
"learning_rate": 3.3165618890165306e-06, |
|
"loss": 0.1205, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.4098557692307692, |
|
"grad_norm": 3.574807534485726, |
|
"learning_rate": 3.307413900189054e-06, |
|
"loss": 0.1073, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.4110576923076923, |
|
"grad_norm": 3.311593916021147, |
|
"learning_rate": 3.29825382533995e-06, |
|
"loss": 0.1152, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.41225961538461536, |
|
"grad_norm": 2.6214370492688692, |
|
"learning_rate": 3.289081801584601e-06, |
|
"loss": 0.1178, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.41346153846153844, |
|
"grad_norm": 2.28098423314985, |
|
"learning_rate": 3.2798979662172446e-06, |
|
"loss": 0.1175, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.41466346153846156, |
|
"grad_norm": 4.235250427718613, |
|
"learning_rate": 3.2707024567089267e-06, |
|
"loss": 0.1504, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.41586538461538464, |
|
"grad_norm": 1.9122767567805194, |
|
"learning_rate": 3.2614954107054405e-06, |
|
"loss": 0.1294, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.4170673076923077, |
|
"grad_norm": 3.054582085992648, |
|
"learning_rate": 3.2522769660252673e-06, |
|
"loss": 0.1223, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.4182692307692308, |
|
"grad_norm": 1.6351923608702348, |
|
"learning_rate": 3.243047260657511e-06, |
|
"loss": 0.1197, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.41947115384615385, |
|
"grad_norm": 2.7477487145437576, |
|
"learning_rate": 3.233806432759837e-06, |
|
"loss": 0.1293, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.4206730769230769, |
|
"grad_norm": 2.4016286502537505, |
|
"learning_rate": 3.2245546206564015e-06, |
|
"loss": 0.1154, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.421875, |
|
"grad_norm": 1.9800234381047233, |
|
"learning_rate": 3.215291962835779e-06, |
|
"loss": 0.123, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.4230769230769231, |
|
"grad_norm": 3.217074666511334, |
|
"learning_rate": 3.206018597948893e-06, |
|
"loss": 0.1208, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.42427884615384615, |
|
"grad_norm": 3.25172973443265, |
|
"learning_rate": 3.1967346648069397e-06, |
|
"loss": 0.1244, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.4254807692307692, |
|
"grad_norm": 2.2450714988867353, |
|
"learning_rate": 3.1874403023793078e-06, |
|
"loss": 0.1179, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.4266826923076923, |
|
"grad_norm": 3.2488238286410875, |
|
"learning_rate": 3.1781356497914995e-06, |
|
"loss": 0.1245, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.42788461538461536, |
|
"grad_norm": 2.218601857724757, |
|
"learning_rate": 3.168820846323053e-06, |
|
"loss": 0.1251, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.42908653846153844, |
|
"grad_norm": 2.088964444672931, |
|
"learning_rate": 3.1594960314054455e-06, |
|
"loss": 0.1193, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.43028846153846156, |
|
"grad_norm": 4.741704269019802, |
|
"learning_rate": 3.150161344620021e-06, |
|
"loss": 0.1322, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.43149038461538464, |
|
"grad_norm": 3.493342583852878, |
|
"learning_rate": 3.1408169256958888e-06, |
|
"loss": 0.1278, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.4326923076923077, |
|
"grad_norm": 2.351714268349835, |
|
"learning_rate": 3.1314629145078377e-06, |
|
"loss": 0.116, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.4338942307692308, |
|
"grad_norm": 3.8649842638324015, |
|
"learning_rate": 3.1220994510742432e-06, |
|
"loss": 0.1297, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.43509615384615385, |
|
"grad_norm": 2.841739719188719, |
|
"learning_rate": 3.1127266755549673e-06, |
|
"loss": 0.1238, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.4362980769230769, |
|
"grad_norm": 2.0373254493345843, |
|
"learning_rate": 3.1033447282492645e-06, |
|
"loss": 0.1339, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.4375, |
|
"grad_norm": 1.8332876940880098, |
|
"learning_rate": 3.0939537495936784e-06, |
|
"loss": 0.1255, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.4387019230769231, |
|
"grad_norm": 1.9574438212255216, |
|
"learning_rate": 3.0845538801599423e-06, |
|
"loss": 0.1197, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.43990384615384615, |
|
"grad_norm": 1.7871551779346857, |
|
"learning_rate": 3.075145260652873e-06, |
|
"loss": 0.1344, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.4411057692307692, |
|
"grad_norm": 3.6706640863007416, |
|
"learning_rate": 3.0657280319082657e-06, |
|
"loss": 0.116, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.4423076923076923, |
|
"grad_norm": 1.6394420662743008, |
|
"learning_rate": 3.056302334890786e-06, |
|
"loss": 0.123, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.44350961538461536, |
|
"grad_norm": 1.8174034087550737, |
|
"learning_rate": 3.0468683106918608e-06, |
|
"loss": 0.1203, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.44471153846153844, |
|
"grad_norm": 2.028279546605494, |
|
"learning_rate": 3.0374261005275606e-06, |
|
"loss": 0.1153, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.44591346153846156, |
|
"grad_norm": 3.1742172663448893, |
|
"learning_rate": 3.0279758457364943e-06, |
|
"loss": 0.1119, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.44711538461538464, |
|
"grad_norm": 2.1542693819149994, |
|
"learning_rate": 3.018517687777688e-06, |
|
"loss": 0.1152, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.4483173076923077, |
|
"grad_norm": 4.6204720149874605, |
|
"learning_rate": 3.009051768228468e-06, |
|
"loss": 0.1297, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.4495192307692308, |
|
"grad_norm": 2.0445376227310095, |
|
"learning_rate": 2.9995782287823428e-06, |
|
"loss": 0.115, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.45072115384615385, |
|
"grad_norm": 2.320840534894566, |
|
"learning_rate": 2.9900972112468823e-06, |
|
"loss": 0.1257, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4519230769230769, |
|
"grad_norm": 4.0732649101420915, |
|
"learning_rate": 2.9806088575415926e-06, |
|
"loss": 0.1182, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.453125, |
|
"grad_norm": 3.8261178694802327, |
|
"learning_rate": 2.971113309695796e-06, |
|
"loss": 0.1202, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.4543269230769231, |
|
"grad_norm": 2.393271946060094, |
|
"learning_rate": 2.961610709846501e-06, |
|
"loss": 0.1171, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.45552884615384615, |
|
"grad_norm": 1.8371462666695046, |
|
"learning_rate": 2.9521012002362766e-06, |
|
"loss": 0.1142, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.4567307692307692, |
|
"grad_norm": 2.08485758756134, |
|
"learning_rate": 2.942584923211121e-06, |
|
"loss": 0.1154, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.4579326923076923, |
|
"grad_norm": 2.6562279999651257, |
|
"learning_rate": 2.933062021218337e-06, |
|
"loss": 0.1063, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.45913461538461536, |
|
"grad_norm": 2.533470915365061, |
|
"learning_rate": 2.9235326368043885e-06, |
|
"loss": 0.1135, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.46033653846153844, |
|
"grad_norm": 2.4011631762333905, |
|
"learning_rate": 2.9139969126127803e-06, |
|
"loss": 0.1134, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.46153846153846156, |
|
"grad_norm": 2.252103330371488, |
|
"learning_rate": 2.9044549913819125e-06, |
|
"loss": 0.1329, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.46274038461538464, |
|
"grad_norm": 2.111392303163354, |
|
"learning_rate": 2.8949070159429473e-06, |
|
"loss": 0.1167, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.4639423076923077, |
|
"grad_norm": 2.10465453166218, |
|
"learning_rate": 2.885353129217671e-06, |
|
"loss": 0.1294, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.4651442307692308, |
|
"grad_norm": 1.7606762750864913, |
|
"learning_rate": 2.875793474216358e-06, |
|
"loss": 0.1195, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.46634615384615385, |
|
"grad_norm": 3.4911755377127665, |
|
"learning_rate": 2.8662281940356234e-06, |
|
"loss": 0.1197, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.4675480769230769, |
|
"grad_norm": 2.485129458685194, |
|
"learning_rate": 2.8566574318562855e-06, |
|
"loss": 0.1257, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.46875, |
|
"grad_norm": 3.0980789105745536, |
|
"learning_rate": 2.8470813309412222e-06, |
|
"loss": 0.1159, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.4699519230769231, |
|
"grad_norm": 2.06101810490773, |
|
"learning_rate": 2.8375000346332256e-06, |
|
"loss": 0.1114, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.47115384615384615, |
|
"grad_norm": 2.5211271193230567, |
|
"learning_rate": 2.827913686352856e-06, |
|
"loss": 0.1278, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.4723557692307692, |
|
"grad_norm": 2.1529408157219825, |
|
"learning_rate": 2.818322429596297e-06, |
|
"loss": 0.1206, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.4735576923076923, |
|
"grad_norm": 2.366887732661358, |
|
"learning_rate": 2.808726407933205e-06, |
|
"loss": 0.1149, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.47475961538461536, |
|
"grad_norm": 2.16343980990941, |
|
"learning_rate": 2.7991257650045606e-06, |
|
"loss": 0.1208, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.47596153846153844, |
|
"grad_norm": 2.8342000182216345, |
|
"learning_rate": 2.7895206445205226e-06, |
|
"loss": 0.1217, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.47716346153846156, |
|
"grad_norm": 1.852391269800072, |
|
"learning_rate": 2.7799111902582697e-06, |
|
"loss": 0.1155, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.47836538461538464, |
|
"grad_norm": 2.5799284357343484, |
|
"learning_rate": 2.7702975460598545e-06, |
|
"loss": 0.1283, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.4795673076923077, |
|
"grad_norm": 1.881492308096937, |
|
"learning_rate": 2.760679855830047e-06, |
|
"loss": 0.1081, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.4807692307692308, |
|
"grad_norm": 2.5186830859263436, |
|
"learning_rate": 2.7510582635341815e-06, |
|
"loss": 0.1187, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.48197115384615385, |
|
"grad_norm": 2.6559499054158615, |
|
"learning_rate": 2.7414329131960004e-06, |
|
"loss": 0.1233, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.4831730769230769, |
|
"grad_norm": 4.62630178829242, |
|
"learning_rate": 2.731803948895503e-06, |
|
"loss": 0.124, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.484375, |
|
"grad_norm": 1.914060815314394, |
|
"learning_rate": 2.722171514766781e-06, |
|
"loss": 0.1123, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.4855769230769231, |
|
"grad_norm": 2.4270202069774145, |
|
"learning_rate": 2.7125357549958687e-06, |
|
"loss": 0.1287, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.48677884615384615, |
|
"grad_norm": 2.1602891567758746, |
|
"learning_rate": 2.7028968138185783e-06, |
|
"loss": 0.1143, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.4879807692307692, |
|
"grad_norm": 2.68436864433482, |
|
"learning_rate": 2.6932548355183476e-06, |
|
"loss": 0.1166, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.4891826923076923, |
|
"grad_norm": 2.4944384936946196, |
|
"learning_rate": 2.6836099644240727e-06, |
|
"loss": 0.1133, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.49038461538461536, |
|
"grad_norm": 1.8613202081753457, |
|
"learning_rate": 2.673962344907953e-06, |
|
"loss": 0.109, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.49158653846153844, |
|
"grad_norm": 2.219693506080579, |
|
"learning_rate": 2.6643121213833306e-06, |
|
"loss": 0.1145, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.49278846153846156, |
|
"grad_norm": 2.8619481470099117, |
|
"learning_rate": 2.6546594383025214e-06, |
|
"loss": 0.1115, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.49399038461538464, |
|
"grad_norm": 2.666948981163753, |
|
"learning_rate": 2.6450044401546632e-06, |
|
"loss": 0.1305, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.4951923076923077, |
|
"grad_norm": 2.09947237601635, |
|
"learning_rate": 2.6353472714635443e-06, |
|
"loss": 0.1099, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.4963942307692308, |
|
"grad_norm": 1.9473099754220278, |
|
"learning_rate": 2.625688076785445e-06, |
|
"loss": 0.1208, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.49759615384615385, |
|
"grad_norm": 2.0864251934157774, |
|
"learning_rate": 2.6160270007069703e-06, |
|
"loss": 0.1257, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.4987980769230769, |
|
"grad_norm": 1.9893158881100514, |
|
"learning_rate": 2.606364187842891e-06, |
|
"loss": 0.1264, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.3582369528291083, |
|
"learning_rate": 2.5966997828339724e-06, |
|
"loss": 0.1147, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.5012019230769231, |
|
"grad_norm": 2.146899490039593, |
|
"learning_rate": 2.5870339303448127e-06, |
|
"loss": 0.1152, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.5024038461538461, |
|
"grad_norm": 1.990060058415754, |
|
"learning_rate": 2.5773667750616783e-06, |
|
"loss": 0.1041, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.5036057692307693, |
|
"grad_norm": 2.178599936980344, |
|
"learning_rate": 2.5676984616903367e-06, |
|
"loss": 0.1286, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.5048076923076923, |
|
"grad_norm": 2.562250086026024, |
|
"learning_rate": 2.5580291349538895e-06, |
|
"loss": 0.1146, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.5060096153846154, |
|
"grad_norm": 2.4548795707580418, |
|
"learning_rate": 2.5483589395906084e-06, |
|
"loss": 0.1232, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.5072115384615384, |
|
"grad_norm": 2.0576956102764536, |
|
"learning_rate": 2.5386880203517665e-06, |
|
"loss": 0.1091, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.5084134615384616, |
|
"grad_norm": 1.7798937747570411, |
|
"learning_rate": 2.5290165219994734e-06, |
|
"loss": 0.122, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.5096153846153846, |
|
"grad_norm": 3.2665103557785473, |
|
"learning_rate": 2.5193445893045054e-06, |
|
"loss": 0.119, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.5108173076923077, |
|
"grad_norm": 2.3751458473034175, |
|
"learning_rate": 2.5096723670441437e-06, |
|
"loss": 0.1161, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.5120192307692307, |
|
"grad_norm": 1.7591316722682409, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1151, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.5132211538461539, |
|
"grad_norm": 2.2115382855282464, |
|
"learning_rate": 2.4903276329558567e-06, |
|
"loss": 0.1313, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.5144230769230769, |
|
"grad_norm": 3.714925572378303, |
|
"learning_rate": 2.480655410695495e-06, |
|
"loss": 0.118, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.515625, |
|
"grad_norm": 2.292092125779591, |
|
"learning_rate": 2.4709834780005283e-06, |
|
"loss": 0.1105, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.5168269230769231, |
|
"grad_norm": 2.8062030763080066, |
|
"learning_rate": 2.4613119796482343e-06, |
|
"loss": 0.1279, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.5180288461538461, |
|
"grad_norm": 3.0016696690528684, |
|
"learning_rate": 2.4516410604093924e-06, |
|
"loss": 0.124, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.5192307692307693, |
|
"grad_norm": 2.6910032305249776, |
|
"learning_rate": 2.441970865046111e-06, |
|
"loss": 0.1164, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.5204326923076923, |
|
"grad_norm": 2.790603434708355, |
|
"learning_rate": 2.4323015383096645e-06, |
|
"loss": 0.1284, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.5216346153846154, |
|
"grad_norm": 1.88418937937736, |
|
"learning_rate": 2.422633224938323e-06, |
|
"loss": 0.1197, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.5228365384615384, |
|
"grad_norm": 2.268135867297592, |
|
"learning_rate": 2.412966069655188e-06, |
|
"loss": 0.1087, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.5240384615384616, |
|
"grad_norm": 1.7727390247554256, |
|
"learning_rate": 2.403300217166028e-06, |
|
"loss": 0.1047, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.5252403846153846, |
|
"grad_norm": 2.6452934155833, |
|
"learning_rate": 2.39363581215711e-06, |
|
"loss": 0.1209, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.5264423076923077, |
|
"grad_norm": 2.231648348284633, |
|
"learning_rate": 2.38397299929303e-06, |
|
"loss": 0.1225, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.5276442307692307, |
|
"grad_norm": 3.1589321862401323, |
|
"learning_rate": 2.374311923214556e-06, |
|
"loss": 0.1278, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.5288461538461539, |
|
"grad_norm": 2.538600702095854, |
|
"learning_rate": 2.3646527285364565e-06, |
|
"loss": 0.1133, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.5300480769230769, |
|
"grad_norm": 1.8010218639998627, |
|
"learning_rate": 2.3549955598453384e-06, |
|
"loss": 0.1102, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.53125, |
|
"grad_norm": 1.9859781619064247, |
|
"learning_rate": 2.3453405616974794e-06, |
|
"loss": 0.1223, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.5324519230769231, |
|
"grad_norm": 2.48433192428649, |
|
"learning_rate": 2.3356878786166703e-06, |
|
"loss": 0.1276, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.5336538461538461, |
|
"grad_norm": 1.9977524593562115, |
|
"learning_rate": 2.3260376550920472e-06, |
|
"loss": 0.1219, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.5348557692307693, |
|
"grad_norm": 1.9708632642889377, |
|
"learning_rate": 2.3163900355759277e-06, |
|
"loss": 0.117, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.5360576923076923, |
|
"grad_norm": 2.3393380650509146, |
|
"learning_rate": 2.3067451644816537e-06, |
|
"loss": 0.1328, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.5372596153846154, |
|
"grad_norm": 2.016237150348988, |
|
"learning_rate": 2.2971031861814225e-06, |
|
"loss": 0.115, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.5384615384615384, |
|
"grad_norm": 2.254008950804627, |
|
"learning_rate": 2.287464245004132e-06, |
|
"loss": 0.1184, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.5396634615384616, |
|
"grad_norm": 3.2043282624716403, |
|
"learning_rate": 2.27782848523322e-06, |
|
"loss": 0.1193, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.5408653846153846, |
|
"grad_norm": 1.4454861904743852, |
|
"learning_rate": 2.268196051104499e-06, |
|
"loss": 0.1104, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.5420673076923077, |
|
"grad_norm": 2.397925567908216, |
|
"learning_rate": 2.2585670868040004e-06, |
|
"loss": 0.1173, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.5432692307692307, |
|
"grad_norm": 2.6349415212538503, |
|
"learning_rate": 2.2489417364658194e-06, |
|
"loss": 0.1175, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.5444711538461539, |
|
"grad_norm": 1.912841995108057, |
|
"learning_rate": 2.2393201441699535e-06, |
|
"loss": 0.1124, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.5456730769230769, |
|
"grad_norm": 2.2214483754728396, |
|
"learning_rate": 2.2297024539401463e-06, |
|
"loss": 0.1169, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.546875, |
|
"grad_norm": 2.6645784394778995, |
|
"learning_rate": 2.2200888097417308e-06, |
|
"loss": 0.1124, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.5480769230769231, |
|
"grad_norm": 2.3991327890112757, |
|
"learning_rate": 2.2104793554794783e-06, |
|
"loss": 0.1082, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.5492788461538461, |
|
"grad_norm": 2.4642009420576487, |
|
"learning_rate": 2.2008742349954394e-06, |
|
"loss": 0.119, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.5504807692307693, |
|
"grad_norm": 2.5918285453531116, |
|
"learning_rate": 2.1912735920667966e-06, |
|
"loss": 0.1055, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.5516826923076923, |
|
"grad_norm": 2.0680446180956373, |
|
"learning_rate": 2.181677570403704e-06, |
|
"loss": 0.1109, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.5528846153846154, |
|
"grad_norm": 2.193301046368466, |
|
"learning_rate": 2.1720863136471447e-06, |
|
"loss": 0.1277, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.5540865384615384, |
|
"grad_norm": 2.5163737723965736, |
|
"learning_rate": 2.162499965366775e-06, |
|
"loss": 0.1219, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.5552884615384616, |
|
"grad_norm": 3.521848753217605, |
|
"learning_rate": 2.1529186690587786e-06, |
|
"loss": 0.114, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.5564903846153846, |
|
"grad_norm": 3.069616221629034, |
|
"learning_rate": 2.1433425681437154e-06, |
|
"loss": 0.1071, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.5576923076923077, |
|
"grad_norm": 3.53398612074779, |
|
"learning_rate": 2.1337718059643774e-06, |
|
"loss": 0.1236, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.5588942307692307, |
|
"grad_norm": 5.058223699592573, |
|
"learning_rate": 2.124206525783643e-06, |
|
"loss": 0.1109, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.5600961538461539, |
|
"grad_norm": 2.5478159897083352, |
|
"learning_rate": 2.114646870782329e-06, |
|
"loss": 0.1167, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.5612980769230769, |
|
"grad_norm": 2.354071051813213, |
|
"learning_rate": 2.1050929840570544e-06, |
|
"loss": 0.1011, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.5625, |
|
"grad_norm": 4.853864942677267, |
|
"learning_rate": 2.0955450086180883e-06, |
|
"loss": 0.116, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.5637019230769231, |
|
"grad_norm": 2.722700474105122, |
|
"learning_rate": 2.08600308738722e-06, |
|
"loss": 0.1108, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.5649038461538461, |
|
"grad_norm": 4.630914205750646, |
|
"learning_rate": 2.0764673631956115e-06, |
|
"loss": 0.1172, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5661057692307693, |
|
"grad_norm": 2.0844097872671616, |
|
"learning_rate": 2.0669379787816644e-06, |
|
"loss": 0.1086, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.5673076923076923, |
|
"grad_norm": 2.2079993034525147, |
|
"learning_rate": 2.0574150767888795e-06, |
|
"loss": 0.1199, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.5685096153846154, |
|
"grad_norm": 3.624361624117408, |
|
"learning_rate": 2.0478987997637246e-06, |
|
"loss": 0.1028, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.5697115384615384, |
|
"grad_norm": 2.304855132990531, |
|
"learning_rate": 2.0383892901534995e-06, |
|
"loss": 0.1143, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.5709134615384616, |
|
"grad_norm": 2.303913178369359, |
|
"learning_rate": 2.0288866903042055e-06, |
|
"loss": 0.1149, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5721153846153846, |
|
"grad_norm": 2.5242944062982944, |
|
"learning_rate": 2.0193911424584082e-06, |
|
"loss": 0.1271, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.5733173076923077, |
|
"grad_norm": 2.2840209986395643, |
|
"learning_rate": 2.0099027887531186e-06, |
|
"loss": 0.1025, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.5745192307692307, |
|
"grad_norm": 2.1475236126016757, |
|
"learning_rate": 2.0004217712176576e-06, |
|
"loss": 0.1052, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.5757211538461539, |
|
"grad_norm": 2.2062922770065625, |
|
"learning_rate": 1.9909482317715335e-06, |
|
"loss": 0.1261, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.5769230769230769, |
|
"grad_norm": 2.573858967297316, |
|
"learning_rate": 1.9814823122223125e-06, |
|
"loss": 0.1206, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.578125, |
|
"grad_norm": 3.5542312782650267, |
|
"learning_rate": 1.972024154263506e-06, |
|
"loss": 0.118, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.5793269230769231, |
|
"grad_norm": 3.043328928606157, |
|
"learning_rate": 1.96257389947244e-06, |
|
"loss": 0.1148, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.5805288461538461, |
|
"grad_norm": 2.186779926589517, |
|
"learning_rate": 1.9531316893081396e-06, |
|
"loss": 0.1028, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.5817307692307693, |
|
"grad_norm": 1.9507127168704683, |
|
"learning_rate": 1.9436976651092143e-06, |
|
"loss": 0.1069, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.5829326923076923, |
|
"grad_norm": 2.2374696361826403, |
|
"learning_rate": 1.934271968091735e-06, |
|
"loss": 0.1172, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5841346153846154, |
|
"grad_norm": 2.55684351637379, |
|
"learning_rate": 1.924854739347128e-06, |
|
"loss": 0.1084, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.5853365384615384, |
|
"grad_norm": 2.325113870079778, |
|
"learning_rate": 1.9154461198400585e-06, |
|
"loss": 0.1235, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.5865384615384616, |
|
"grad_norm": 2.6657491779145976, |
|
"learning_rate": 1.9060462504063229e-06, |
|
"loss": 0.1071, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.5877403846153846, |
|
"grad_norm": 1.956462181600069, |
|
"learning_rate": 1.8966552717507364e-06, |
|
"loss": 0.119, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.5889423076923077, |
|
"grad_norm": 2.705164990543757, |
|
"learning_rate": 1.8872733244450331e-06, |
|
"loss": 0.1023, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5901442307692307, |
|
"grad_norm": 1.9312443310397687, |
|
"learning_rate": 1.8779005489257572e-06, |
|
"loss": 0.1053, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.5913461538461539, |
|
"grad_norm": 2.855051765002529, |
|
"learning_rate": 1.8685370854921631e-06, |
|
"loss": 0.1072, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.5925480769230769, |
|
"grad_norm": 2.3926479059613373, |
|
"learning_rate": 1.8591830743041123e-06, |
|
"loss": 0.1226, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.59375, |
|
"grad_norm": 3.5632611766021465, |
|
"learning_rate": 1.8498386553799802e-06, |
|
"loss": 0.1003, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.5949519230769231, |
|
"grad_norm": 3.0673131415514803, |
|
"learning_rate": 1.8405039685945547e-06, |
|
"loss": 0.1103, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.5961538461538461, |
|
"grad_norm": 3.875636229689881, |
|
"learning_rate": 1.8311791536769485e-06, |
|
"loss": 0.1301, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.5973557692307693, |
|
"grad_norm": 3.2290497985605775, |
|
"learning_rate": 1.821864350208501e-06, |
|
"loss": 0.1149, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.5985576923076923, |
|
"grad_norm": 1.9347455813936323, |
|
"learning_rate": 1.8125596976206933e-06, |
|
"loss": 0.1087, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.5997596153846154, |
|
"grad_norm": 2.04110053593177, |
|
"learning_rate": 1.8032653351930607e-06, |
|
"loss": 0.112, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.6009615384615384, |
|
"grad_norm": 2.4285768876217637, |
|
"learning_rate": 1.793981402051107e-06, |
|
"loss": 0.1184, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6021634615384616, |
|
"grad_norm": 2.056516574333895, |
|
"learning_rate": 1.7847080371642222e-06, |
|
"loss": 0.1089, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.6033653846153846, |
|
"grad_norm": 2.872014799730928, |
|
"learning_rate": 1.7754453793435995e-06, |
|
"loss": 0.1203, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.6045673076923077, |
|
"grad_norm": 3.1357344499654225, |
|
"learning_rate": 1.7661935672401635e-06, |
|
"loss": 0.1057, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.6057692307692307, |
|
"grad_norm": 2.2990506646197684, |
|
"learning_rate": 1.7569527393424894e-06, |
|
"loss": 0.1102, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.6069711538461539, |
|
"grad_norm": 1.9080185886472223, |
|
"learning_rate": 1.7477230339747342e-06, |
|
"loss": 0.1128, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.6081730769230769, |
|
"grad_norm": 2.4866351605585217, |
|
"learning_rate": 1.7385045892945603e-06, |
|
"loss": 0.1082, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.609375, |
|
"grad_norm": 2.408386002877475, |
|
"learning_rate": 1.7292975432910738e-06, |
|
"loss": 0.1065, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.6105769230769231, |
|
"grad_norm": 2.3425864575127724, |
|
"learning_rate": 1.7201020337827556e-06, |
|
"loss": 0.0992, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.6117788461538461, |
|
"grad_norm": 3.3764842505199897, |
|
"learning_rate": 1.7109181984154e-06, |
|
"loss": 0.0994, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.6129807692307693, |
|
"grad_norm": 3.0511908446927105, |
|
"learning_rate": 1.7017461746600506e-06, |
|
"loss": 0.1116, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.6141826923076923, |
|
"grad_norm": 2.0835754940115048, |
|
"learning_rate": 1.6925860998109472e-06, |
|
"loss": 0.1027, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.6153846153846154, |
|
"grad_norm": 2.1369310697938873, |
|
"learning_rate": 1.6834381109834696e-06, |
|
"loss": 0.1082, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.6165865384615384, |
|
"grad_norm": 3.633547678025306, |
|
"learning_rate": 1.6743023451120831e-06, |
|
"loss": 0.1179, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.6177884615384616, |
|
"grad_norm": 3.222183839199512, |
|
"learning_rate": 1.6651789389482885e-06, |
|
"loss": 0.1047, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.6189903846153846, |
|
"grad_norm": 3.373496600017691, |
|
"learning_rate": 1.6560680290585798e-06, |
|
"loss": 0.1174, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.6201923076923077, |
|
"grad_norm": 3.355771088882065, |
|
"learning_rate": 1.646969751822394e-06, |
|
"loss": 0.1225, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.6213942307692307, |
|
"grad_norm": 2.132501441209862, |
|
"learning_rate": 1.6378842434300746e-06, |
|
"loss": 0.1085, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.6225961538461539, |
|
"grad_norm": 1.8201904843022139, |
|
"learning_rate": 1.6288116398808278e-06, |
|
"loss": 0.1072, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.6237980769230769, |
|
"grad_norm": 1.9943546756148034, |
|
"learning_rate": 1.619752076980693e-06, |
|
"loss": 0.1175, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 1.9417071481978827, |
|
"learning_rate": 1.6107056903405038e-06, |
|
"loss": 0.1031, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.6262019230769231, |
|
"grad_norm": 2.0812507755491776, |
|
"learning_rate": 1.6016726153738638e-06, |
|
"loss": 0.1181, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.6274038461538461, |
|
"grad_norm": 1.9437266222472136, |
|
"learning_rate": 1.5926529872951144e-06, |
|
"loss": 0.1104, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.6286057692307693, |
|
"grad_norm": 2.0078937220346265, |
|
"learning_rate": 1.583646941117313e-06, |
|
"loss": 0.1044, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.6298076923076923, |
|
"grad_norm": 2.2331084033833366, |
|
"learning_rate": 1.574654611650214e-06, |
|
"loss": 0.1147, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.6310096153846154, |
|
"grad_norm": 2.133371687932722, |
|
"learning_rate": 1.5656761334982487e-06, |
|
"loss": 0.1159, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.6322115384615384, |
|
"grad_norm": 2.068123773517536, |
|
"learning_rate": 1.5567116410585101e-06, |
|
"loss": 0.1038, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.6334134615384616, |
|
"grad_norm": 2.5576918982500683, |
|
"learning_rate": 1.5477612685187405e-06, |
|
"loss": 0.1169, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.6346153846153846, |
|
"grad_norm": 2.694751080220668, |
|
"learning_rate": 1.5388251498553263e-06, |
|
"loss": 0.1081, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.6358173076923077, |
|
"grad_norm": 2.135244446442495, |
|
"learning_rate": 1.52990341883129e-06, |
|
"loss": 0.1075, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 0.6370192307692307, |
|
"grad_norm": 2.1823074476166764, |
|
"learning_rate": 1.5209962089942885e-06, |
|
"loss": 0.1085, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.6382211538461539, |
|
"grad_norm": 1.9277746702424785, |
|
"learning_rate": 1.5121036536746119e-06, |
|
"loss": 0.1049, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 0.6394230769230769, |
|
"grad_norm": 2.365543759553611, |
|
"learning_rate": 1.5032258859831916e-06, |
|
"loss": 0.1093, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.640625, |
|
"grad_norm": 2.4257341316404406, |
|
"learning_rate": 1.4943630388096055e-06, |
|
"loss": 0.1175, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 0.6418269230769231, |
|
"grad_norm": 2.653293916979889, |
|
"learning_rate": 1.4855152448200901e-06, |
|
"loss": 0.1153, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.6430288461538461, |
|
"grad_norm": 2.419944610975381, |
|
"learning_rate": 1.4766826364555514e-06, |
|
"loss": 0.1159, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.6442307692307693, |
|
"grad_norm": 2.0103810925549626, |
|
"learning_rate": 1.467865345929586e-06, |
|
"loss": 0.1143, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.6454326923076923, |
|
"grad_norm": 2.01089727654853, |
|
"learning_rate": 1.4590635052265008e-06, |
|
"loss": 0.1106, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.6466346153846154, |
|
"grad_norm": 1.748446439918439, |
|
"learning_rate": 1.4502772460993387e-06, |
|
"loss": 0.1018, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.6478365384615384, |
|
"grad_norm": 2.484572897708403, |
|
"learning_rate": 1.4415067000679029e-06, |
|
"loss": 0.1104, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.6490384615384616, |
|
"grad_norm": 2.4037649077365657, |
|
"learning_rate": 1.4327519984167887e-06, |
|
"loss": 0.1189, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.6502403846153846, |
|
"grad_norm": 1.8720994441559204, |
|
"learning_rate": 1.4240132721934256e-06, |
|
"loss": 0.118, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.6514423076923077, |
|
"grad_norm": 1.9961620517391614, |
|
"learning_rate": 1.415290652206105e-06, |
|
"loss": 0.1062, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.6526442307692307, |
|
"grad_norm": 3.3559687716616, |
|
"learning_rate": 1.4065842690220294e-06, |
|
"loss": 0.1192, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.6538461538461539, |
|
"grad_norm": 2.1228084765105373, |
|
"learning_rate": 1.3978942529653549e-06, |
|
"loss": 0.0997, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.6550480769230769, |
|
"grad_norm": 2.609409554692004, |
|
"learning_rate": 1.3892207341152416e-06, |
|
"loss": 0.1146, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.65625, |
|
"grad_norm": 3.084566569938987, |
|
"learning_rate": 1.3805638423039056e-06, |
|
"loss": 0.1238, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.6574519230769231, |
|
"grad_norm": 2.755372215903661, |
|
"learning_rate": 1.371923707114679e-06, |
|
"loss": 0.1091, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.6586538461538461, |
|
"grad_norm": 2.119982444557482, |
|
"learning_rate": 1.3633004578800613e-06, |
|
"loss": 0.099, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.6598557692307693, |
|
"grad_norm": 2.701943705630255, |
|
"learning_rate": 1.354694223679796e-06, |
|
"loss": 0.1235, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.6610576923076923, |
|
"grad_norm": 2.383471150976908, |
|
"learning_rate": 1.3461051333389275e-06, |
|
"loss": 0.1031, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.6622596153846154, |
|
"grad_norm": 3.00768304842994, |
|
"learning_rate": 1.3375333154258788e-06, |
|
"loss": 0.1087, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.6634615384615384, |
|
"grad_norm": 2.088505043527597, |
|
"learning_rate": 1.328978898250525e-06, |
|
"loss": 0.1166, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.6646634615384616, |
|
"grad_norm": 2.434276624558114, |
|
"learning_rate": 1.3204420098622727e-06, |
|
"loss": 0.11, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.6658653846153846, |
|
"grad_norm": 1.8412804984656046, |
|
"learning_rate": 1.3119227780481442e-06, |
|
"loss": 0.113, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.6670673076923077, |
|
"grad_norm": 2.0956844206733405, |
|
"learning_rate": 1.3034213303308627e-06, |
|
"loss": 0.1144, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.6682692307692307, |
|
"grad_norm": 2.1476124760530566, |
|
"learning_rate": 1.294937793966946e-06, |
|
"loss": 0.1095, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.6694711538461539, |
|
"grad_norm": 2.292664553276864, |
|
"learning_rate": 1.286472295944799e-06, |
|
"loss": 0.1146, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 0.6706730769230769, |
|
"grad_norm": 2.1662467131117404, |
|
"learning_rate": 1.2780249629828161e-06, |
|
"loss": 0.1097, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.671875, |
|
"grad_norm": 2.906015346971846, |
|
"learning_rate": 1.2695959215274817e-06, |
|
"loss": 0.1148, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.6730769230769231, |
|
"grad_norm": 2.1982439434562737, |
|
"learning_rate": 1.261185297751477e-06, |
|
"loss": 0.1053, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.6742788461538461, |
|
"grad_norm": 2.018201703916458, |
|
"learning_rate": 1.2527932175517934e-06, |
|
"loss": 0.115, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.6754807692307693, |
|
"grad_norm": 2.6111890149300976, |
|
"learning_rate": 1.2444198065478475e-06, |
|
"loss": 0.1224, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.6766826923076923, |
|
"grad_norm": 2.5284325319117267, |
|
"learning_rate": 1.2360651900795995e-06, |
|
"loss": 0.1207, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 0.6778846153846154, |
|
"grad_norm": 2.2545340347392955, |
|
"learning_rate": 1.2277294932056783e-06, |
|
"loss": 0.112, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.6790865384615384, |
|
"grad_norm": 3.362156324890133, |
|
"learning_rate": 1.2194128407015094e-06, |
|
"loss": 0.1164, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.6802884615384616, |
|
"grad_norm": 1.632189263569225, |
|
"learning_rate": 1.2111153570574454e-06, |
|
"loss": 0.1012, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.6814903846153846, |
|
"grad_norm": 2.092206166748186, |
|
"learning_rate": 1.202837166476907e-06, |
|
"loss": 0.1085, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.6826923076923077, |
|
"grad_norm": 2.7031603463833704, |
|
"learning_rate": 1.1945783928745187e-06, |
|
"loss": 0.1109, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.6838942307692307, |
|
"grad_norm": 3.4147286461299355, |
|
"learning_rate": 1.1863391598742535e-06, |
|
"loss": 0.1133, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 0.6850961538461539, |
|
"grad_norm": 2.1129785563716994, |
|
"learning_rate": 1.1781195908075903e-06, |
|
"loss": 0.1097, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6862980769230769, |
|
"grad_norm": 3.2026130054118593, |
|
"learning_rate": 1.169919808711659e-06, |
|
"loss": 0.1184, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 0.6875, |
|
"grad_norm": 2.249630204645609, |
|
"learning_rate": 1.1617399363274024e-06, |
|
"loss": 0.1106, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.6887019230769231, |
|
"grad_norm": 3.2963891692649514, |
|
"learning_rate": 1.1535800960977398e-06, |
|
"loss": 0.1196, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 0.6899038461538461, |
|
"grad_norm": 3.186358499780556, |
|
"learning_rate": 1.1454404101657319e-06, |
|
"loss": 0.1121, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.6911057692307693, |
|
"grad_norm": 2.47209843153002, |
|
"learning_rate": 1.1373210003727536e-06, |
|
"loss": 0.1167, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.6923076923076923, |
|
"grad_norm": 2.4518332512722876, |
|
"learning_rate": 1.1292219882566726e-06, |
|
"loss": 0.1148, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.6935096153846154, |
|
"grad_norm": 3.262824991958051, |
|
"learning_rate": 1.121143495050026e-06, |
|
"loss": 0.106, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 0.6947115384615384, |
|
"grad_norm": 2.338537712422274, |
|
"learning_rate": 1.1130856416782046e-06, |
|
"loss": 0.106, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.6959134615384616, |
|
"grad_norm": 2.2204770447922297, |
|
"learning_rate": 1.1050485487576506e-06, |
|
"loss": 0.1101, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 0.6971153846153846, |
|
"grad_norm": 2.0597064409649892, |
|
"learning_rate": 1.0970323365940443e-06, |
|
"loss": 0.0959, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.6983173076923077, |
|
"grad_norm": 2.222014493052337, |
|
"learning_rate": 1.089037125180506e-06, |
|
"loss": 0.1034, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 0.6995192307692307, |
|
"grad_norm": 3.1568264036888265, |
|
"learning_rate": 1.0810630341958004e-06, |
|
"loss": 0.1224, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.7007211538461539, |
|
"grad_norm": 2.1388673626652817, |
|
"learning_rate": 1.0731101830025442e-06, |
|
"loss": 0.1024, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 0.7019230769230769, |
|
"grad_norm": 2.844127901014447, |
|
"learning_rate": 1.0651786906454192e-06, |
|
"loss": 0.1236, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.703125, |
|
"grad_norm": 1.9936288576811623, |
|
"learning_rate": 1.057268675849395e-06, |
|
"loss": 0.1006, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.7043269230769231, |
|
"grad_norm": 1.8480926642214928, |
|
"learning_rate": 1.0493802570179411e-06, |
|
"loss": 0.1001, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.7055288461538461, |
|
"grad_norm": 2.261808036207062, |
|
"learning_rate": 1.041513552231265e-06, |
|
"loss": 0.1038, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 0.7067307692307693, |
|
"grad_norm": 2.2099301841197545, |
|
"learning_rate": 1.0336686792445424e-06, |
|
"loss": 0.1101, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.7079326923076923, |
|
"grad_norm": 2.180203910907892, |
|
"learning_rate": 1.0258457554861502e-06, |
|
"loss": 0.1057, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.7091346153846154, |
|
"grad_norm": 2.797064097348832, |
|
"learning_rate": 1.0180448980559125e-06, |
|
"loss": 0.0926, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.7103365384615384, |
|
"grad_norm": 3.1147260554752147, |
|
"learning_rate": 1.0102662237233465e-06, |
|
"loss": 0.1191, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 0.7115384615384616, |
|
"grad_norm": 2.488698925587082, |
|
"learning_rate": 1.0025098489259161e-06, |
|
"loss": 0.1014, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.7127403846153846, |
|
"grad_norm": 2.2420640526045927, |
|
"learning_rate": 9.947758897672855e-07, |
|
"loss": 0.1125, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 0.7139423076923077, |
|
"grad_norm": 2.634277342582424, |
|
"learning_rate": 9.870644620155878e-07, |
|
"loss": 0.1104, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.7151442307692307, |
|
"grad_norm": 2.278093380222903, |
|
"learning_rate": 9.793756811016824e-07, |
|
"loss": 0.1045, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.7163461538461539, |
|
"grad_norm": 2.1408407961096088, |
|
"learning_rate": 9.717096621174355e-07, |
|
"loss": 0.1154, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.7175480769230769, |
|
"grad_norm": 2.2023983168340413, |
|
"learning_rate": 9.640665198139957e-07, |
|
"loss": 0.1147, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 0.71875, |
|
"grad_norm": 1.9253814839904362, |
|
"learning_rate": 9.564463686000728e-07, |
|
"loss": 0.1157, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.7199519230769231, |
|
"grad_norm": 2.036041516452903, |
|
"learning_rate": 9.488493225402282e-07, |
|
"loss": 0.0948, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 0.7211538461538461, |
|
"grad_norm": 3.306618789071036, |
|
"learning_rate": 9.412754953531664e-07, |
|
"loss": 0.101, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7223557692307693, |
|
"grad_norm": 2.818632480308661, |
|
"learning_rate": 9.337250004100337e-07, |
|
"loss": 0.1232, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 0.7235576923076923, |
|
"grad_norm": 2.3587067360069334, |
|
"learning_rate": 9.261979507327204e-07, |
|
"loss": 0.1062, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.7247596153846154, |
|
"grad_norm": 2.3843210548687908, |
|
"learning_rate": 9.186944589921687e-07, |
|
"loss": 0.1161, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 0.7259615384615384, |
|
"grad_norm": 2.069552811499533, |
|
"learning_rate": 9.112146375066872e-07, |
|
"loss": 0.1037, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.7271634615384616, |
|
"grad_norm": 2.5490211308951487, |
|
"learning_rate": 9.037585982402678e-07, |
|
"loss": 0.1182, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.7283653846153846, |
|
"grad_norm": 2.2537446863547177, |
|
"learning_rate": 8.96326452800915e-07, |
|
"loss": 0.1024, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.7295673076923077, |
|
"grad_norm": 2.1542852130856085, |
|
"learning_rate": 8.889183124389645e-07, |
|
"loss": 0.1102, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 0.7307692307692307, |
|
"grad_norm": 1.8957554942236439, |
|
"learning_rate": 8.815342880454312e-07, |
|
"loss": 0.107, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.7319711538461539, |
|
"grad_norm": 2.4674322862732314, |
|
"learning_rate": 8.741744901503387e-07, |
|
"loss": 0.114, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 0.7331730769230769, |
|
"grad_norm": 2.039475353958351, |
|
"learning_rate": 8.66839028921071e-07, |
|
"loss": 0.1106, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.734375, |
|
"grad_norm": 2.3489571512912364, |
|
"learning_rate": 8.595280141607198e-07, |
|
"loss": 0.1073, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 0.7355769230769231, |
|
"grad_norm": 3.073761818193723, |
|
"learning_rate": 8.522415553064433e-07, |
|
"loss": 0.1069, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.7367788461538461, |
|
"grad_norm": 2.4347506007521433, |
|
"learning_rate": 8.44979761427826e-07, |
|
"loss": 0.1064, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 0.7379807692307693, |
|
"grad_norm": 2.3883683060647134, |
|
"learning_rate": 8.377427412252495e-07, |
|
"loss": 0.1063, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.7391826923076923, |
|
"grad_norm": 2.7706472616211077, |
|
"learning_rate": 8.305306030282618e-07, |
|
"loss": 0.1126, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.7403846153846154, |
|
"grad_norm": 2.5634994337657413, |
|
"learning_rate": 8.233434547939539e-07, |
|
"loss": 0.112, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.7415865384615384, |
|
"grad_norm": 2.397107165704345, |
|
"learning_rate": 8.161814041053526e-07, |
|
"loss": 0.1106, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 0.7427884615384616, |
|
"grad_norm": 2.2450002047020807, |
|
"learning_rate": 8.090445581698006e-07, |
|
"loss": 0.108, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.7439903846153846, |
|
"grad_norm": 2.014500102466641, |
|
"learning_rate": 8.019330238173568e-07, |
|
"loss": 0.1077, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 0.7451923076923077, |
|
"grad_norm": 2.168024712104591, |
|
"learning_rate": 7.948469074991955e-07, |
|
"loss": 0.1045, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.7463942307692307, |
|
"grad_norm": 3.0079126945368904, |
|
"learning_rate": 7.877863152860133e-07, |
|
"loss": 0.1092, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 0.7475961538461539, |
|
"grad_norm": 2.4795980921136294, |
|
"learning_rate": 7.807513528664415e-07, |
|
"loss": 0.1107, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.7487980769230769, |
|
"grad_norm": 2.247412162226902, |
|
"learning_rate": 7.737421255454661e-07, |
|
"loss": 0.1198, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 2.3971310721116983, |
|
"learning_rate": 7.667587382428455e-07, |
|
"loss": 0.1161, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.7512019230769231, |
|
"grad_norm": 1.8099165767446914, |
|
"learning_rate": 7.598012954915457e-07, |
|
"loss": 0.0973, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.7524038461538461, |
|
"grad_norm": 2.3170574343599286, |
|
"learning_rate": 7.528699014361757e-07, |
|
"loss": 0.1093, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 0.7536057692307693, |
|
"grad_norm": 1.7417689564815537, |
|
"learning_rate": 7.459646598314246e-07, |
|
"loss": 0.1021, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 0.7548076923076923, |
|
"grad_norm": 2.012989717897973, |
|
"learning_rate": 7.390856740405092e-07, |
|
"loss": 0.1022, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 0.7560096153846154, |
|
"grad_norm": 2.7755030823894082, |
|
"learning_rate": 7.322330470336314e-07, |
|
"loss": 0.108, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 0.7572115384615384, |
|
"grad_norm": 2.7553825309268305, |
|
"learning_rate": 7.254068813864315e-07, |
|
"loss": 0.1164, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.7584134615384616, |
|
"grad_norm": 3.2871528914249164, |
|
"learning_rate": 7.186072792784549e-07, |
|
"loss": 0.1018, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 0.7596153846153846, |
|
"grad_norm": 2.3825605880656826, |
|
"learning_rate": 7.118343424916249e-07, |
|
"loss": 0.1006, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 0.7608173076923077, |
|
"grad_norm": 2.8679237655683627, |
|
"learning_rate": 7.050881724087125e-07, |
|
"loss": 0.1043, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 0.7620192307692307, |
|
"grad_norm": 2.6274099068260557, |
|
"learning_rate": 6.983688700118257e-07, |
|
"loss": 0.1084, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 0.7632211538461539, |
|
"grad_norm": 2.432380483126836, |
|
"learning_rate": 6.916765358808969e-07, |
|
"loss": 0.1098, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.7644230769230769, |
|
"grad_norm": 2.2356194365705218, |
|
"learning_rate": 6.850112701921735e-07, |
|
"loss": 0.0974, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 0.765625, |
|
"grad_norm": 2.322442564380917, |
|
"learning_rate": 6.783731727167195e-07, |
|
"loss": 0.1149, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 0.7668269230769231, |
|
"grad_norm": 2.7155413629798777, |
|
"learning_rate": 6.717623428189262e-07, |
|
"loss": 0.1107, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 0.7680288461538461, |
|
"grad_norm": 2.5926151388895184, |
|
"learning_rate": 6.65178879455021e-07, |
|
"loss": 0.0961, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 2.053872678475484, |
|
"learning_rate": 6.586228811715853e-07, |
|
"loss": 0.104, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.7704326923076923, |
|
"grad_norm": 2.414054056484151, |
|
"learning_rate": 6.520944461040829e-07, |
|
"loss": 0.0987, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 0.7716346153846154, |
|
"grad_norm": 2.2800029934734014, |
|
"learning_rate": 6.455936719753883e-07, |
|
"loss": 0.1109, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 0.7728365384615384, |
|
"grad_norm": 2.254565376531854, |
|
"learning_rate": 6.391206560943241e-07, |
|
"loss": 0.0972, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 0.7740384615384616, |
|
"grad_norm": 2.4180234435201866, |
|
"learning_rate": 6.326754953542086e-07, |
|
"loss": 0.1055, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 0.7752403846153846, |
|
"grad_norm": 2.864294623339486, |
|
"learning_rate": 6.262582862313968e-07, |
|
"loss": 0.1073, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.7764423076923077, |
|
"grad_norm": 2.5508498340470465, |
|
"learning_rate": 6.198691247838437e-07, |
|
"loss": 0.1072, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 0.7776442307692307, |
|
"grad_norm": 2.7551427113131233, |
|
"learning_rate": 6.135081066496662e-07, |
|
"loss": 0.0988, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 0.7788461538461539, |
|
"grad_norm": 2.891789692416902, |
|
"learning_rate": 6.071753270457065e-07, |
|
"loss": 0.1214, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 0.7800480769230769, |
|
"grad_norm": 2.1917615480612165, |
|
"learning_rate": 6.00870880766111e-07, |
|
"loss": 0.1027, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 0.78125, |
|
"grad_norm": 2.323581668550887, |
|
"learning_rate": 5.945948621809092e-07, |
|
"loss": 0.0992, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.7824519230769231, |
|
"grad_norm": 2.166198462254229, |
|
"learning_rate": 5.883473652346031e-07, |
|
"loss": 0.1107, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 0.7836538461538461, |
|
"grad_norm": 2.1325952846722043, |
|
"learning_rate": 5.821284834447586e-07, |
|
"loss": 0.1137, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 0.7848557692307693, |
|
"grad_norm": 2.3389500061042856, |
|
"learning_rate": 5.759383099006094e-07, |
|
"loss": 0.114, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 0.7860576923076923, |
|
"grad_norm": 2.392023151903911, |
|
"learning_rate": 5.697769372616565e-07, |
|
"loss": 0.1154, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 0.7872596153846154, |
|
"grad_norm": 1.9514556840096247, |
|
"learning_rate": 5.636444577562911e-07, |
|
"loss": 0.1071, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.7884615384615384, |
|
"grad_norm": 1.7463857786932386, |
|
"learning_rate": 5.575409631804049e-07, |
|
"loss": 0.0932, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 0.7896634615384616, |
|
"grad_norm": 2.5321085368327023, |
|
"learning_rate": 5.51466544896021e-07, |
|
"loss": 0.1249, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 0.7908653846153846, |
|
"grad_norm": 2.7367754184042794, |
|
"learning_rate": 5.454212938299256e-07, |
|
"loss": 0.1083, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 0.7920673076923077, |
|
"grad_norm": 2.1480042151176795, |
|
"learning_rate": 5.39405300472306e-07, |
|
"loss": 0.1135, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 0.7932692307692307, |
|
"grad_norm": 2.1126995444295895, |
|
"learning_rate": 5.334186548753961e-07, |
|
"loss": 0.0993, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.7944711538461539, |
|
"grad_norm": 1.9387325114766338, |
|
"learning_rate": 5.2746144665213e-07, |
|
"loss": 0.0975, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 0.7956730769230769, |
|
"grad_norm": 2.5557991339707193, |
|
"learning_rate": 5.215337649747986e-07, |
|
"loss": 0.1062, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 0.796875, |
|
"grad_norm": 1.9233646398585384, |
|
"learning_rate": 5.156356985737154e-07, |
|
"loss": 0.0983, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 0.7980769230769231, |
|
"grad_norm": 2.2467131024558182, |
|
"learning_rate": 5.097673357358906e-07, |
|
"loss": 0.0968, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 0.7992788461538461, |
|
"grad_norm": 2.4109454813538442, |
|
"learning_rate": 5.039287643037058e-07, |
|
"loss": 0.0979, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.8004807692307693, |
|
"grad_norm": 3.124394231436496, |
|
"learning_rate": 4.981200716735993e-07, |
|
"loss": 0.1265, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 0.8016826923076923, |
|
"grad_norm": 2.6675264999412, |
|
"learning_rate": 4.92341344794763e-07, |
|
"loss": 0.1049, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 0.8028846153846154, |
|
"grad_norm": 2.848770862565795, |
|
"learning_rate": 4.865926701678353e-07, |
|
"loss": 0.1025, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 0.8040865384615384, |
|
"grad_norm": 2.6854316431958867, |
|
"learning_rate": 4.808741338436082e-07, |
|
"loss": 0.1073, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 0.8052884615384616, |
|
"grad_norm": 3.1092668803437515, |
|
"learning_rate": 4.7518582142174e-07, |
|
"loss": 0.0928, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.8064903846153846, |
|
"grad_norm": 2.1214191642164266, |
|
"learning_rate": 4.695278180494725e-07, |
|
"loss": 0.1012, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 0.8076923076923077, |
|
"grad_norm": 2.5730076842528553, |
|
"learning_rate": 4.6390020842035755e-07, |
|
"loss": 0.11, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.8088942307692307, |
|
"grad_norm": 2.68220531087873, |
|
"learning_rate": 4.5830307677298984e-07, |
|
"loss": 0.1188, |
|
"step": 1346 |
|
}, |
|
{ |
|
"epoch": 0.8100961538461539, |
|
"grad_norm": 2.363649492332498, |
|
"learning_rate": 4.5273650688974437e-07, |
|
"loss": 0.1021, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 0.8112980769230769, |
|
"grad_norm": 2.541964709244174, |
|
"learning_rate": 4.4720058209552163e-07, |
|
"loss": 0.0925, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.8125, |
|
"grad_norm": 3.265981110682609, |
|
"learning_rate": 4.4169538525650453e-07, |
|
"loss": 0.1037, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 0.8137019230769231, |
|
"grad_norm": 2.4485095937525854, |
|
"learning_rate": 4.362209987789129e-07, |
|
"loss": 0.1086, |
|
"step": 1354 |
|
}, |
|
{ |
|
"epoch": 0.8149038461538461, |
|
"grad_norm": 2.2907426923363805, |
|
"learning_rate": 4.307775046077739e-07, |
|
"loss": 0.0986, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 0.8161057692307693, |
|
"grad_norm": 2.0945358815806387, |
|
"learning_rate": 4.2536498422569237e-07, |
|
"loss": 0.0955, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 0.8173076923076923, |
|
"grad_norm": 2.211078181765995, |
|
"learning_rate": 4.1998351865163323e-07, |
|
"loss": 0.1005, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.8185096153846154, |
|
"grad_norm": 2.3888674275205473, |
|
"learning_rate": 4.1463318843970727e-07, |
|
"loss": 0.0946, |
|
"step": 1362 |
|
}, |
|
{ |
|
"epoch": 0.8197115384615384, |
|
"grad_norm": 2.8100928189396783, |
|
"learning_rate": 4.093140736779691e-07, |
|
"loss": 0.1072, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 0.8209134615384616, |
|
"grad_norm": 2.5814035620911775, |
|
"learning_rate": 4.0402625398721056e-07, |
|
"loss": 0.1085, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 0.8221153846153846, |
|
"grad_norm": 2.2204309134850604, |
|
"learning_rate": 3.987698085197761e-07, |
|
"loss": 0.1057, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 0.8233173076923077, |
|
"grad_norm": 2.284890393659316, |
|
"learning_rate": 3.935448159583774e-07, |
|
"loss": 0.1095, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.8245192307692307, |
|
"grad_norm": 2.9277446873455233, |
|
"learning_rate": 3.8835135451491037e-07, |
|
"loss": 0.0972, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 0.8257211538461539, |
|
"grad_norm": 2.624827973263955, |
|
"learning_rate": 3.831895019292897e-07, |
|
"loss": 0.1103, |
|
"step": 1374 |
|
}, |
|
{ |
|
"epoch": 0.8269230769230769, |
|
"grad_norm": 2.680261506966643, |
|
"learning_rate": 3.7805933546828265e-07, |
|
"loss": 0.1172, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.828125, |
|
"grad_norm": 2.194552961136517, |
|
"learning_rate": 3.7296093192435325e-07, |
|
"loss": 0.1003, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 0.8293269230769231, |
|
"grad_norm": 2.559847310791807, |
|
"learning_rate": 3.6789436761451135e-07, |
|
"loss": 0.1039, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.8305288461538461, |
|
"grad_norm": 2.1332823235020575, |
|
"learning_rate": 3.6285971837917514e-07, |
|
"loss": 0.1004, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 0.8317307692307693, |
|
"grad_norm": 2.260620258768886, |
|
"learning_rate": 3.578570595810274e-07, |
|
"loss": 0.1043, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 0.8329326923076923, |
|
"grad_norm": 2.1460191050714768, |
|
"learning_rate": 3.5288646610389497e-07, |
|
"loss": 0.0973, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 0.8341346153846154, |
|
"grad_norm": 2.4453293937330804, |
|
"learning_rate": 3.4794801235162575e-07, |
|
"loss": 0.0982, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 0.8353365384615384, |
|
"grad_norm": 2.5470784403076823, |
|
"learning_rate": 3.4304177224697284e-07, |
|
"loss": 0.1071, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.8365384615384616, |
|
"grad_norm": 2.1819545974434194, |
|
"learning_rate": 3.3816781923049047e-07, |
|
"loss": 0.0977, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 0.8377403846153846, |
|
"grad_norm": 2.66559740829053, |
|
"learning_rate": 3.333262262594328e-07, |
|
"loss": 0.1013, |
|
"step": 1394 |
|
}, |
|
{ |
|
"epoch": 0.8389423076923077, |
|
"grad_norm": 2.4531040234693493, |
|
"learning_rate": 3.285170658066636e-07, |
|
"loss": 0.1136, |
|
"step": 1396 |
|
}, |
|
{ |
|
"epoch": 0.8401442307692307, |
|
"grad_norm": 2.222643442815888, |
|
"learning_rate": 3.2374040985957005e-07, |
|
"loss": 0.1069, |
|
"step": 1398 |
|
}, |
|
{ |
|
"epoch": 0.8413461538461539, |
|
"grad_norm": 2.513474365488169, |
|
"learning_rate": 3.1899632991898634e-07, |
|
"loss": 0.1115, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.8425480769230769, |
|
"grad_norm": 2.4676487353640537, |
|
"learning_rate": 3.1428489699812187e-07, |
|
"loss": 0.1134, |
|
"step": 1402 |
|
}, |
|
{ |
|
"epoch": 0.84375, |
|
"grad_norm": 2.5478148761024344, |
|
"learning_rate": 3.096061816214993e-07, |
|
"loss": 0.1125, |
|
"step": 1404 |
|
}, |
|
{ |
|
"epoch": 0.8449519230769231, |
|
"grad_norm": 2.693250913616855, |
|
"learning_rate": 3.0496025382390023e-07, |
|
"loss": 0.1101, |
|
"step": 1406 |
|
}, |
|
{ |
|
"epoch": 0.8461538461538461, |
|
"grad_norm": 2.1582519269517846, |
|
"learning_rate": 3.0034718314931376e-07, |
|
"loss": 0.0987, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 0.8473557692307693, |
|
"grad_norm": 2.356390319809914, |
|
"learning_rate": 2.9576703864989705e-07, |
|
"loss": 0.1103, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.8485576923076923, |
|
"grad_norm": 2.4939149281676944, |
|
"learning_rate": 2.9121988888494297e-07, |
|
"loss": 0.1075, |
|
"step": 1412 |
|
}, |
|
{ |
|
"epoch": 0.8497596153846154, |
|
"grad_norm": 2.560971562380158, |
|
"learning_rate": 2.8670580191985096e-07, |
|
"loss": 0.1047, |
|
"step": 1414 |
|
}, |
|
{ |
|
"epoch": 0.8509615384615384, |
|
"grad_norm": 2.328897343184531, |
|
"learning_rate": 2.822248453251117e-07, |
|
"loss": 0.0952, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 0.8521634615384616, |
|
"grad_norm": 2.741941178369846, |
|
"learning_rate": 2.7777708617529263e-07, |
|
"loss": 0.114, |
|
"step": 1418 |
|
}, |
|
{ |
|
"epoch": 0.8533653846153846, |
|
"grad_norm": 2.8152555502780747, |
|
"learning_rate": 2.73362591048035e-07, |
|
"loss": 0.1118, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.8545673076923077, |
|
"grad_norm": 2.308625479951822, |
|
"learning_rate": 2.689814260230575e-07, |
|
"loss": 0.0916, |
|
"step": 1422 |
|
}, |
|
{ |
|
"epoch": 0.8557692307692307, |
|
"grad_norm": 2.7282177204631655, |
|
"learning_rate": 2.646336566811686e-07, |
|
"loss": 0.0998, |
|
"step": 1424 |
|
}, |
|
{ |
|
"epoch": 0.8569711538461539, |
|
"grad_norm": 2.3678387797587415, |
|
"learning_rate": 2.6031934810328006e-07, |
|
"loss": 0.097, |
|
"step": 1426 |
|
}, |
|
{ |
|
"epoch": 0.8581730769230769, |
|
"grad_norm": 2.2659866299053864, |
|
"learning_rate": 2.560385648694394e-07, |
|
"loss": 0.1035, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 0.859375, |
|
"grad_norm": 2.1167601912463665, |
|
"learning_rate": 2.5179137105785733e-07, |
|
"loss": 0.1133, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.8605769230769231, |
|
"grad_norm": 3.156290005065614, |
|
"learning_rate": 2.4757783024395244e-07, |
|
"loss": 0.1083, |
|
"step": 1432 |
|
}, |
|
{ |
|
"epoch": 0.8617788461538461, |
|
"grad_norm": 2.3830390104253514, |
|
"learning_rate": 2.43398005499397e-07, |
|
"loss": 0.1142, |
|
"step": 1434 |
|
}, |
|
{ |
|
"epoch": 0.8629807692307693, |
|
"grad_norm": 2.1651688894763805, |
|
"learning_rate": 2.3925195939117516e-07, |
|
"loss": 0.1008, |
|
"step": 1436 |
|
}, |
|
{ |
|
"epoch": 0.8641826923076923, |
|
"grad_norm": 2.906596215817537, |
|
"learning_rate": 2.3513975398064382e-07, |
|
"loss": 0.109, |
|
"step": 1438 |
|
}, |
|
{ |
|
"epoch": 0.8653846153846154, |
|
"grad_norm": 2.822390125625684, |
|
"learning_rate": 2.3106145082260777e-07, |
|
"loss": 0.11, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.8665865384615384, |
|
"grad_norm": 4.20785338557248, |
|
"learning_rate": 2.2701711096439177e-07, |
|
"loss": 0.0926, |
|
"step": 1442 |
|
}, |
|
{ |
|
"epoch": 0.8677884615384616, |
|
"grad_norm": 2.401735024395661, |
|
"learning_rate": 2.23006794944933e-07, |
|
"loss": 0.1096, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 0.8689903846153846, |
|
"grad_norm": 2.324535843969192, |
|
"learning_rate": 2.1903056279387242e-07, |
|
"loss": 0.0979, |
|
"step": 1446 |
|
}, |
|
{ |
|
"epoch": 0.8701923076923077, |
|
"grad_norm": 2.3309020366100395, |
|
"learning_rate": 2.1508847403065582e-07, |
|
"loss": 0.1003, |
|
"step": 1448 |
|
}, |
|
{ |
|
"epoch": 0.8713942307692307, |
|
"grad_norm": 1.8021632811568191, |
|
"learning_rate": 2.1118058766364245e-07, |
|
"loss": 0.0973, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.8725961538461539, |
|
"grad_norm": 3.2593847440771753, |
|
"learning_rate": 2.0730696218922376e-07, |
|
"loss": 0.1181, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 0.8737980769230769, |
|
"grad_norm": 2.822198349147213, |
|
"learning_rate": 2.0346765559094566e-07, |
|
"loss": 0.1011, |
|
"step": 1454 |
|
}, |
|
{ |
|
"epoch": 0.875, |
|
"grad_norm": 2.281439077352929, |
|
"learning_rate": 1.9966272533864183e-07, |
|
"loss": 0.1078, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 0.8762019230769231, |
|
"grad_norm": 2.113587059455818, |
|
"learning_rate": 1.9589222838757416e-07, |
|
"loss": 0.101, |
|
"step": 1458 |
|
}, |
|
{ |
|
"epoch": 0.8774038461538461, |
|
"grad_norm": 2.8223349033116034, |
|
"learning_rate": 1.9215622117757683e-07, |
|
"loss": 0.1061, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.8786057692307693, |
|
"grad_norm": 3.472491327729482, |
|
"learning_rate": 1.8845475963221504e-07, |
|
"loss": 0.1025, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 0.8798076923076923, |
|
"grad_norm": 1.8581268435798293, |
|
"learning_rate": 1.847878991579477e-07, |
|
"loss": 0.095, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 0.8810096153846154, |
|
"grad_norm": 2.730059314985803, |
|
"learning_rate": 1.8115569464329602e-07, |
|
"loss": 0.1186, |
|
"step": 1466 |
|
}, |
|
{ |
|
"epoch": 0.8822115384615384, |
|
"grad_norm": 2.1077055410907906, |
|
"learning_rate": 1.7755820045802146e-07, |
|
"loss": 0.1038, |
|
"step": 1468 |
|
}, |
|
{ |
|
"epoch": 0.8834134615384616, |
|
"grad_norm": 2.425923169061633, |
|
"learning_rate": 1.7399547045231612e-07, |
|
"loss": 0.1052, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.8846153846153846, |
|
"grad_norm": 3.112007013321009, |
|
"learning_rate": 1.7046755795599224e-07, |
|
"loss": 0.1081, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 0.8858173076923077, |
|
"grad_norm": 2.569797668666943, |
|
"learning_rate": 1.6697451577768558e-07, |
|
"loss": 0.1066, |
|
"step": 1474 |
|
}, |
|
{ |
|
"epoch": 0.8870192307692307, |
|
"grad_norm": 2.2793145814741815, |
|
"learning_rate": 1.6351639620406506e-07, |
|
"loss": 0.093, |
|
"step": 1476 |
|
}, |
|
{ |
|
"epoch": 0.8882211538461539, |
|
"grad_norm": 2.5222639348107148, |
|
"learning_rate": 1.600932509990502e-07, |
|
"loss": 0.1044, |
|
"step": 1478 |
|
}, |
|
{ |
|
"epoch": 0.8894230769230769, |
|
"grad_norm": 2.588170889871738, |
|
"learning_rate": 1.567051314030349e-07, |
|
"loss": 0.1095, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.890625, |
|
"grad_norm": 2.291713127389421, |
|
"learning_rate": 1.5335208813212376e-07, |
|
"loss": 0.108, |
|
"step": 1482 |
|
}, |
|
{ |
|
"epoch": 0.8918269230769231, |
|
"grad_norm": 2.466650814807856, |
|
"learning_rate": 1.500341713773687e-07, |
|
"loss": 0.0961, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 0.8930288461538461, |
|
"grad_norm": 2.7084007905892733, |
|
"learning_rate": 1.4675143080401965e-07, |
|
"loss": 0.1085, |
|
"step": 1486 |
|
}, |
|
{ |
|
"epoch": 0.8942307692307693, |
|
"grad_norm": 2.3729169086566286, |
|
"learning_rate": 1.4350391555078253e-07, |
|
"loss": 0.0961, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 0.8954326923076923, |
|
"grad_norm": 2.6818163643038995, |
|
"learning_rate": 1.4029167422908107e-07, |
|
"loss": 0.1043, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.8966346153846154, |
|
"grad_norm": 2.8498937973846066, |
|
"learning_rate": 1.3711475492233116e-07, |
|
"loss": 0.1005, |
|
"step": 1492 |
|
}, |
|
{ |
|
"epoch": 0.8978365384615384, |
|
"grad_norm": 2.3727561897542184, |
|
"learning_rate": 1.3397320518521993e-07, |
|
"loss": 0.1083, |
|
"step": 1494 |
|
}, |
|
{ |
|
"epoch": 0.8990384615384616, |
|
"grad_norm": 2.864804942811416, |
|
"learning_rate": 1.3086707204299415e-07, |
|
"loss": 0.1042, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 0.9002403846153846, |
|
"grad_norm": 2.5133132508801537, |
|
"learning_rate": 1.2779640199075627e-07, |
|
"loss": 0.1155, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 0.9014423076923077, |
|
"grad_norm": 2.534644984379523, |
|
"learning_rate": 1.2476124099277038e-07, |
|
"loss": 0.1136, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.9026442307692307, |
|
"grad_norm": 2.3008784304419714, |
|
"learning_rate": 1.217616344817693e-07, |
|
"loss": 0.0916, |
|
"step": 1502 |
|
}, |
|
{ |
|
"epoch": 0.9038461538461539, |
|
"grad_norm": 3.1775796763443047, |
|
"learning_rate": 1.1879762735828081e-07, |
|
"loss": 0.1042, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 0.9050480769230769, |
|
"grad_norm": 2.497072810226958, |
|
"learning_rate": 1.1586926398995057e-07, |
|
"loss": 0.1107, |
|
"step": 1506 |
|
}, |
|
{ |
|
"epoch": 0.90625, |
|
"grad_norm": 2.5866908472155923, |
|
"learning_rate": 1.129765882108802e-07, |
|
"loss": 0.1043, |
|
"step": 1508 |
|
}, |
|
{ |
|
"epoch": 0.9074519230769231, |
|
"grad_norm": 2.2123227768120626, |
|
"learning_rate": 1.1011964332097114e-07, |
|
"loss": 0.1056, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.9086538461538461, |
|
"grad_norm": 2.2624610691482134, |
|
"learning_rate": 1.0729847208527516e-07, |
|
"loss": 0.1097, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 0.9098557692307693, |
|
"grad_norm": 2.4925186199498115, |
|
"learning_rate": 1.045131167333563e-07, |
|
"loss": 0.1055, |
|
"step": 1514 |
|
}, |
|
{ |
|
"epoch": 0.9110576923076923, |
|
"grad_norm": 2.0402886616020846, |
|
"learning_rate": 1.0176361895865683e-07, |
|
"loss": 0.1012, |
|
"step": 1516 |
|
}, |
|
{ |
|
"epoch": 0.9122596153846154, |
|
"grad_norm": 3.0557112542579583, |
|
"learning_rate": 9.9050019917874e-08, |
|
"loss": 0.0904, |
|
"step": 1518 |
|
}, |
|
{ |
|
"epoch": 0.9134615384615384, |
|
"grad_norm": 2.8003064126365653, |
|
"learning_rate": 9.637236023034403e-08, |
|
"loss": 0.096, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.9146634615384616, |
|
"grad_norm": 2.3950834236132548, |
|
"learning_rate": 9.373067997743429e-08, |
|
"loss": 0.1103, |
|
"step": 1522 |
|
}, |
|
{ |
|
"epoch": 0.9158653846153846, |
|
"grad_norm": 2.3223933444523275, |
|
"learning_rate": 9.112501870194273e-08, |
|
"loss": 0.1051, |
|
"step": 1524 |
|
}, |
|
{ |
|
"epoch": 0.9170673076923077, |
|
"grad_norm": 2.6778326848037084, |
|
"learning_rate": 8.855541540750579e-08, |
|
"loss": 0.1079, |
|
"step": 1526 |
|
}, |
|
{ |
|
"epoch": 0.9182692307692307, |
|
"grad_norm": 2.527199338042573, |
|
"learning_rate": 8.602190855801523e-08, |
|
"loss": 0.1109, |
|
"step": 1528 |
|
}, |
|
{ |
|
"epoch": 0.9194711538461539, |
|
"grad_norm": 2.2105422763119598, |
|
"learning_rate": 8.352453607704286e-08, |
|
"loss": 0.0994, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.9206730769230769, |
|
"grad_norm": 2.4639244734521357, |
|
"learning_rate": 8.106333534727145e-08, |
|
"loss": 0.1108, |
|
"step": 1532 |
|
}, |
|
{ |
|
"epoch": 0.921875, |
|
"grad_norm": 2.2497655156731162, |
|
"learning_rate": 7.86383432099358e-08, |
|
"loss": 0.0991, |
|
"step": 1534 |
|
}, |
|
{ |
|
"epoch": 0.9230769230769231, |
|
"grad_norm": 2.3748694066193115, |
|
"learning_rate": 7.624959596427145e-08, |
|
"loss": 0.0998, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 0.9242788461538461, |
|
"grad_norm": 3.0743080890043983, |
|
"learning_rate": 7.38971293669713e-08, |
|
"loss": 0.1068, |
|
"step": 1538 |
|
}, |
|
{ |
|
"epoch": 0.9254807692307693, |
|
"grad_norm": 2.1876563987125675, |
|
"learning_rate": 7.15809786316507e-08, |
|
"loss": 0.1021, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.9266826923076923, |
|
"grad_norm": 2.0975147997242143, |
|
"learning_rate": 6.930117842831958e-08, |
|
"loss": 0.1046, |
|
"step": 1542 |
|
}, |
|
{ |
|
"epoch": 0.9278846153846154, |
|
"grad_norm": 2.075830478745358, |
|
"learning_rate": 6.705776288286281e-08, |
|
"loss": 0.0954, |
|
"step": 1544 |
|
}, |
|
{ |
|
"epoch": 0.9290865384615384, |
|
"grad_norm": 2.6184435003671362, |
|
"learning_rate": 6.485076557653236e-08, |
|
"loss": 0.1175, |
|
"step": 1546 |
|
}, |
|
{ |
|
"epoch": 0.9302884615384616, |
|
"grad_norm": 1.9245903381939464, |
|
"learning_rate": 6.268021954544095e-08, |
|
"loss": 0.1013, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 0.9314903846153846, |
|
"grad_norm": 2.2746719047460853, |
|
"learning_rate": 6.05461572800703e-08, |
|
"loss": 0.1126, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.9326923076923077, |
|
"grad_norm": 2.340635789861226, |
|
"learning_rate": 5.844861072478336e-08, |
|
"loss": 0.1123, |
|
"step": 1552 |
|
}, |
|
{ |
|
"epoch": 0.9338942307692307, |
|
"grad_norm": 2.1875192748623418, |
|
"learning_rate": 5.6387611277346486e-08, |
|
"loss": 0.1207, |
|
"step": 1554 |
|
}, |
|
{ |
|
"epoch": 0.9350961538461539, |
|
"grad_norm": 2.7002532298324997, |
|
"learning_rate": 5.436318978845917e-08, |
|
"loss": 0.1021, |
|
"step": 1556 |
|
}, |
|
{ |
|
"epoch": 0.9362980769230769, |
|
"grad_norm": 2.50568764777332, |
|
"learning_rate": 5.237537656129332e-08, |
|
"loss": 0.0963, |
|
"step": 1558 |
|
}, |
|
{ |
|
"epoch": 0.9375, |
|
"grad_norm": 2.4255108084712806, |
|
"learning_rate": 5.042420135103865e-08, |
|
"loss": 0.1056, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.9387019230769231, |
|
"grad_norm": 2.3846332604623215, |
|
"learning_rate": 4.850969336445688e-08, |
|
"loss": 0.1018, |
|
"step": 1562 |
|
}, |
|
{ |
|
"epoch": 0.9399038461538461, |
|
"grad_norm": 1.9410217717252691, |
|
"learning_rate": 4.663188125944601e-08, |
|
"loss": 0.1034, |
|
"step": 1564 |
|
}, |
|
{ |
|
"epoch": 0.9411057692307693, |
|
"grad_norm": 1.9719340906948433, |
|
"learning_rate": 4.47907931446101e-08, |
|
"loss": 0.1002, |
|
"step": 1566 |
|
}, |
|
{ |
|
"epoch": 0.9423076923076923, |
|
"grad_norm": 2.7209374640824073, |
|
"learning_rate": 4.298645657883904e-08, |
|
"loss": 0.1025, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 0.9435096153846154, |
|
"grad_norm": 2.5221543954885, |
|
"learning_rate": 4.121889857089584e-08, |
|
"loss": 0.1129, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.9447115384615384, |
|
"grad_norm": 2.362069229118289, |
|
"learning_rate": 3.948814557901276e-08, |
|
"loss": 0.1076, |
|
"step": 1572 |
|
}, |
|
{ |
|
"epoch": 0.9459134615384616, |
|
"grad_norm": 2.383853603153857, |
|
"learning_rate": 3.779422351049417e-08, |
|
"loss": 0.116, |
|
"step": 1574 |
|
}, |
|
{ |
|
"epoch": 0.9471153846153846, |
|
"grad_norm": 1.9719712080104705, |
|
"learning_rate": 3.613715772133097e-08, |
|
"loss": 0.0939, |
|
"step": 1576 |
|
}, |
|
{ |
|
"epoch": 0.9483173076923077, |
|
"grad_norm": 2.302141720175791, |
|
"learning_rate": 3.451697301581791e-08, |
|
"loss": 0.1108, |
|
"step": 1578 |
|
}, |
|
{ |
|
"epoch": 0.9495192307692307, |
|
"grad_norm": 2.259505291636599, |
|
"learning_rate": 3.293369364618465e-08, |
|
"loss": 0.0928, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.9507211538461539, |
|
"grad_norm": 3.306570471168316, |
|
"learning_rate": 3.138734331223248e-08, |
|
"loss": 0.1092, |
|
"step": 1582 |
|
}, |
|
{ |
|
"epoch": 0.9519230769230769, |
|
"grad_norm": 2.132657723925501, |
|
"learning_rate": 2.987794516097875e-08, |
|
"loss": 0.1076, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 0.953125, |
|
"grad_norm": 2.4599044268457995, |
|
"learning_rate": 2.8405521786310508e-08, |
|
"loss": 0.1032, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 0.9543269230769231, |
|
"grad_norm": 2.743173912553656, |
|
"learning_rate": 2.6970095228647243e-08, |
|
"loss": 0.1006, |
|
"step": 1588 |
|
}, |
|
{ |
|
"epoch": 0.9555288461538461, |
|
"grad_norm": 2.7902694032785678, |
|
"learning_rate": 2.5571686974609766e-08, |
|
"loss": 0.1082, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.9567307692307693, |
|
"grad_norm": 2.1596954453730617, |
|
"learning_rate": 2.4210317956698814e-08, |
|
"loss": 0.0968, |
|
"step": 1592 |
|
}, |
|
{ |
|
"epoch": 0.9579326923076923, |
|
"grad_norm": 2.1748717942295452, |
|
"learning_rate": 2.2886008552983064e-08, |
|
"loss": 0.1159, |
|
"step": 1594 |
|
}, |
|
{ |
|
"epoch": 0.9591346153846154, |
|
"grad_norm": 2.772837831508022, |
|
"learning_rate": 2.1598778586792158e-08, |
|
"loss": 0.1188, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 0.9603365384615384, |
|
"grad_norm": 2.3289207936080105, |
|
"learning_rate": 2.0348647326420835e-08, |
|
"loss": 0.1177, |
|
"step": 1598 |
|
}, |
|
{ |
|
"epoch": 0.9615384615384616, |
|
"grad_norm": 2.2952508596930685, |
|
"learning_rate": 1.91356334848411e-08, |
|
"loss": 0.1076, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.9627403846153846, |
|
"grad_norm": 2.273702791234848, |
|
"learning_rate": 1.795975521942106e-08, |
|
"loss": 0.1046, |
|
"step": 1602 |
|
}, |
|
{ |
|
"epoch": 0.9639423076923077, |
|
"grad_norm": 2.592999996375681, |
|
"learning_rate": 1.682103013165376e-08, |
|
"loss": 0.114, |
|
"step": 1604 |
|
}, |
|
{ |
|
"epoch": 0.9651442307692307, |
|
"grad_norm": 2.227735041940276, |
|
"learning_rate": 1.571947526689349e-08, |
|
"loss": 0.1054, |
|
"step": 1606 |
|
}, |
|
{ |
|
"epoch": 0.9663461538461539, |
|
"grad_norm": 2.26729275395297, |
|
"learning_rate": 1.4655107114101008e-08, |
|
"loss": 0.0916, |
|
"step": 1608 |
|
}, |
|
{ |
|
"epoch": 0.9675480769230769, |
|
"grad_norm": 2.546233331956148, |
|
"learning_rate": 1.362794160559594e-08, |
|
"loss": 0.1151, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.96875, |
|
"grad_norm": 2.496560671829686, |
|
"learning_rate": 1.263799411681893e-08, |
|
"loss": 0.1161, |
|
"step": 1612 |
|
}, |
|
{ |
|
"epoch": 0.9699519230769231, |
|
"grad_norm": 2.2682911816628715, |
|
"learning_rate": 1.1685279466101817e-08, |
|
"loss": 0.1008, |
|
"step": 1614 |
|
}, |
|
{ |
|
"epoch": 0.9711538461538461, |
|
"grad_norm": 2.7304953540259405, |
|
"learning_rate": 1.0769811914444206e-08, |
|
"loss": 0.1041, |
|
"step": 1616 |
|
}, |
|
{ |
|
"epoch": 0.9723557692307693, |
|
"grad_norm": 2.6466979182161885, |
|
"learning_rate": 9.89160516530252e-09, |
|
"loss": 0.1044, |
|
"step": 1618 |
|
}, |
|
{ |
|
"epoch": 0.9735576923076923, |
|
"grad_norm": 2.394702205679758, |
|
"learning_rate": 9.050672364382118e-09, |
|
"loss": 0.0955, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.9747596153846154, |
|
"grad_norm": 3.3455759598567836, |
|
"learning_rate": 8.247026099443279e-09, |
|
"loss": 0.1109, |
|
"step": 1622 |
|
}, |
|
{ |
|
"epoch": 0.9759615384615384, |
|
"grad_norm": 2.275839668759994, |
|
"learning_rate": 7.480678400109965e-09, |
|
"loss": 0.1061, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 0.9771634615384616, |
|
"grad_norm": 2.133232464295508, |
|
"learning_rate": 6.751640737691911e-09, |
|
"loss": 0.1042, |
|
"step": 1626 |
|
}, |
|
{ |
|
"epoch": 0.9783653846153846, |
|
"grad_norm": 2.304963137531874, |
|
"learning_rate": 6.059924025012542e-09, |
|
"loss": 0.1038, |
|
"step": 1628 |
|
}, |
|
{ |
|
"epoch": 0.9795673076923077, |
|
"grad_norm": 1.9133668945458344, |
|
"learning_rate": 5.405538616244377e-09, |
|
"loss": 0.0946, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.9807692307692307, |
|
"grad_norm": 2.7193573070283596, |
|
"learning_rate": 4.788494306755542e-09, |
|
"loss": 0.1021, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 0.9819711538461539, |
|
"grad_norm": 2.8786636757179878, |
|
"learning_rate": 4.208800332961838e-09, |
|
"loss": 0.1102, |
|
"step": 1634 |
|
}, |
|
{ |
|
"epoch": 0.9831730769230769, |
|
"grad_norm": 2.4551588884935, |
|
"learning_rate": 3.666465372190453e-09, |
|
"loss": 0.0962, |
|
"step": 1636 |
|
}, |
|
{ |
|
"epoch": 0.984375, |
|
"grad_norm": 2.1960677965536957, |
|
"learning_rate": 3.1614975425470207e-09, |
|
"loss": 0.1151, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 0.9855769230769231, |
|
"grad_norm": 3.4510437899419992, |
|
"learning_rate": 2.693904402797376e-09, |
|
"loss": 0.0972, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.9867788461538461, |
|
"grad_norm": 2.4216851513924205, |
|
"learning_rate": 2.2636929522520945e-09, |
|
"loss": 0.1199, |
|
"step": 1642 |
|
}, |
|
{ |
|
"epoch": 0.9879807692307693, |
|
"grad_norm": 2.5149641594525214, |
|
"learning_rate": 1.8708696306624087e-09, |
|
"loss": 0.0947, |
|
"step": 1644 |
|
}, |
|
{ |
|
"epoch": 0.9891826923076923, |
|
"grad_norm": 2.8133693313833303, |
|
"learning_rate": 1.5154403181247279e-09, |
|
"loss": 0.102, |
|
"step": 1646 |
|
}, |
|
{ |
|
"epoch": 0.9903846153846154, |
|
"grad_norm": 1.9008875611487197, |
|
"learning_rate": 1.1974103349909894e-09, |
|
"loss": 0.0975, |
|
"step": 1648 |
|
}, |
|
{ |
|
"epoch": 0.9915865384615384, |
|
"grad_norm": 2.3226078974966184, |
|
"learning_rate": 9.167844417901084e-10, |
|
"loss": 0.115, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.9927884615384616, |
|
"grad_norm": 3.1578748196963695, |
|
"learning_rate": 6.735668391566475e-10, |
|
"loss": 0.1127, |
|
"step": 1652 |
|
}, |
|
{ |
|
"epoch": 0.9939903846153846, |
|
"grad_norm": 2.273472912036705, |
|
"learning_rate": 4.677611677675331e-10, |
|
"loss": 0.096, |
|
"step": 1654 |
|
}, |
|
{ |
|
"epoch": 0.9951923076923077, |
|
"grad_norm": 2.0942516236835993, |
|
"learning_rate": 2.993705082879328e-10, |
|
"loss": 0.1131, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 0.9963942307692307, |
|
"grad_norm": 2.5707284998861346, |
|
"learning_rate": 1.683973813249029e-10, |
|
"loss": 0.0986, |
|
"step": 1658 |
|
}, |
|
{ |
|
"epoch": 0.9975961538461539, |
|
"grad_norm": 2.5538888383981013, |
|
"learning_rate": 7.484374738936373e-11, |
|
"loss": 0.1101, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.9987980769230769, |
|
"grad_norm": 2.0051743259281305, |
|
"learning_rate": 1.8711006867788707e-11, |
|
"loss": 0.1041, |
|
"step": 1662 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.79621961431732, |
|
"learning_rate": 0.0, |
|
"loss": 0.1053, |
|
"step": 1664 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 1664, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 522572361891840.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|