{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 8668, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00011536686663590217, "grad_norm": 0.39384475350379944, "learning_rate": 2.306805074971165e-07, "loss": 1.2335, "step": 1 }, { "epoch": 0.0005768343331795108, "grad_norm": 0.3301478326320648, "learning_rate": 1.1534025374855826e-06, "loss": 1.1506, "step": 5 }, { "epoch": 0.0011536686663590216, "grad_norm": 0.32749322056770325, "learning_rate": 2.3068050749711653e-06, "loss": 1.1258, "step": 10 }, { "epoch": 0.0017305029995385325, "grad_norm": 0.3380628526210785, "learning_rate": 3.4602076124567477e-06, "loss": 1.2071, "step": 15 }, { "epoch": 0.0023073373327180432, "grad_norm": 0.3165785074234009, "learning_rate": 4.6136101499423305e-06, "loss": 1.1473, "step": 20 }, { "epoch": 0.002884171665897554, "grad_norm": 0.3524048924446106, "learning_rate": 5.7670126874279126e-06, "loss": 1.1617, "step": 25 }, { "epoch": 0.003461005999077065, "grad_norm": 0.31146979331970215, "learning_rate": 6.920415224913495e-06, "loss": 1.1327, "step": 30 }, { "epoch": 0.0040378403322565756, "grad_norm": 0.2818208634853363, "learning_rate": 8.073817762399077e-06, "loss": 1.1434, "step": 35 }, { "epoch": 0.0046146746654360865, "grad_norm": 0.28524142503738403, "learning_rate": 9.227220299884661e-06, "loss": 1.0858, "step": 40 }, { "epoch": 0.005191508998615597, "grad_norm": 0.24573445320129395, "learning_rate": 1.0380622837370241e-05, "loss": 1.0725, "step": 45 }, { "epoch": 0.005768343331795108, "grad_norm": 0.263478547334671, "learning_rate": 1.1534025374855825e-05, "loss": 1.0934, "step": 50 }, { "epoch": 0.006345177664974619, "grad_norm": 0.2519683539867401, "learning_rate": 1.2687427912341407e-05, "loss": 1.0627, "step": 55 }, { "epoch": 0.00692201199815413, "grad_norm": 0.23009565472602844, "learning_rate": 1.384083044982699e-05, "loss": 1.0259, "step": 60 }, { "epoch": 0.007498846331333641, "grad_norm": 0.2220510095357895, "learning_rate": 1.4994232987312573e-05, "loss": 1.0459, "step": 65 }, { "epoch": 0.008075680664513151, "grad_norm": 0.20859792828559875, "learning_rate": 1.6147635524798155e-05, "loss": 1.0493, "step": 70 }, { "epoch": 0.008652514997692663, "grad_norm": 0.24422504007816315, "learning_rate": 1.7301038062283735e-05, "loss": 1.0426, "step": 75 }, { "epoch": 0.009229349330872173, "grad_norm": 0.26209887862205505, "learning_rate": 1.8454440599769322e-05, "loss": 1.1044, "step": 80 }, { "epoch": 0.009806183664051685, "grad_norm": 0.22573940455913544, "learning_rate": 1.9607843137254903e-05, "loss": 1.0154, "step": 85 }, { "epoch": 0.010383017997231195, "grad_norm": 0.23182636499404907, "learning_rate": 2.0761245674740483e-05, "loss": 0.9841, "step": 90 }, { "epoch": 0.010959852330410707, "grad_norm": 0.20166198909282684, "learning_rate": 2.191464821222607e-05, "loss": 0.9969, "step": 95 }, { "epoch": 0.011536686663590217, "grad_norm": 0.24127909541130066, "learning_rate": 2.306805074971165e-05, "loss": 1.0168, "step": 100 }, { "epoch": 0.012113520996769728, "grad_norm": 0.23480503261089325, "learning_rate": 2.422145328719723e-05, "loss": 1.0291, "step": 105 }, { "epoch": 0.012690355329949238, "grad_norm": 0.2245372086763382, "learning_rate": 2.5374855824682814e-05, "loss": 1.0141, "step": 110 }, { "epoch": 0.01326718966312875, "grad_norm": 0.22040125727653503, "learning_rate": 2.6528258362168395e-05, "loss": 1.0327, "step": 115 }, { "epoch": 0.01384402399630826, "grad_norm": 0.22710174322128296, "learning_rate": 2.768166089965398e-05, "loss": 1.055, "step": 120 }, { "epoch": 0.01442085832948777, "grad_norm": 0.21789094805717468, "learning_rate": 2.8835063437139565e-05, "loss": 1.0143, "step": 125 }, { "epoch": 0.014997692662667282, "grad_norm": 0.26034095883369446, "learning_rate": 2.9988465974625146e-05, "loss": 1.082, "step": 130 }, { "epoch": 0.015574526995846792, "grad_norm": 0.24871432781219482, "learning_rate": 3.1141868512110726e-05, "loss": 0.9683, "step": 135 }, { "epoch": 0.016151361329026302, "grad_norm": 0.24066007137298584, "learning_rate": 3.229527104959631e-05, "loss": 1.0045, "step": 140 }, { "epoch": 0.016728195662205816, "grad_norm": 0.24926799535751343, "learning_rate": 3.344867358708189e-05, "loss": 1.0534, "step": 145 }, { "epoch": 0.017305029995385326, "grad_norm": 0.2561403512954712, "learning_rate": 3.460207612456747e-05, "loss": 0.9992, "step": 150 }, { "epoch": 0.017881864328564836, "grad_norm": 0.25197339057922363, "learning_rate": 3.575547866205306e-05, "loss": 1.0213, "step": 155 }, { "epoch": 0.018458698661744346, "grad_norm": 0.257375031709671, "learning_rate": 3.6908881199538644e-05, "loss": 1.0, "step": 160 }, { "epoch": 0.01903553299492386, "grad_norm": 0.26233741641044617, "learning_rate": 3.806228373702422e-05, "loss": 0.9835, "step": 165 }, { "epoch": 0.01961236732810337, "grad_norm": 0.26200389862060547, "learning_rate": 3.9215686274509805e-05, "loss": 1.0475, "step": 170 }, { "epoch": 0.02018920166128288, "grad_norm": 0.3026478886604309, "learning_rate": 4.036908881199539e-05, "loss": 1.0337, "step": 175 }, { "epoch": 0.02076603599446239, "grad_norm": 0.24392971396446228, "learning_rate": 4.1522491349480966e-05, "loss": 1.0181, "step": 180 }, { "epoch": 0.0213428703276419, "grad_norm": 0.2525658905506134, "learning_rate": 4.2675893886966556e-05, "loss": 0.9815, "step": 185 }, { "epoch": 0.021919704660821413, "grad_norm": 0.2874011695384979, "learning_rate": 4.382929642445214e-05, "loss": 0.9717, "step": 190 }, { "epoch": 0.022496538994000923, "grad_norm": 0.3149228096008301, "learning_rate": 4.498269896193772e-05, "loss": 1.0196, "step": 195 }, { "epoch": 0.023073373327180433, "grad_norm": 0.24751496315002441, "learning_rate": 4.61361014994233e-05, "loss": 1.029, "step": 200 }, { "epoch": 0.023650207660359943, "grad_norm": 0.26091670989990234, "learning_rate": 4.7289504036908884e-05, "loss": 0.9799, "step": 205 }, { "epoch": 0.024227041993539457, "grad_norm": 0.24843664467334747, "learning_rate": 4.844290657439446e-05, "loss": 0.948, "step": 210 }, { "epoch": 0.024803876326718967, "grad_norm": 0.26564517617225647, "learning_rate": 4.9596309111880045e-05, "loss": 1.0037, "step": 215 }, { "epoch": 0.025380710659898477, "grad_norm": 0.25427091121673584, "learning_rate": 5.074971164936563e-05, "loss": 0.9891, "step": 220 }, { "epoch": 0.025957544993077987, "grad_norm": 0.24899506568908691, "learning_rate": 5.190311418685121e-05, "loss": 0.9797, "step": 225 }, { "epoch": 0.0265343793262575, "grad_norm": 0.2365058809518814, "learning_rate": 5.305651672433679e-05, "loss": 0.9779, "step": 230 }, { "epoch": 0.02711121365943701, "grad_norm": 0.25148388743400574, "learning_rate": 5.4209919261822386e-05, "loss": 0.9934, "step": 235 }, { "epoch": 0.02768804799261652, "grad_norm": 0.24851441383361816, "learning_rate": 5.536332179930796e-05, "loss": 0.9609, "step": 240 }, { "epoch": 0.02826488232579603, "grad_norm": 0.2596060633659363, "learning_rate": 5.651672433679355e-05, "loss": 0.955, "step": 245 }, { "epoch": 0.02884171665897554, "grad_norm": 0.26578041911125183, "learning_rate": 5.767012687427913e-05, "loss": 1.0006, "step": 250 }, { "epoch": 0.029418550992155054, "grad_norm": 0.25275397300720215, "learning_rate": 5.882352941176471e-05, "loss": 1.0443, "step": 255 }, { "epoch": 0.029995385325334564, "grad_norm": 0.246384397149086, "learning_rate": 5.997693194925029e-05, "loss": 1.0123, "step": 260 }, { "epoch": 0.030572219658514074, "grad_norm": 0.23048047721385956, "learning_rate": 6.113033448673587e-05, "loss": 0.9969, "step": 265 }, { "epoch": 0.031149053991693584, "grad_norm": 0.24622942507266998, "learning_rate": 6.228373702422145e-05, "loss": 1.0324, "step": 270 }, { "epoch": 0.031725888324873094, "grad_norm": 0.23391634225845337, "learning_rate": 6.343713956170704e-05, "loss": 0.9575, "step": 275 }, { "epoch": 0.032302722658052604, "grad_norm": 0.2237214893102646, "learning_rate": 6.459054209919262e-05, "loss": 1.0388, "step": 280 }, { "epoch": 0.03287955699123212, "grad_norm": 0.22574639320373535, "learning_rate": 6.57439446366782e-05, "loss": 1.0074, "step": 285 }, { "epoch": 0.03345639132441163, "grad_norm": 0.23499815165996552, "learning_rate": 6.689734717416379e-05, "loss": 0.9567, "step": 290 }, { "epoch": 0.03403322565759114, "grad_norm": 0.22160688042640686, "learning_rate": 6.805074971164937e-05, "loss": 1.0325, "step": 295 }, { "epoch": 0.03461005999077065, "grad_norm": 0.2253323346376419, "learning_rate": 6.920415224913494e-05, "loss": 0.9682, "step": 300 }, { "epoch": 0.03518689432395016, "grad_norm": 0.22069986164569855, "learning_rate": 7.035755478662054e-05, "loss": 1.0038, "step": 305 }, { "epoch": 0.03576372865712967, "grad_norm": 0.23517417907714844, "learning_rate": 7.151095732410612e-05, "loss": 0.9954, "step": 310 }, { "epoch": 0.03634056299030918, "grad_norm": 0.22023826837539673, "learning_rate": 7.26643598615917e-05, "loss": 1.0708, "step": 315 }, { "epoch": 0.03691739732348869, "grad_norm": 0.22501811385154724, "learning_rate": 7.381776239907729e-05, "loss": 1.0403, "step": 320 }, { "epoch": 0.0374942316566682, "grad_norm": 0.24813653528690338, "learning_rate": 7.497116493656286e-05, "loss": 1.0395, "step": 325 }, { "epoch": 0.03807106598984772, "grad_norm": 0.213524729013443, "learning_rate": 7.612456747404844e-05, "loss": 1.0479, "step": 330 }, { "epoch": 0.03864790032302723, "grad_norm": 0.2197512686252594, "learning_rate": 7.727797001153403e-05, "loss": 0.9709, "step": 335 }, { "epoch": 0.03922473465620674, "grad_norm": 0.21706676483154297, "learning_rate": 7.843137254901961e-05, "loss": 0.9824, "step": 340 }, { "epoch": 0.03980156898938625, "grad_norm": 0.2092558592557907, "learning_rate": 7.95847750865052e-05, "loss": 0.9749, "step": 345 }, { "epoch": 0.04037840332256576, "grad_norm": 0.20605534315109253, "learning_rate": 8.073817762399078e-05, "loss": 0.9728, "step": 350 }, { "epoch": 0.04095523765574527, "grad_norm": 0.20985351502895355, "learning_rate": 8.189158016147636e-05, "loss": 0.9399, "step": 355 }, { "epoch": 0.04153207198892478, "grad_norm": 0.20424993336200714, "learning_rate": 8.304498269896193e-05, "loss": 0.9659, "step": 360 }, { "epoch": 0.04210890632210429, "grad_norm": 0.2047097533941269, "learning_rate": 8.419838523644751e-05, "loss": 0.9825, "step": 365 }, { "epoch": 0.0426857406552838, "grad_norm": 0.2090773731470108, "learning_rate": 8.535178777393311e-05, "loss": 0.9915, "step": 370 }, { "epoch": 0.043262574988463316, "grad_norm": 0.20311830937862396, "learning_rate": 8.65051903114187e-05, "loss": 0.9481, "step": 375 }, { "epoch": 0.043839409321642826, "grad_norm": 0.20276297628879547, "learning_rate": 8.765859284890428e-05, "loss": 1.0057, "step": 380 }, { "epoch": 0.044416243654822336, "grad_norm": 0.20655770599842072, "learning_rate": 8.881199538638986e-05, "loss": 1.0034, "step": 385 }, { "epoch": 0.044993077988001846, "grad_norm": 0.21446913480758667, "learning_rate": 8.996539792387543e-05, "loss": 0.958, "step": 390 }, { "epoch": 0.045569912321181356, "grad_norm": 0.20291991531848907, "learning_rate": 9.111880046136102e-05, "loss": 0.9714, "step": 395 }, { "epoch": 0.046146746654360866, "grad_norm": 0.19942238926887512, "learning_rate": 9.22722029988466e-05, "loss": 0.9791, "step": 400 }, { "epoch": 0.046723580987540377, "grad_norm": 0.21126116812229156, "learning_rate": 9.342560553633218e-05, "loss": 0.9764, "step": 405 }, { "epoch": 0.04730041532071989, "grad_norm": 0.2006087452173233, "learning_rate": 9.457900807381777e-05, "loss": 0.9389, "step": 410 }, { "epoch": 0.0478772496538994, "grad_norm": 0.1997053176164627, "learning_rate": 9.573241061130335e-05, "loss": 0.9795, "step": 415 }, { "epoch": 0.048454083987078914, "grad_norm": 0.19919082522392273, "learning_rate": 9.688581314878892e-05, "loss": 0.9865, "step": 420 }, { "epoch": 0.049030918320258424, "grad_norm": 0.20420940220355988, "learning_rate": 9.80392156862745e-05, "loss": 1.0243, "step": 425 }, { "epoch": 0.049607752653437934, "grad_norm": 0.20005351305007935, "learning_rate": 9.919261822376009e-05, "loss": 0.9298, "step": 430 }, { "epoch": 0.050184586986617444, "grad_norm": 0.2057618349790573, "learning_rate": 0.00010034602076124569, "loss": 0.9673, "step": 435 }, { "epoch": 0.050761421319796954, "grad_norm": 0.2002270221710205, "learning_rate": 0.00010149942329873126, "loss": 1.0362, "step": 440 }, { "epoch": 0.051338255652976464, "grad_norm": 0.23044097423553467, "learning_rate": 0.00010265282583621685, "loss": 1.0132, "step": 445 }, { "epoch": 0.051915089986155974, "grad_norm": 0.19683632254600525, "learning_rate": 0.00010380622837370242, "loss": 0.9883, "step": 450 }, { "epoch": 0.052491924319335484, "grad_norm": 0.18685606122016907, "learning_rate": 0.00010495963091118801, "loss": 1.0097, "step": 455 }, { "epoch": 0.053068758652515, "grad_norm": 0.19922864437103271, "learning_rate": 0.00010611303344867358, "loss": 1.0129, "step": 460 }, { "epoch": 0.05364559298569451, "grad_norm": 0.22423453629016876, "learning_rate": 0.00010726643598615918, "loss": 0.9616, "step": 465 }, { "epoch": 0.05422242731887402, "grad_norm": 0.19105130434036255, "learning_rate": 0.00010841983852364477, "loss": 0.9873, "step": 470 }, { "epoch": 0.05479926165205353, "grad_norm": 0.191370889544487, "learning_rate": 0.00010957324106113034, "loss": 0.9869, "step": 475 }, { "epoch": 0.05537609598523304, "grad_norm": 0.1888877898454666, "learning_rate": 0.00011072664359861593, "loss": 0.9754, "step": 480 }, { "epoch": 0.05595293031841255, "grad_norm": 0.19908085465431213, "learning_rate": 0.0001118800461361015, "loss": 0.9978, "step": 485 }, { "epoch": 0.05652976465159206, "grad_norm": 0.1849226951599121, "learning_rate": 0.0001130334486735871, "loss": 1.0036, "step": 490 }, { "epoch": 0.05710659898477157, "grad_norm": 0.18880179524421692, "learning_rate": 0.00011418685121107266, "loss": 0.9969, "step": 495 }, { "epoch": 0.05768343331795108, "grad_norm": 0.18281018733978271, "learning_rate": 0.00011534025374855826, "loss": 1.0163, "step": 500 }, { "epoch": 0.0582602676511306, "grad_norm": 0.18743227422237396, "learning_rate": 0.00011649365628604383, "loss": 0.9774, "step": 505 }, { "epoch": 0.05883710198431011, "grad_norm": 0.18649840354919434, "learning_rate": 0.00011764705882352942, "loss": 0.9948, "step": 510 }, { "epoch": 0.05941393631748962, "grad_norm": 0.18914422392845154, "learning_rate": 0.00011880046136101499, "loss": 0.9789, "step": 515 }, { "epoch": 0.05999077065066913, "grad_norm": 0.20373612642288208, "learning_rate": 0.00011995386389850058, "loss": 0.9594, "step": 520 }, { "epoch": 0.06056760498384864, "grad_norm": 0.1853344440460205, "learning_rate": 0.00012110726643598615, "loss": 0.9422, "step": 525 }, { "epoch": 0.06114443931702815, "grad_norm": 0.17580455541610718, "learning_rate": 0.00012226066897347174, "loss": 0.962, "step": 530 }, { "epoch": 0.06172127365020766, "grad_norm": 0.18509171903133392, "learning_rate": 0.00012341407151095733, "loss": 0.9269, "step": 535 }, { "epoch": 0.06229810798338717, "grad_norm": 0.17391765117645264, "learning_rate": 0.0001245674740484429, "loss": 0.9379, "step": 540 }, { "epoch": 0.06287494231656668, "grad_norm": 0.185151144862175, "learning_rate": 0.0001257208765859285, "loss": 0.9803, "step": 545 }, { "epoch": 0.06345177664974619, "grad_norm": 0.18012270331382751, "learning_rate": 0.00012687427912341407, "loss": 0.9318, "step": 550 }, { "epoch": 0.0640286109829257, "grad_norm": 0.19646641612052917, "learning_rate": 0.00012802768166089967, "loss": 1.0218, "step": 555 }, { "epoch": 0.06460544531610521, "grad_norm": 0.1846383512020111, "learning_rate": 0.00012918108419838524, "loss": 1.025, "step": 560 }, { "epoch": 0.06518227964928472, "grad_norm": 0.18073053658008575, "learning_rate": 0.00013033448673587084, "loss": 0.9418, "step": 565 }, { "epoch": 0.06575911398246424, "grad_norm": 0.17771542072296143, "learning_rate": 0.0001314878892733564, "loss": 0.939, "step": 570 }, { "epoch": 0.06633594831564375, "grad_norm": 0.17932404577732086, "learning_rate": 0.000132641291810842, "loss": 1.0029, "step": 575 }, { "epoch": 0.06691278264882326, "grad_norm": 0.1846706122159958, "learning_rate": 0.00013379469434832757, "loss": 0.9732, "step": 580 }, { "epoch": 0.06748961698200277, "grad_norm": 0.18250420689582825, "learning_rate": 0.00013494809688581317, "loss": 0.9838, "step": 585 }, { "epoch": 0.06806645131518228, "grad_norm": 0.18181759119033813, "learning_rate": 0.00013610149942329874, "loss": 0.9732, "step": 590 }, { "epoch": 0.0686432856483618, "grad_norm": 0.19068962335586548, "learning_rate": 0.0001372549019607843, "loss": 0.9789, "step": 595 }, { "epoch": 0.0692201199815413, "grad_norm": 0.17766667902469635, "learning_rate": 0.00013840830449826988, "loss": 0.9702, "step": 600 }, { "epoch": 0.06979695431472081, "grad_norm": 0.18375654518604279, "learning_rate": 0.00013956170703575548, "loss": 0.9239, "step": 605 }, { "epoch": 0.07037378864790032, "grad_norm": 0.17888925969600677, "learning_rate": 0.00014071510957324108, "loss": 0.9834, "step": 610 }, { "epoch": 0.07095062298107983, "grad_norm": 0.1771126687526703, "learning_rate": 0.00014186851211072665, "loss": 0.9383, "step": 615 }, { "epoch": 0.07152745731425934, "grad_norm": 0.19248628616333008, "learning_rate": 0.00014302191464821224, "loss": 0.931, "step": 620 }, { "epoch": 0.07210429164743885, "grad_norm": 0.1845206469297409, "learning_rate": 0.0001441753171856978, "loss": 0.9919, "step": 625 }, { "epoch": 0.07268112598061836, "grad_norm": 0.17463363707065582, "learning_rate": 0.0001453287197231834, "loss": 0.9698, "step": 630 }, { "epoch": 0.07325796031379787, "grad_norm": 0.17882540822029114, "learning_rate": 0.00014648212226066898, "loss": 0.9985, "step": 635 }, { "epoch": 0.07383479464697738, "grad_norm": 0.1773298680782318, "learning_rate": 0.00014763552479815458, "loss": 0.9572, "step": 640 }, { "epoch": 0.0744116289801569, "grad_norm": 0.18714497983455658, "learning_rate": 0.00014878892733564015, "loss": 1.0213, "step": 645 }, { "epoch": 0.0749884633133364, "grad_norm": 0.16878995299339294, "learning_rate": 0.00014994232987312572, "loss": 0.9478, "step": 650 }, { "epoch": 0.07556529764651591, "grad_norm": 0.18111148476600647, "learning_rate": 0.0001510957324106113, "loss": 0.9727, "step": 655 }, { "epoch": 0.07614213197969544, "grad_norm": 0.18033739924430847, "learning_rate": 0.00015224913494809689, "loss": 0.9953, "step": 660 }, { "epoch": 0.07671896631287495, "grad_norm": 0.184474378824234, "learning_rate": 0.00015340253748558246, "loss": 1.0182, "step": 665 }, { "epoch": 0.07729580064605446, "grad_norm": 0.17728130519390106, "learning_rate": 0.00015455594002306805, "loss": 0.921, "step": 670 }, { "epoch": 0.07787263497923397, "grad_norm": 0.1744864135980606, "learning_rate": 0.00015570934256055365, "loss": 0.9696, "step": 675 }, { "epoch": 0.07844946931241348, "grad_norm": 0.16884462535381317, "learning_rate": 0.00015686274509803922, "loss": 0.9756, "step": 680 }, { "epoch": 0.07902630364559299, "grad_norm": 0.1904095709323883, "learning_rate": 0.00015801614763552482, "loss": 0.9662, "step": 685 }, { "epoch": 0.0796031379787725, "grad_norm": 0.1834830790758133, "learning_rate": 0.0001591695501730104, "loss": 0.9916, "step": 690 }, { "epoch": 0.08017997231195201, "grad_norm": 0.17388418316841125, "learning_rate": 0.00016032295271049598, "loss": 0.9661, "step": 695 }, { "epoch": 0.08075680664513152, "grad_norm": 0.17867860198020935, "learning_rate": 0.00016147635524798155, "loss": 0.9702, "step": 700 }, { "epoch": 0.08133364097831103, "grad_norm": 0.184253990650177, "learning_rate": 0.00016262975778546715, "loss": 0.9628, "step": 705 }, { "epoch": 0.08191047531149054, "grad_norm": 0.18207858502864838, "learning_rate": 0.00016378316032295272, "loss": 0.9882, "step": 710 }, { "epoch": 0.08248730964467005, "grad_norm": 0.1673344224691391, "learning_rate": 0.0001649365628604383, "loss": 1.0004, "step": 715 }, { "epoch": 0.08306414397784956, "grad_norm": 0.17834331095218658, "learning_rate": 0.00016608996539792386, "loss": 0.9624, "step": 720 }, { "epoch": 0.08364097831102907, "grad_norm": 0.17010116577148438, "learning_rate": 0.00016724336793540946, "loss": 0.9419, "step": 725 }, { "epoch": 0.08421781264420858, "grad_norm": 0.1887020319700241, "learning_rate": 0.00016839677047289503, "loss": 0.9735, "step": 730 }, { "epoch": 0.08479464697738809, "grad_norm": 0.17254365980625153, "learning_rate": 0.00016955017301038063, "loss": 0.9959, "step": 735 }, { "epoch": 0.0853714813105676, "grad_norm": 0.18531525135040283, "learning_rate": 0.00017070357554786622, "loss": 1.0063, "step": 740 }, { "epoch": 0.08594831564374712, "grad_norm": 0.1854889988899231, "learning_rate": 0.0001718569780853518, "loss": 1.0276, "step": 745 }, { "epoch": 0.08652514997692663, "grad_norm": 0.17261534929275513, "learning_rate": 0.0001730103806228374, "loss": 0.9459, "step": 750 }, { "epoch": 0.08710198431010614, "grad_norm": 0.17594070732593536, "learning_rate": 0.00017416378316032296, "loss": 0.981, "step": 755 }, { "epoch": 0.08767881864328565, "grad_norm": 0.17268770933151245, "learning_rate": 0.00017531718569780856, "loss": 0.9591, "step": 760 }, { "epoch": 0.08825565297646516, "grad_norm": 0.1795402467250824, "learning_rate": 0.00017647058823529413, "loss": 1.0009, "step": 765 }, { "epoch": 0.08883248730964467, "grad_norm": 0.17739154398441315, "learning_rate": 0.00017762399077277973, "loss": 1.0325, "step": 770 }, { "epoch": 0.08940932164282418, "grad_norm": 0.1737346351146698, "learning_rate": 0.0001787773933102653, "loss": 0.966, "step": 775 }, { "epoch": 0.08998615597600369, "grad_norm": 0.178639754652977, "learning_rate": 0.00017993079584775087, "loss": 0.9921, "step": 780 }, { "epoch": 0.0905629903091832, "grad_norm": 0.18672531843185425, "learning_rate": 0.00018108419838523644, "loss": 0.9398, "step": 785 }, { "epoch": 0.09113982464236271, "grad_norm": 0.17832833528518677, "learning_rate": 0.00018223760092272203, "loss": 1.0191, "step": 790 }, { "epoch": 0.09171665897554222, "grad_norm": 0.17171098291873932, "learning_rate": 0.0001833910034602076, "loss": 0.9889, "step": 795 }, { "epoch": 0.09229349330872173, "grad_norm": 0.17071138322353363, "learning_rate": 0.0001845444059976932, "loss": 0.9414, "step": 800 }, { "epoch": 0.09287032764190124, "grad_norm": 0.17644046247005463, "learning_rate": 0.0001856978085351788, "loss": 1.0259, "step": 805 }, { "epoch": 0.09344716197508075, "grad_norm": 0.17984060943126678, "learning_rate": 0.00018685121107266437, "loss": 0.9826, "step": 810 }, { "epoch": 0.09402399630826026, "grad_norm": 0.1776990294456482, "learning_rate": 0.00018800461361014997, "loss": 0.9663, "step": 815 }, { "epoch": 0.09460083064143977, "grad_norm": 0.17558909952640533, "learning_rate": 0.00018915801614763554, "loss": 1.0345, "step": 820 }, { "epoch": 0.09517766497461928, "grad_norm": 0.18706142902374268, "learning_rate": 0.00019031141868512113, "loss": 1.0199, "step": 825 }, { "epoch": 0.0957544993077988, "grad_norm": 0.1777406483888626, "learning_rate": 0.0001914648212226067, "loss": 1.0523, "step": 830 }, { "epoch": 0.09633133364097832, "grad_norm": 0.16959840059280396, "learning_rate": 0.00019261822376009227, "loss": 0.9452, "step": 835 }, { "epoch": 0.09690816797415783, "grad_norm": 0.17502658069133759, "learning_rate": 0.00019377162629757784, "loss": 1.0417, "step": 840 }, { "epoch": 0.09748500230733734, "grad_norm": 0.17616289854049683, "learning_rate": 0.00019492502883506344, "loss": 0.9871, "step": 845 }, { "epoch": 0.09806183664051685, "grad_norm": 0.1845337152481079, "learning_rate": 0.000196078431372549, "loss": 0.9917, "step": 850 }, { "epoch": 0.09863867097369636, "grad_norm": 0.1851508468389511, "learning_rate": 0.0001972318339100346, "loss": 1.0393, "step": 855 }, { "epoch": 0.09921550530687587, "grad_norm": 0.1803300529718399, "learning_rate": 0.00019838523644752018, "loss": 0.975, "step": 860 }, { "epoch": 0.09979233964005538, "grad_norm": 0.17024391889572144, "learning_rate": 0.00019953863898500578, "loss": 0.9605, "step": 865 }, { "epoch": 0.10036917397323489, "grad_norm": 0.17882846295833588, "learning_rate": 0.0001999999270186907, "loss": 0.9835, "step": 870 }, { "epoch": 0.1009460083064144, "grad_norm": 0.1729104071855545, "learning_rate": 0.0001999994810221862, "loss": 0.9661, "step": 875 }, { "epoch": 0.10152284263959391, "grad_norm": 0.1663312315940857, "learning_rate": 0.00019999862957615513, "loss": 1.0276, "step": 880 }, { "epoch": 0.10209967697277342, "grad_norm": 0.1741991490125656, "learning_rate": 0.00019999737268404973, "loss": 0.9306, "step": 885 }, { "epoch": 0.10267651130595293, "grad_norm": 0.19253897666931152, "learning_rate": 0.00019999571035096608, "loss": 1.0522, "step": 890 }, { "epoch": 0.10325334563913244, "grad_norm": 0.19019544124603271, "learning_rate": 0.00019999364258364413, "loss": 0.991, "step": 895 }, { "epoch": 0.10383017997231195, "grad_norm": 0.17976698279380798, "learning_rate": 0.00019999116939046764, "loss": 0.9986, "step": 900 }, { "epoch": 0.10440701430549146, "grad_norm": 0.18436984717845917, "learning_rate": 0.0001999882907814643, "loss": 0.9901, "step": 905 }, { "epoch": 0.10498384863867097, "grad_norm": 0.17687635123729706, "learning_rate": 0.0001999850067683054, "loss": 1.0231, "step": 910 }, { "epoch": 0.10556068297185048, "grad_norm": 0.18162792921066284, "learning_rate": 0.00019998131736430604, "loss": 0.9746, "step": 915 }, { "epoch": 0.10613751730503, "grad_norm": 0.18052905797958374, "learning_rate": 0.00019997722258442499, "loss": 0.9929, "step": 920 }, { "epoch": 0.10671435163820951, "grad_norm": 0.1827809065580368, "learning_rate": 0.00019997272244526456, "loss": 1.0031, "step": 925 }, { "epoch": 0.10729118597138902, "grad_norm": 0.1823299676179886, "learning_rate": 0.00019996781696507069, "loss": 0.969, "step": 930 }, { "epoch": 0.10786802030456853, "grad_norm": 0.18722161650657654, "learning_rate": 0.00019996250616373268, "loss": 0.9922, "step": 935 }, { "epoch": 0.10844485463774804, "grad_norm": 0.1760849803686142, "learning_rate": 0.0001999567900627833, "loss": 1.0221, "step": 940 }, { "epoch": 0.10902168897092755, "grad_norm": 0.17676706612110138, "learning_rate": 0.0001999506686853986, "loss": 0.9472, "step": 945 }, { "epoch": 0.10959852330410706, "grad_norm": 0.1758042424917221, "learning_rate": 0.00019994414205639775, "loss": 0.9059, "step": 950 }, { "epoch": 0.11017535763728657, "grad_norm": 0.18398715555667877, "learning_rate": 0.00019993721020224308, "loss": 0.9649, "step": 955 }, { "epoch": 0.11075219197046608, "grad_norm": 0.17959755659103394, "learning_rate": 0.0001999298731510399, "loss": 0.9904, "step": 960 }, { "epoch": 0.11132902630364559, "grad_norm": 0.17280980944633484, "learning_rate": 0.00019992213093253643, "loss": 1.0314, "step": 965 }, { "epoch": 0.1119058606368251, "grad_norm": 0.17635640501976013, "learning_rate": 0.0001999139835781236, "loss": 1.0148, "step": 970 }, { "epoch": 0.11248269497000461, "grad_norm": 0.1721516102552414, "learning_rate": 0.00019990543112083503, "loss": 0.9573, "step": 975 }, { "epoch": 0.11305952930318412, "grad_norm": 0.18502525985240936, "learning_rate": 0.00019989647359534672, "loss": 1.0328, "step": 980 }, { "epoch": 0.11363636363636363, "grad_norm": 0.18054784834384918, "learning_rate": 0.0001998871110379772, "loss": 0.9956, "step": 985 }, { "epoch": 0.11421319796954314, "grad_norm": 0.1870676875114441, "learning_rate": 0.00019987734348668706, "loss": 0.9665, "step": 990 }, { "epoch": 0.11479003230272265, "grad_norm": 0.18523703515529633, "learning_rate": 0.00019986717098107896, "loss": 0.9926, "step": 995 }, { "epoch": 0.11536686663590216, "grad_norm": 0.2257257103919983, "learning_rate": 0.00019985659356239758, "loss": 0.9635, "step": 1000 }, { "epoch": 0.11594370096908169, "grad_norm": 0.1749376505613327, "learning_rate": 0.00019984561127352914, "loss": 0.9773, "step": 1005 }, { "epoch": 0.1165205353022612, "grad_norm": 0.18685142695903778, "learning_rate": 0.00019983422415900158, "loss": 0.967, "step": 1010 }, { "epoch": 0.1170973696354407, "grad_norm": 0.1762312352657318, "learning_rate": 0.00019982243226498411, "loss": 0.9666, "step": 1015 }, { "epoch": 0.11767420396862022, "grad_norm": 0.18300583958625793, "learning_rate": 0.00019981023563928716, "loss": 0.9654, "step": 1020 }, { "epoch": 0.11825103830179973, "grad_norm": 0.17595453560352325, "learning_rate": 0.00019979763433136216, "loss": 0.9668, "step": 1025 }, { "epoch": 0.11882787263497924, "grad_norm": 0.17857778072357178, "learning_rate": 0.00019978462839230133, "loss": 0.9814, "step": 1030 }, { "epoch": 0.11940470696815875, "grad_norm": 0.194159135222435, "learning_rate": 0.0001997712178748374, "loss": 0.9548, "step": 1035 }, { "epoch": 0.11998154130133826, "grad_norm": 0.1872921586036682, "learning_rate": 0.0001997574028333436, "loss": 0.9537, "step": 1040 }, { "epoch": 0.12055837563451777, "grad_norm": 0.18175874650478363, "learning_rate": 0.0001997431833238332, "loss": 0.9708, "step": 1045 }, { "epoch": 0.12113520996769728, "grad_norm": 0.17377467453479767, "learning_rate": 0.00019972855940395947, "loss": 1.0154, "step": 1050 }, { "epoch": 0.12171204430087679, "grad_norm": 0.1744399070739746, "learning_rate": 0.00019971353113301527, "loss": 0.983, "step": 1055 }, { "epoch": 0.1222888786340563, "grad_norm": 0.19033999741077423, "learning_rate": 0.00019969809857193306, "loss": 0.9915, "step": 1060 }, { "epoch": 0.12286571296723581, "grad_norm": 0.18971246480941772, "learning_rate": 0.0001996822617832843, "loss": 0.974, "step": 1065 }, { "epoch": 0.12344254730041532, "grad_norm": 0.18075977265834808, "learning_rate": 0.0001996660208312796, "loss": 0.9888, "step": 1070 }, { "epoch": 0.12401938163359483, "grad_norm": 0.18304185569286346, "learning_rate": 0.00019964937578176816, "loss": 1.0237, "step": 1075 }, { "epoch": 0.12459621596677434, "grad_norm": 0.18264040350914001, "learning_rate": 0.00019963232670223752, "loss": 1.023, "step": 1080 }, { "epoch": 0.12517305029995385, "grad_norm": 0.17688079178333282, "learning_rate": 0.00019961487366181355, "loss": 0.9399, "step": 1085 }, { "epoch": 0.12574988463313336, "grad_norm": 0.18298649787902832, "learning_rate": 0.00019959701673125983, "loss": 1.0495, "step": 1090 }, { "epoch": 0.12632671896631287, "grad_norm": 0.1869022250175476, "learning_rate": 0.00019957875598297759, "loss": 0.9749, "step": 1095 }, { "epoch": 0.12690355329949238, "grad_norm": 0.17531757056713104, "learning_rate": 0.00019956009149100533, "loss": 0.9841, "step": 1100 }, { "epoch": 0.1274803876326719, "grad_norm": 0.18799515068531036, "learning_rate": 0.00019954102333101856, "loss": 0.985, "step": 1105 }, { "epoch": 0.1280572219658514, "grad_norm": 0.19451211392879486, "learning_rate": 0.0001995215515803294, "loss": 1.0184, "step": 1110 }, { "epoch": 0.1286340562990309, "grad_norm": 0.17992301285266876, "learning_rate": 0.00019950167631788642, "loss": 1.0029, "step": 1115 }, { "epoch": 0.12921089063221042, "grad_norm": 0.18217399716377258, "learning_rate": 0.00019948139762427416, "loss": 0.9786, "step": 1120 }, { "epoch": 0.12978772496538993, "grad_norm": 0.1724836379289627, "learning_rate": 0.000199460715581713, "loss": 0.9503, "step": 1125 }, { "epoch": 0.13036455929856944, "grad_norm": 0.18973779678344727, "learning_rate": 0.0001994396302740585, "loss": 0.9589, "step": 1130 }, { "epoch": 0.13094139363174895, "grad_norm": 0.1789364516735077, "learning_rate": 0.00019941814178680144, "loss": 1.0123, "step": 1135 }, { "epoch": 0.13151822796492849, "grad_norm": 0.179600328207016, "learning_rate": 0.00019939625020706724, "loss": 0.9644, "step": 1140 }, { "epoch": 0.132095062298108, "grad_norm": 0.20578639209270477, "learning_rate": 0.00019937395562361564, "loss": 0.9155, "step": 1145 }, { "epoch": 0.1326718966312875, "grad_norm": 0.17455343902111053, "learning_rate": 0.00019935125812684047, "loss": 1.0081, "step": 1150 }, { "epoch": 0.13324873096446702, "grad_norm": 0.18840928375720978, "learning_rate": 0.00019932815780876904, "loss": 0.9383, "step": 1155 }, { "epoch": 0.13382556529764653, "grad_norm": 0.18001633882522583, "learning_rate": 0.00019930465476306197, "loss": 0.997, "step": 1160 }, { "epoch": 0.13440239963082604, "grad_norm": 0.18877308070659637, "learning_rate": 0.00019928074908501272, "loss": 1.0056, "step": 1165 }, { "epoch": 0.13497923396400555, "grad_norm": 0.19247262179851532, "learning_rate": 0.00019925644087154734, "loss": 0.9396, "step": 1170 }, { "epoch": 0.13555606829718506, "grad_norm": 0.1856631338596344, "learning_rate": 0.00019923173022122378, "loss": 0.9777, "step": 1175 }, { "epoch": 0.13613290263036457, "grad_norm": 0.18794025480747223, "learning_rate": 0.00019920661723423183, "loss": 0.9611, "step": 1180 }, { "epoch": 0.13670973696354408, "grad_norm": 0.18122021853923798, "learning_rate": 0.00019918110201239247, "loss": 0.9364, "step": 1185 }, { "epoch": 0.1372865712967236, "grad_norm": 0.18005718290805817, "learning_rate": 0.00019915518465915758, "loss": 0.9338, "step": 1190 }, { "epoch": 0.1378634056299031, "grad_norm": 0.1888824999332428, "learning_rate": 0.00019912886527960954, "loss": 0.9059, "step": 1195 }, { "epoch": 0.1384402399630826, "grad_norm": 0.17332522571086884, "learning_rate": 0.0001991021439804607, "loss": 0.9822, "step": 1200 }, { "epoch": 0.13901707429626212, "grad_norm": 0.18236401677131653, "learning_rate": 0.00019907502087005297, "loss": 1.0221, "step": 1205 }, { "epoch": 0.13959390862944163, "grad_norm": 0.18748964369297028, "learning_rate": 0.00019904749605835742, "loss": 1.0282, "step": 1210 }, { "epoch": 0.14017074296262114, "grad_norm": 0.18207697570323944, "learning_rate": 0.00019901956965697387, "loss": 1.0046, "step": 1215 }, { "epoch": 0.14074757729580065, "grad_norm": 0.18568968772888184, "learning_rate": 0.00019899124177913041, "loss": 1.0182, "step": 1220 }, { "epoch": 0.14132441162898016, "grad_norm": 0.17385929822921753, "learning_rate": 0.00019896251253968288, "loss": 0.956, "step": 1225 }, { "epoch": 0.14190124596215967, "grad_norm": 0.18582086265087128, "learning_rate": 0.0001989333820551144, "loss": 1.0011, "step": 1230 }, { "epoch": 0.14247808029533918, "grad_norm": 0.18691149353981018, "learning_rate": 0.00019890385044353501, "loss": 1.0031, "step": 1235 }, { "epoch": 0.1430549146285187, "grad_norm": 0.17932343482971191, "learning_rate": 0.00019887391782468113, "loss": 1.0163, "step": 1240 }, { "epoch": 0.1436317489616982, "grad_norm": 0.1789650321006775, "learning_rate": 0.000198843584319915, "loss": 0.9428, "step": 1245 }, { "epoch": 0.1442085832948777, "grad_norm": 0.17853863537311554, "learning_rate": 0.0001988128500522244, "loss": 0.9619, "step": 1250 }, { "epoch": 0.14478541762805722, "grad_norm": 0.18588118255138397, "learning_rate": 0.00019878171514622187, "loss": 0.9928, "step": 1255 }, { "epoch": 0.14536225196123673, "grad_norm": 0.1837441772222519, "learning_rate": 0.00019875017972814435, "loss": 0.9711, "step": 1260 }, { "epoch": 0.14593908629441624, "grad_norm": 0.1958654373884201, "learning_rate": 0.00019871824392585276, "loss": 0.9413, "step": 1265 }, { "epoch": 0.14651592062759575, "grad_norm": 0.19134190678596497, "learning_rate": 0.00019868590786883134, "loss": 0.9717, "step": 1270 }, { "epoch": 0.14709275496077526, "grad_norm": 0.18663935363292694, "learning_rate": 0.00019865317168818713, "loss": 0.9806, "step": 1275 }, { "epoch": 0.14766958929395477, "grad_norm": 0.18832144141197205, "learning_rate": 0.0001986200355166495, "loss": 0.9256, "step": 1280 }, { "epoch": 0.14824642362713428, "grad_norm": 0.17965355515480042, "learning_rate": 0.0001985864994885697, "loss": 0.9852, "step": 1285 }, { "epoch": 0.1488232579603138, "grad_norm": 0.18879751861095428, "learning_rate": 0.00019855256373991993, "loss": 0.9489, "step": 1290 }, { "epoch": 0.1494000922934933, "grad_norm": 0.18717263638973236, "learning_rate": 0.00019851822840829338, "loss": 0.9698, "step": 1295 }, { "epoch": 0.1499769266266728, "grad_norm": 0.19117680191993713, "learning_rate": 0.0001984834936329031, "loss": 0.9967, "step": 1300 }, { "epoch": 0.15055376095985232, "grad_norm": 0.18893282115459442, "learning_rate": 0.00019844835955458193, "loss": 0.9391, "step": 1305 }, { "epoch": 0.15113059529303183, "grad_norm": 0.1852642297744751, "learning_rate": 0.00019841282631578145, "loss": 0.9871, "step": 1310 }, { "epoch": 0.15170742962621137, "grad_norm": 0.1851365864276886, "learning_rate": 0.00019837689406057183, "loss": 0.9459, "step": 1315 }, { "epoch": 0.15228426395939088, "grad_norm": 0.216008722782135, "learning_rate": 0.00019834056293464093, "loss": 0.9901, "step": 1320 }, { "epoch": 0.15286109829257039, "grad_norm": 0.17874599993228912, "learning_rate": 0.00019830383308529393, "loss": 0.984, "step": 1325 }, { "epoch": 0.1534379326257499, "grad_norm": 0.18888545036315918, "learning_rate": 0.00019826670466145262, "loss": 0.9617, "step": 1330 }, { "epoch": 0.1540147669589294, "grad_norm": 0.1813315898180008, "learning_rate": 0.00019822917781365474, "loss": 0.9944, "step": 1335 }, { "epoch": 0.15459160129210892, "grad_norm": 0.18800750374794006, "learning_rate": 0.00019819125269405352, "loss": 0.9283, "step": 1340 }, { "epoch": 0.15516843562528843, "grad_norm": 0.19481781125068665, "learning_rate": 0.00019815292945641705, "loss": 0.9559, "step": 1345 }, { "epoch": 0.15574526995846794, "grad_norm": 0.17894810438156128, "learning_rate": 0.0001981142082561274, "loss": 0.9628, "step": 1350 }, { "epoch": 0.15632210429164745, "grad_norm": 0.1818206012248993, "learning_rate": 0.0001980750892501804, "loss": 1.0073, "step": 1355 }, { "epoch": 0.15689893862482696, "grad_norm": 0.19612440466880798, "learning_rate": 0.0001980355725971847, "loss": 0.9837, "step": 1360 }, { "epoch": 0.15747577295800647, "grad_norm": 0.1835489571094513, "learning_rate": 0.0001979956584573612, "loss": 1.0062, "step": 1365 }, { "epoch": 0.15805260729118598, "grad_norm": 0.18093307316303253, "learning_rate": 0.00019795534699254238, "loss": 0.9496, "step": 1370 }, { "epoch": 0.15862944162436549, "grad_norm": 0.1910361796617508, "learning_rate": 0.00019791463836617176, "loss": 1.0064, "step": 1375 }, { "epoch": 0.159206275957545, "grad_norm": 0.17584437131881714, "learning_rate": 0.00019787353274330313, "loss": 0.9604, "step": 1380 }, { "epoch": 0.1597831102907245, "grad_norm": 0.193894624710083, "learning_rate": 0.00019783203029059997, "loss": 0.9816, "step": 1385 }, { "epoch": 0.16035994462390402, "grad_norm": 0.18043072521686554, "learning_rate": 0.00019779013117633454, "loss": 0.9106, "step": 1390 }, { "epoch": 0.16093677895708353, "grad_norm": 0.18467725813388824, "learning_rate": 0.00019774783557038755, "loss": 0.9019, "step": 1395 }, { "epoch": 0.16151361329026304, "grad_norm": 0.17407982051372528, "learning_rate": 0.00019770514364424725, "loss": 0.9465, "step": 1400 }, { "epoch": 0.16209044762344255, "grad_norm": 0.18963223695755005, "learning_rate": 0.00019766205557100868, "loss": 0.9775, "step": 1405 }, { "epoch": 0.16266728195662206, "grad_norm": 0.18725010752677917, "learning_rate": 0.0001976185715253732, "loss": 0.9709, "step": 1410 }, { "epoch": 0.16324411628980157, "grad_norm": 0.17535746097564697, "learning_rate": 0.0001975746916836475, "loss": 0.9495, "step": 1415 }, { "epoch": 0.16382095062298108, "grad_norm": 0.19157184660434723, "learning_rate": 0.0001975304162237432, "loss": 0.9545, "step": 1420 }, { "epoch": 0.1643977849561606, "grad_norm": 0.20112945139408112, "learning_rate": 0.00019748574532517586, "loss": 0.9671, "step": 1425 }, { "epoch": 0.1649746192893401, "grad_norm": 0.19094939529895782, "learning_rate": 0.0001974406791690643, "loss": 0.9768, "step": 1430 }, { "epoch": 0.1655514536225196, "grad_norm": 0.175222247838974, "learning_rate": 0.00019739521793813006, "loss": 0.9699, "step": 1435 }, { "epoch": 0.16612828795569912, "grad_norm": 0.18948869407176971, "learning_rate": 0.00019734936181669638, "loss": 1.0102, "step": 1440 }, { "epoch": 0.16670512228887863, "grad_norm": 0.18475863337516785, "learning_rate": 0.00019730311099068771, "loss": 0.922, "step": 1445 }, { "epoch": 0.16728195662205814, "grad_norm": 0.18690991401672363, "learning_rate": 0.00019725646564762878, "loss": 0.9693, "step": 1450 }, { "epoch": 0.16785879095523765, "grad_norm": 0.1863308697938919, "learning_rate": 0.00019720942597664385, "loss": 0.9639, "step": 1455 }, { "epoch": 0.16843562528841716, "grad_norm": 0.18741768598556519, "learning_rate": 0.00019716199216845604, "loss": 1.0382, "step": 1460 }, { "epoch": 0.16901245962159667, "grad_norm": 0.18736031651496887, "learning_rate": 0.00019711416441538652, "loss": 1.0025, "step": 1465 }, { "epoch": 0.16958929395477618, "grad_norm": 0.18398988246917725, "learning_rate": 0.00019706594291135366, "loss": 0.9566, "step": 1470 }, { "epoch": 0.1701661282879557, "grad_norm": 0.18206271529197693, "learning_rate": 0.0001970173278518724, "loss": 0.9727, "step": 1475 }, { "epoch": 0.1707429626211352, "grad_norm": 0.24337929487228394, "learning_rate": 0.00019696831943405324, "loss": 1.0105, "step": 1480 }, { "epoch": 0.1713197969543147, "grad_norm": 0.20907312631607056, "learning_rate": 0.0001969189178566016, "loss": 0.9619, "step": 1485 }, { "epoch": 0.17189663128749424, "grad_norm": 0.1832038313150406, "learning_rate": 0.00019686912331981702, "loss": 0.9998, "step": 1490 }, { "epoch": 0.17247346562067375, "grad_norm": 0.1847505420446396, "learning_rate": 0.00019681893602559224, "loss": 0.9444, "step": 1495 }, { "epoch": 0.17305029995385326, "grad_norm": 0.19578874111175537, "learning_rate": 0.00019676835617741249, "loss": 0.966, "step": 1500 }, { "epoch": 0.17362713428703277, "grad_norm": 0.20430216193199158, "learning_rate": 0.0001967173839803545, "loss": 0.9983, "step": 1505 }, { "epoch": 0.17420396862021229, "grad_norm": 0.19105270504951477, "learning_rate": 0.00019666601964108598, "loss": 0.9622, "step": 1510 }, { "epoch": 0.1747808029533918, "grad_norm": 0.19650229811668396, "learning_rate": 0.00019661426336786445, "loss": 0.924, "step": 1515 }, { "epoch": 0.1753576372865713, "grad_norm": 0.18799568712711334, "learning_rate": 0.00019656211537053654, "loss": 0.9319, "step": 1520 }, { "epoch": 0.17593447161975082, "grad_norm": 0.19247035682201385, "learning_rate": 0.00019650957586053716, "loss": 0.9913, "step": 1525 }, { "epoch": 0.17651130595293033, "grad_norm": 0.18789616227149963, "learning_rate": 0.00019645664505088864, "loss": 0.8992, "step": 1530 }, { "epoch": 0.17708814028610984, "grad_norm": 0.18446215987205505, "learning_rate": 0.00019640332315619977, "loss": 0.987, "step": 1535 }, { "epoch": 0.17766497461928935, "grad_norm": 0.1788845956325531, "learning_rate": 0.00019634961039266506, "loss": 0.9455, "step": 1540 }, { "epoch": 0.17824180895246886, "grad_norm": 0.17681817710399628, "learning_rate": 0.0001962955069780638, "loss": 1.0042, "step": 1545 }, { "epoch": 0.17881864328564837, "grad_norm": 0.18479640781879425, "learning_rate": 0.00019624101313175918, "loss": 0.9973, "step": 1550 }, { "epoch": 0.17939547761882788, "grad_norm": 0.17797234654426575, "learning_rate": 0.00019618612907469732, "loss": 0.959, "step": 1555 }, { "epoch": 0.17997231195200739, "grad_norm": 0.1861361563205719, "learning_rate": 0.00019613085502940658, "loss": 0.9529, "step": 1560 }, { "epoch": 0.1805491462851869, "grad_norm": 0.19123396277427673, "learning_rate": 0.00019607519121999647, "loss": 0.9506, "step": 1565 }, { "epoch": 0.1811259806183664, "grad_norm": 0.19712139666080475, "learning_rate": 0.00019601913787215683, "loss": 1.0023, "step": 1570 }, { "epoch": 0.18170281495154592, "grad_norm": 0.18968220055103302, "learning_rate": 0.0001959626952131568, "loss": 0.9354, "step": 1575 }, { "epoch": 0.18227964928472543, "grad_norm": 0.1877613365650177, "learning_rate": 0.00019590586347184417, "loss": 0.9825, "step": 1580 }, { "epoch": 0.18285648361790494, "grad_norm": 0.1915743201971054, "learning_rate": 0.00019584864287864408, "loss": 0.9456, "step": 1585 }, { "epoch": 0.18343331795108445, "grad_norm": 0.19143231213092804, "learning_rate": 0.0001957910336655584, "loss": 0.9993, "step": 1590 }, { "epoch": 0.18401015228426396, "grad_norm": 0.1909196525812149, "learning_rate": 0.00019573303606616459, "loss": 0.9774, "step": 1595 }, { "epoch": 0.18458698661744347, "grad_norm": 0.19479255378246307, "learning_rate": 0.00019567465031561487, "loss": 0.9727, "step": 1600 }, { "epoch": 0.18516382095062298, "grad_norm": 0.18205581605434418, "learning_rate": 0.0001956158766506352, "loss": 0.9786, "step": 1605 }, { "epoch": 0.18574065528380249, "grad_norm": 0.19251278042793274, "learning_rate": 0.00019555671530952445, "loss": 0.9815, "step": 1610 }, { "epoch": 0.186317489616982, "grad_norm": 0.1939883977174759, "learning_rate": 0.00019549716653215318, "loss": 0.9449, "step": 1615 }, { "epoch": 0.1868943239501615, "grad_norm": 0.187269926071167, "learning_rate": 0.00019543723055996282, "loss": 0.9425, "step": 1620 }, { "epoch": 0.18747115828334102, "grad_norm": 0.18750888109207153, "learning_rate": 0.00019537690763596487, "loss": 0.961, "step": 1625 }, { "epoch": 0.18804799261652053, "grad_norm": 0.20002637803554535, "learning_rate": 0.00019531619800473952, "loss": 0.9399, "step": 1630 }, { "epoch": 0.18862482694970004, "grad_norm": 0.19453556835651398, "learning_rate": 0.00019525510191243498, "loss": 0.9138, "step": 1635 }, { "epoch": 0.18920166128287955, "grad_norm": 0.18952696025371552, "learning_rate": 0.0001951936196067664, "loss": 1.0256, "step": 1640 }, { "epoch": 0.18977849561605906, "grad_norm": 0.22311453521251678, "learning_rate": 0.00019513175133701474, "loss": 0.9247, "step": 1645 }, { "epoch": 0.19035532994923857, "grad_norm": 0.1846632957458496, "learning_rate": 0.00019506949735402588, "loss": 0.9555, "step": 1650 }, { "epoch": 0.19093216428241808, "grad_norm": 0.1898278146982193, "learning_rate": 0.00019500685791020968, "loss": 0.9954, "step": 1655 }, { "epoch": 0.1915089986155976, "grad_norm": 0.18607108294963837, "learning_rate": 0.00019494383325953875, "loss": 0.9065, "step": 1660 }, { "epoch": 0.19208583294877712, "grad_norm": 0.19422227144241333, "learning_rate": 0.00019488042365754758, "loss": 0.9198, "step": 1665 }, { "epoch": 0.19266266728195663, "grad_norm": 0.1838252693414688, "learning_rate": 0.0001948166293613314, "loss": 0.9501, "step": 1670 }, { "epoch": 0.19323950161513614, "grad_norm": 0.1975485235452652, "learning_rate": 0.00019475245062954523, "loss": 0.9422, "step": 1675 }, { "epoch": 0.19381633594831565, "grad_norm": 0.19868746399879456, "learning_rate": 0.00019468788772240286, "loss": 0.9407, "step": 1680 }, { "epoch": 0.19439317028149516, "grad_norm": 0.19394518435001373, "learning_rate": 0.00019462294090167554, "loss": 0.9775, "step": 1685 }, { "epoch": 0.19497000461467467, "grad_norm": 0.19594334065914154, "learning_rate": 0.0001945576104306913, "loss": 0.9393, "step": 1690 }, { "epoch": 0.19554683894785418, "grad_norm": 0.18195070326328278, "learning_rate": 0.00019449189657433358, "loss": 0.9847, "step": 1695 }, { "epoch": 0.1961236732810337, "grad_norm": 0.19553768634796143, "learning_rate": 0.00019442579959904024, "loss": 0.946, "step": 1700 }, { "epoch": 0.1967005076142132, "grad_norm": 0.18642808496952057, "learning_rate": 0.0001943593197728026, "loss": 0.938, "step": 1705 }, { "epoch": 0.19727734194739271, "grad_norm": 0.18931232392787933, "learning_rate": 0.00019429245736516415, "loss": 0.9308, "step": 1710 }, { "epoch": 0.19785417628057222, "grad_norm": 0.1960645318031311, "learning_rate": 0.00019422521264721962, "loss": 0.9677, "step": 1715 }, { "epoch": 0.19843101061375173, "grad_norm": 0.18322202563285828, "learning_rate": 0.00019415758589161385, "loss": 0.9631, "step": 1720 }, { "epoch": 0.19900784494693124, "grad_norm": 0.19517329335212708, "learning_rate": 0.0001940895773725406, "loss": 0.9644, "step": 1725 }, { "epoch": 0.19958467928011075, "grad_norm": 0.19266700744628906, "learning_rate": 0.00019402118736574155, "loss": 0.9747, "step": 1730 }, { "epoch": 0.20016151361329027, "grad_norm": 0.18284273147583008, "learning_rate": 0.00019395241614850504, "loss": 0.922, "step": 1735 }, { "epoch": 0.20073834794646978, "grad_norm": 0.19635812938213348, "learning_rate": 0.00019388326399966515, "loss": 0.9793, "step": 1740 }, { "epoch": 0.20131518227964929, "grad_norm": 0.19478964805603027, "learning_rate": 0.00019381373119960033, "loss": 0.9948, "step": 1745 }, { "epoch": 0.2018920166128288, "grad_norm": 0.2036595195531845, "learning_rate": 0.00019374381803023252, "loss": 1.0269, "step": 1750 }, { "epoch": 0.2024688509460083, "grad_norm": 0.20682990550994873, "learning_rate": 0.00019367352477502576, "loss": 0.9985, "step": 1755 }, { "epoch": 0.20304568527918782, "grad_norm": 0.178926020860672, "learning_rate": 0.0001936028517189852, "loss": 0.9763, "step": 1760 }, { "epoch": 0.20362251961236733, "grad_norm": 0.19505412876605988, "learning_rate": 0.00019353179914865596, "loss": 0.9484, "step": 1765 }, { "epoch": 0.20419935394554684, "grad_norm": 0.20620250701904297, "learning_rate": 0.00019346036735212177, "loss": 0.9963, "step": 1770 }, { "epoch": 0.20477618827872635, "grad_norm": 0.19574880599975586, "learning_rate": 0.00019338855661900405, "loss": 0.9976, "step": 1775 }, { "epoch": 0.20535302261190586, "grad_norm": 0.19267630577087402, "learning_rate": 0.00019331636724046058, "loss": 0.9666, "step": 1780 }, { "epoch": 0.20592985694508537, "grad_norm": 0.18831545114517212, "learning_rate": 0.00019324379950918437, "loss": 1.0032, "step": 1785 }, { "epoch": 0.20650669127826488, "grad_norm": 0.19826582074165344, "learning_rate": 0.00019317085371940246, "loss": 0.9847, "step": 1790 }, { "epoch": 0.20708352561144439, "grad_norm": 0.19098900258541107, "learning_rate": 0.00019309753016687477, "loss": 0.9777, "step": 1795 }, { "epoch": 0.2076603599446239, "grad_norm": 0.19980227947235107, "learning_rate": 0.00019302382914889284, "loss": 0.9907, "step": 1800 }, { "epoch": 0.2082371942778034, "grad_norm": 0.18145912885665894, "learning_rate": 0.00019294975096427862, "loss": 0.9465, "step": 1805 }, { "epoch": 0.20881402861098292, "grad_norm": 0.18919304013252258, "learning_rate": 0.00019287529591338333, "loss": 0.9272, "step": 1810 }, { "epoch": 0.20939086294416243, "grad_norm": 0.18271850049495697, "learning_rate": 0.0001928004642980862, "loss": 0.9525, "step": 1815 }, { "epoch": 0.20996769727734194, "grad_norm": 0.21809083223342896, "learning_rate": 0.00019272525642179323, "loss": 0.972, "step": 1820 }, { "epoch": 0.21054453161052145, "grad_norm": 0.19276146590709686, "learning_rate": 0.00019264967258943595, "loss": 0.9335, "step": 1825 }, { "epoch": 0.21112136594370096, "grad_norm": 0.18455006182193756, "learning_rate": 0.0001925737131074703, "loss": 0.9558, "step": 1830 }, { "epoch": 0.2116982002768805, "grad_norm": 0.2075817883014679, "learning_rate": 0.00019249737828387522, "loss": 1.0067, "step": 1835 }, { "epoch": 0.21227503461006, "grad_norm": 0.18990999460220337, "learning_rate": 0.00019242066842815146, "loss": 0.9964, "step": 1840 }, { "epoch": 0.21285186894323951, "grad_norm": 0.19079752266407013, "learning_rate": 0.00019234358385132038, "loss": 0.9827, "step": 1845 }, { "epoch": 0.21342870327641902, "grad_norm": 0.187656432390213, "learning_rate": 0.00019226612486592271, "loss": 0.9619, "step": 1850 }, { "epoch": 0.21400553760959853, "grad_norm": 0.19485455751419067, "learning_rate": 0.00019218829178601713, "loss": 0.9874, "step": 1855 }, { "epoch": 0.21458237194277804, "grad_norm": 0.19166654348373413, "learning_rate": 0.00019211008492717914, "loss": 1.0142, "step": 1860 }, { "epoch": 0.21515920627595755, "grad_norm": 0.20272095501422882, "learning_rate": 0.0001920315046064997, "loss": 0.9981, "step": 1865 }, { "epoch": 0.21573604060913706, "grad_norm": 0.18274882435798645, "learning_rate": 0.00019195255114258408, "loss": 0.9761, "step": 1870 }, { "epoch": 0.21631287494231657, "grad_norm": 0.18468588590621948, "learning_rate": 0.00019187322485555031, "loss": 0.9657, "step": 1875 }, { "epoch": 0.21688970927549608, "grad_norm": 0.19172626733779907, "learning_rate": 0.00019179352606702813, "loss": 0.9432, "step": 1880 }, { "epoch": 0.2174665436086756, "grad_norm": 0.1883264034986496, "learning_rate": 0.00019171345510015758, "loss": 0.9939, "step": 1885 }, { "epoch": 0.2180433779418551, "grad_norm": 0.1864314079284668, "learning_rate": 0.0001916330122795877, "loss": 1.0028, "step": 1890 }, { "epoch": 0.21862021227503461, "grad_norm": 0.19553621113300323, "learning_rate": 0.00019155219793147522, "loss": 0.9866, "step": 1895 }, { "epoch": 0.21919704660821412, "grad_norm": 0.1937098354101181, "learning_rate": 0.00019147101238348326, "loss": 0.9515, "step": 1900 }, { "epoch": 0.21977388094139363, "grad_norm": 0.1826750636100769, "learning_rate": 0.00019138945596477994, "loss": 0.9832, "step": 1905 }, { "epoch": 0.22035071527457314, "grad_norm": 0.19543537497520447, "learning_rate": 0.00019130752900603702, "loss": 0.9677, "step": 1910 }, { "epoch": 0.22092754960775265, "grad_norm": 0.18481284379959106, "learning_rate": 0.00019122523183942879, "loss": 0.9081, "step": 1915 }, { "epoch": 0.22150438394093216, "grad_norm": 0.17375807464122772, "learning_rate": 0.00019114256479863038, "loss": 0.9674, "step": 1920 }, { "epoch": 0.22208121827411167, "grad_norm": 0.19649936258792877, "learning_rate": 0.00019105952821881668, "loss": 0.9693, "step": 1925 }, { "epoch": 0.22265805260729118, "grad_norm": 0.18931066989898682, "learning_rate": 0.00019097612243666086, "loss": 0.9665, "step": 1930 }, { "epoch": 0.2232348869404707, "grad_norm": 0.18258830904960632, "learning_rate": 0.00019089234779033306, "loss": 0.9844, "step": 1935 }, { "epoch": 0.2238117212736502, "grad_norm": 0.2044518142938614, "learning_rate": 0.00019080820461949886, "loss": 0.9293, "step": 1940 }, { "epoch": 0.22438855560682971, "grad_norm": 0.1841280460357666, "learning_rate": 0.00019072369326531824, "loss": 0.9823, "step": 1945 }, { "epoch": 0.22496538994000922, "grad_norm": 0.1927040070295334, "learning_rate": 0.00019063881407044373, "loss": 0.993, "step": 1950 }, { "epoch": 0.22554222427318874, "grad_norm": 0.19881246984004974, "learning_rate": 0.00019055356737901952, "loss": 0.9679, "step": 1955 }, { "epoch": 0.22611905860636825, "grad_norm": 0.18685629963874817, "learning_rate": 0.00019046795353667965, "loss": 0.961, "step": 1960 }, { "epoch": 0.22669589293954776, "grad_norm": 0.1891525834798813, "learning_rate": 0.00019038197289054684, "loss": 0.9496, "step": 1965 }, { "epoch": 0.22727272727272727, "grad_norm": 0.1977647989988327, "learning_rate": 0.00019029562578923106, "loss": 0.9804, "step": 1970 }, { "epoch": 0.22784956160590678, "grad_norm": 0.19999738037586212, "learning_rate": 0.000190208912582828, "loss": 1.0139, "step": 1975 }, { "epoch": 0.22842639593908629, "grad_norm": 0.18472820520401, "learning_rate": 0.0001901218336229178, "loss": 0.989, "step": 1980 }, { "epoch": 0.2290032302722658, "grad_norm": 0.18772737681865692, "learning_rate": 0.0001900343892625635, "loss": 0.9869, "step": 1985 }, { "epoch": 0.2295800646054453, "grad_norm": 0.21390481293201447, "learning_rate": 0.00018994657985630972, "loss": 0.9556, "step": 1990 }, { "epoch": 0.23015689893862482, "grad_norm": 0.18885917961597443, "learning_rate": 0.00018985840576018107, "loss": 0.951, "step": 1995 }, { "epoch": 0.23073373327180433, "grad_norm": 0.18941619992256165, "learning_rate": 0.00018976986733168093, "loss": 0.9348, "step": 2000 }, { "epoch": 0.23131056760498384, "grad_norm": 0.18609663844108582, "learning_rate": 0.00018968096492978976, "loss": 0.9704, "step": 2005 }, { "epoch": 0.23188740193816337, "grad_norm": 0.19215025007724762, "learning_rate": 0.0001895916989149638, "loss": 0.9188, "step": 2010 }, { "epoch": 0.23246423627134288, "grad_norm": 0.18575677275657654, "learning_rate": 0.00018950206964913355, "loss": 0.9793, "step": 2015 }, { "epoch": 0.2330410706045224, "grad_norm": 0.198299378156662, "learning_rate": 0.00018941207749570237, "loss": 0.9936, "step": 2020 }, { "epoch": 0.2336179049377019, "grad_norm": 0.1827745884656906, "learning_rate": 0.0001893217228195449, "loss": 0.9306, "step": 2025 }, { "epoch": 0.2341947392708814, "grad_norm": 0.19425687193870544, "learning_rate": 0.00018923100598700561, "loss": 1.0467, "step": 2030 }, { "epoch": 0.23477157360406092, "grad_norm": 0.18473173677921295, "learning_rate": 0.00018913992736589746, "loss": 1.0026, "step": 2035 }, { "epoch": 0.23534840793724043, "grad_norm": 0.1885528862476349, "learning_rate": 0.0001890484873255001, "loss": 0.9516, "step": 2040 }, { "epoch": 0.23592524227041994, "grad_norm": 0.19046476483345032, "learning_rate": 0.00018895668623655873, "loss": 0.9957, "step": 2045 }, { "epoch": 0.23650207660359945, "grad_norm": 0.1952226310968399, "learning_rate": 0.0001888645244712824, "loss": 0.98, "step": 2050 }, { "epoch": 0.23707891093677896, "grad_norm": 0.1928633153438568, "learning_rate": 0.00018877200240334236, "loss": 1.0137, "step": 2055 }, { "epoch": 0.23765574526995847, "grad_norm": 0.1882990151643753, "learning_rate": 0.00018867912040787096, "loss": 1.0047, "step": 2060 }, { "epoch": 0.23823257960313798, "grad_norm": 0.20092810690402985, "learning_rate": 0.00018858587886145975, "loss": 0.9555, "step": 2065 }, { "epoch": 0.2388094139363175, "grad_norm": 0.19786730408668518, "learning_rate": 0.00018849227814215805, "loss": 0.9894, "step": 2070 }, { "epoch": 0.239386248269497, "grad_norm": 0.19983135163784027, "learning_rate": 0.00018839831862947152, "loss": 0.9391, "step": 2075 }, { "epoch": 0.23996308260267651, "grad_norm": 0.19174017012119293, "learning_rate": 0.00018830400070436057, "loss": 0.9526, "step": 2080 }, { "epoch": 0.24053991693585602, "grad_norm": 0.1841077357530594, "learning_rate": 0.00018820932474923873, "loss": 0.9744, "step": 2085 }, { "epoch": 0.24111675126903553, "grad_norm": 0.19150149822235107, "learning_rate": 0.00018811429114797123, "loss": 0.9676, "step": 2090 }, { "epoch": 0.24169358560221504, "grad_norm": 0.20261262357234955, "learning_rate": 0.00018801890028587333, "loss": 0.9706, "step": 2095 }, { "epoch": 0.24227041993539455, "grad_norm": 0.199992835521698, "learning_rate": 0.0001879231525497089, "loss": 1.0026, "step": 2100 }, { "epoch": 0.24284725426857406, "grad_norm": 0.20790398120880127, "learning_rate": 0.0001878270483276886, "loss": 1.0086, "step": 2105 }, { "epoch": 0.24342408860175357, "grad_norm": 0.1923636943101883, "learning_rate": 0.00018773058800946858, "loss": 0.932, "step": 2110 }, { "epoch": 0.24400092293493308, "grad_norm": 0.18227846920490265, "learning_rate": 0.00018763377198614887, "loss": 0.9612, "step": 2115 }, { "epoch": 0.2445777572681126, "grad_norm": 0.18197417259216309, "learning_rate": 0.00018753660065027152, "loss": 1.0405, "step": 2120 }, { "epoch": 0.2451545916012921, "grad_norm": 0.1864314079284668, "learning_rate": 0.00018743907439581933, "loss": 0.9918, "step": 2125 }, { "epoch": 0.24573142593447161, "grad_norm": 0.18296851217746735, "learning_rate": 0.0001873411936182141, "loss": 0.9839, "step": 2130 }, { "epoch": 0.24630826026765112, "grad_norm": 0.20536720752716064, "learning_rate": 0.000187242958714315, "loss": 0.9955, "step": 2135 }, { "epoch": 0.24688509460083063, "grad_norm": 0.18991383910179138, "learning_rate": 0.00018714437008241709, "loss": 0.9564, "step": 2140 }, { "epoch": 0.24746192893401014, "grad_norm": 0.21389083564281464, "learning_rate": 0.00018704542812224956, "loss": 1.0121, "step": 2145 }, { "epoch": 0.24803876326718965, "grad_norm": 0.19424885511398315, "learning_rate": 0.00018694613323497422, "loss": 0.9919, "step": 2150 }, { "epoch": 0.24861559760036916, "grad_norm": 0.20211443305015564, "learning_rate": 0.0001868464858231838, "loss": 1.0288, "step": 2155 }, { "epoch": 0.24919243193354867, "grad_norm": 0.1808236688375473, "learning_rate": 0.0001867464862909004, "loss": 0.8799, "step": 2160 }, { "epoch": 0.24976926626672818, "grad_norm": 0.19431866705417633, "learning_rate": 0.00018664613504357366, "loss": 0.942, "step": 2165 }, { "epoch": 0.2503461005999077, "grad_norm": 0.18865206837654114, "learning_rate": 0.0001865454324880794, "loss": 0.9667, "step": 2170 }, { "epoch": 0.2509229349330872, "grad_norm": 0.1844114363193512, "learning_rate": 0.00018644437903271778, "loss": 0.9783, "step": 2175 }, { "epoch": 0.2514997692662667, "grad_norm": 0.19228576123714447, "learning_rate": 0.00018634297508721167, "loss": 0.9547, "step": 2180 }, { "epoch": 0.2520766035994462, "grad_norm": 0.19523252546787262, "learning_rate": 0.00018624122106270506, "loss": 0.944, "step": 2185 }, { "epoch": 0.25265343793262574, "grad_norm": 0.18889620900154114, "learning_rate": 0.00018613911737176125, "loss": 0.9372, "step": 2190 }, { "epoch": 0.25323027226580525, "grad_norm": 0.1964087188243866, "learning_rate": 0.0001860366644283613, "loss": 0.9885, "step": 2195 }, { "epoch": 0.25380710659898476, "grad_norm": 0.19151298701763153, "learning_rate": 0.00018593386264790243, "loss": 1.023, "step": 2200 }, { "epoch": 0.25438394093216427, "grad_norm": 0.181709885597229, "learning_rate": 0.00018583071244719607, "loss": 0.8938, "step": 2205 }, { "epoch": 0.2549607752653438, "grad_norm": 0.18949133157730103, "learning_rate": 0.0001857272142444664, "loss": 0.9712, "step": 2210 }, { "epoch": 0.2555376095985233, "grad_norm": 0.19438017904758453, "learning_rate": 0.0001856233684593486, "loss": 0.9659, "step": 2215 }, { "epoch": 0.2561144439317028, "grad_norm": 0.19530200958251953, "learning_rate": 0.00018551917551288706, "loss": 1.0126, "step": 2220 }, { "epoch": 0.2566912782648823, "grad_norm": 0.19227471947669983, "learning_rate": 0.0001854146358275338, "loss": 0.9807, "step": 2225 }, { "epoch": 0.2572681125980618, "grad_norm": 0.19348041713237762, "learning_rate": 0.00018530974982714667, "loss": 0.9755, "step": 2230 }, { "epoch": 0.2578449469312413, "grad_norm": 0.2049867808818817, "learning_rate": 0.0001852045179369877, "loss": 0.9948, "step": 2235 }, { "epoch": 0.25842178126442084, "grad_norm": 0.19374214112758636, "learning_rate": 0.0001850989405837212, "loss": 1.028, "step": 2240 }, { "epoch": 0.25899861559760035, "grad_norm": 0.18960264325141907, "learning_rate": 0.0001849930181954124, "loss": 1.0199, "step": 2245 }, { "epoch": 0.25957544993077986, "grad_norm": 0.19603078067302704, "learning_rate": 0.00018488675120152532, "loss": 0.9788, "step": 2250 }, { "epoch": 0.26015228426395937, "grad_norm": 0.19418926537036896, "learning_rate": 0.00018478014003292116, "loss": 0.9406, "step": 2255 }, { "epoch": 0.2607291185971389, "grad_norm": 0.183867946267128, "learning_rate": 0.0001846731851218567, "loss": 0.9217, "step": 2260 }, { "epoch": 0.2613059529303184, "grad_norm": 0.19260871410369873, "learning_rate": 0.00018456588690198236, "loss": 0.9618, "step": 2265 }, { "epoch": 0.2618827872634979, "grad_norm": 0.1969606876373291, "learning_rate": 0.0001844582458083405, "loss": 0.9594, "step": 2270 }, { "epoch": 0.26245962159667746, "grad_norm": 0.1877615749835968, "learning_rate": 0.0001843502622773637, "loss": 0.952, "step": 2275 }, { "epoch": 0.26303645592985697, "grad_norm": 0.18486037850379944, "learning_rate": 0.00018424193674687297, "loss": 0.9699, "step": 2280 }, { "epoch": 0.2636132902630365, "grad_norm": 0.19585862755775452, "learning_rate": 0.00018413326965607593, "loss": 0.9894, "step": 2285 }, { "epoch": 0.264190124596216, "grad_norm": 0.17970632016658783, "learning_rate": 0.00018402426144556504, "loss": 0.9834, "step": 2290 }, { "epoch": 0.2647669589293955, "grad_norm": 0.19453051686286926, "learning_rate": 0.0001839149125573159, "loss": 0.9737, "step": 2295 }, { "epoch": 0.265343793262575, "grad_norm": 0.20039866864681244, "learning_rate": 0.00018380522343468532, "loss": 1.0147, "step": 2300 }, { "epoch": 0.2659206275957545, "grad_norm": 0.1953950971364975, "learning_rate": 0.00018369519452240973, "loss": 0.9651, "step": 2305 }, { "epoch": 0.26649746192893403, "grad_norm": 0.1906932145357132, "learning_rate": 0.00018358482626660303, "loss": 0.9364, "step": 2310 }, { "epoch": 0.26707429626211354, "grad_norm": 0.1904212385416031, "learning_rate": 0.0001834741191147552, "loss": 0.9498, "step": 2315 }, { "epoch": 0.26765113059529305, "grad_norm": 0.18951915204524994, "learning_rate": 0.00018336307351573018, "loss": 1.0005, "step": 2320 }, { "epoch": 0.26822796492847256, "grad_norm": 0.19409124553203583, "learning_rate": 0.00018325168991976408, "loss": 1.0019, "step": 2325 }, { "epoch": 0.26880479926165207, "grad_norm": 0.20801284909248352, "learning_rate": 0.00018313996877846361, "loss": 0.9383, "step": 2330 }, { "epoch": 0.2693816335948316, "grad_norm": 0.19471175968647003, "learning_rate": 0.00018302791054480394, "loss": 1.0274, "step": 2335 }, { "epoch": 0.2699584679280111, "grad_norm": 0.20756390690803528, "learning_rate": 0.00018291551567312694, "loss": 0.9846, "step": 2340 }, { "epoch": 0.2705353022611906, "grad_norm": 0.1832803636789322, "learning_rate": 0.00018280278461913952, "loss": 0.9467, "step": 2345 }, { "epoch": 0.2711121365943701, "grad_norm": 0.19528664648532867, "learning_rate": 0.00018268971783991152, "loss": 0.9564, "step": 2350 }, { "epoch": 0.2716889709275496, "grad_norm": 0.18837648630142212, "learning_rate": 0.00018257631579387412, "loss": 0.9511, "step": 2355 }, { "epoch": 0.27226580526072913, "grad_norm": 0.20164744555950165, "learning_rate": 0.0001824625789408177, "loss": 0.9314, "step": 2360 }, { "epoch": 0.27284263959390864, "grad_norm": 0.1876562237739563, "learning_rate": 0.00018234850774189018, "loss": 0.9518, "step": 2365 }, { "epoch": 0.27341947392708815, "grad_norm": 0.18695729970932007, "learning_rate": 0.00018223410265959516, "loss": 0.9382, "step": 2370 }, { "epoch": 0.27399630826026766, "grad_norm": 0.19776229560375214, "learning_rate": 0.00018211936415778984, "loss": 0.9793, "step": 2375 }, { "epoch": 0.2745731425934472, "grad_norm": 0.2102830559015274, "learning_rate": 0.0001820042927016834, "loss": 0.9862, "step": 2380 }, { "epoch": 0.2751499769266267, "grad_norm": 0.19167035818099976, "learning_rate": 0.0001818888887578349, "loss": 0.9348, "step": 2385 }, { "epoch": 0.2757268112598062, "grad_norm": 0.20402806997299194, "learning_rate": 0.00018177315279415153, "loss": 0.9807, "step": 2390 }, { "epoch": 0.2763036455929857, "grad_norm": 0.19756613671779633, "learning_rate": 0.00018165708527988664, "loss": 1.0354, "step": 2395 }, { "epoch": 0.2768804799261652, "grad_norm": 0.19909250736236572, "learning_rate": 0.00018154068668563782, "loss": 1.0038, "step": 2400 }, { "epoch": 0.2774573142593447, "grad_norm": 0.19040563702583313, "learning_rate": 0.00018142395748334513, "loss": 0.9219, "step": 2405 }, { "epoch": 0.27803414859252423, "grad_norm": 0.18826067447662354, "learning_rate": 0.000181306898146289, "loss": 0.9073, "step": 2410 }, { "epoch": 0.27861098292570374, "grad_norm": 0.2101050615310669, "learning_rate": 0.00018118950914908843, "loss": 0.9463, "step": 2415 }, { "epoch": 0.27918781725888325, "grad_norm": 0.19028016924858093, "learning_rate": 0.00018107179096769901, "loss": 0.9478, "step": 2420 }, { "epoch": 0.27976465159206276, "grad_norm": 0.21243633329868317, "learning_rate": 0.00018095374407941104, "loss": 0.9734, "step": 2425 }, { "epoch": 0.2803414859252423, "grad_norm": 0.19807711243629456, "learning_rate": 0.0001808353689628475, "loss": 0.9057, "step": 2430 }, { "epoch": 0.2809183202584218, "grad_norm": 0.19900068640708923, "learning_rate": 0.0001807166660979623, "loss": 0.9656, "step": 2435 }, { "epoch": 0.2814951545916013, "grad_norm": 0.18442806601524353, "learning_rate": 0.00018059763596603814, "loss": 1.0024, "step": 2440 }, { "epoch": 0.2820719889247808, "grad_norm": 0.18683601915836334, "learning_rate": 0.0001804782790496846, "loss": 0.9286, "step": 2445 }, { "epoch": 0.2826488232579603, "grad_norm": 0.1832648068666458, "learning_rate": 0.00018035859583283626, "loss": 1.0318, "step": 2450 }, { "epoch": 0.2832256575911398, "grad_norm": 0.1928461343050003, "learning_rate": 0.00018023858680075061, "loss": 0.9739, "step": 2455 }, { "epoch": 0.28380249192431933, "grad_norm": 0.19916784763336182, "learning_rate": 0.00018011825244000632, "loss": 0.9622, "step": 2460 }, { "epoch": 0.28437932625749884, "grad_norm": 0.1981884241104126, "learning_rate": 0.00017999759323850098, "loss": 0.9899, "step": 2465 }, { "epoch": 0.28495616059067835, "grad_norm": 0.19374872744083405, "learning_rate": 0.0001798766096854493, "loss": 1.0103, "step": 2470 }, { "epoch": 0.28553299492385786, "grad_norm": 0.18874534964561462, "learning_rate": 0.00017975530227138105, "loss": 0.9592, "step": 2475 }, { "epoch": 0.2861098292570374, "grad_norm": 0.1857873797416687, "learning_rate": 0.00017963367148813913, "loss": 0.9453, "step": 2480 }, { "epoch": 0.2866866635902169, "grad_norm": 0.18749921023845673, "learning_rate": 0.0001795117178288775, "loss": 0.9826, "step": 2485 }, { "epoch": 0.2872634979233964, "grad_norm": 0.19447381794452667, "learning_rate": 0.00017938944178805933, "loss": 0.9433, "step": 2490 }, { "epoch": 0.2878403322565759, "grad_norm": 0.18583492934703827, "learning_rate": 0.00017926684386145478, "loss": 0.8993, "step": 2495 }, { "epoch": 0.2884171665897554, "grad_norm": 0.21102139353752136, "learning_rate": 0.00017914392454613913, "loss": 1.0086, "step": 2500 }, { "epoch": 0.2889940009229349, "grad_norm": 0.19170348346233368, "learning_rate": 0.00017902068434049077, "loss": 0.9852, "step": 2505 }, { "epoch": 0.28957083525611443, "grad_norm": 0.1980980634689331, "learning_rate": 0.00017889712374418912, "loss": 0.9699, "step": 2510 }, { "epoch": 0.29014766958929394, "grad_norm": 0.20153307914733887, "learning_rate": 0.00017877324325821264, "loss": 0.951, "step": 2515 }, { "epoch": 0.29072450392247345, "grad_norm": 0.1984160840511322, "learning_rate": 0.00017864904338483676, "loss": 0.9704, "step": 2520 }, { "epoch": 0.29130133825565296, "grad_norm": 0.20981407165527344, "learning_rate": 0.00017852452462763192, "loss": 0.9606, "step": 2525 }, { "epoch": 0.2918781725888325, "grad_norm": 0.1887744665145874, "learning_rate": 0.00017839968749146142, "loss": 0.9673, "step": 2530 }, { "epoch": 0.292455006922012, "grad_norm": 0.2027631551027298, "learning_rate": 0.0001782745324824795, "loss": 0.9601, "step": 2535 }, { "epoch": 0.2930318412551915, "grad_norm": 0.20790278911590576, "learning_rate": 0.00017814906010812912, "loss": 0.9639, "step": 2540 }, { "epoch": 0.293608675588371, "grad_norm": 0.1964947134256363, "learning_rate": 0.00017802327087714016, "loss": 0.9848, "step": 2545 }, { "epoch": 0.2941855099215505, "grad_norm": 0.19823837280273438, "learning_rate": 0.00017789716529952704, "loss": 1.0059, "step": 2550 }, { "epoch": 0.29476234425473, "grad_norm": 0.20603398978710175, "learning_rate": 0.00017777074388658693, "loss": 0.9444, "step": 2555 }, { "epoch": 0.29533917858790953, "grad_norm": 0.20617981255054474, "learning_rate": 0.00017764400715089744, "loss": 0.9477, "step": 2560 }, { "epoch": 0.29591601292108904, "grad_norm": 0.1940777450799942, "learning_rate": 0.0001775169556063148, "loss": 0.9033, "step": 2565 }, { "epoch": 0.29649284725426855, "grad_norm": 0.2084151804447174, "learning_rate": 0.00017738958976797157, "loss": 0.9458, "step": 2570 }, { "epoch": 0.29706968158744806, "grad_norm": 0.21188698709011078, "learning_rate": 0.00017726191015227452, "loss": 1.0301, "step": 2575 }, { "epoch": 0.2976465159206276, "grad_norm": 0.2021496593952179, "learning_rate": 0.00017713391727690284, "loss": 0.972, "step": 2580 }, { "epoch": 0.2982233502538071, "grad_norm": 0.19584889709949493, "learning_rate": 0.0001770056116608057, "loss": 0.9382, "step": 2585 }, { "epoch": 0.2988001845869866, "grad_norm": 0.1912315934896469, "learning_rate": 0.0001768769938242003, "loss": 0.9805, "step": 2590 }, { "epoch": 0.2993770189201661, "grad_norm": 0.2060055434703827, "learning_rate": 0.0001767480642885698, "loss": 1.0436, "step": 2595 }, { "epoch": 0.2999538532533456, "grad_norm": 0.19867336750030518, "learning_rate": 0.00017661882357666105, "loss": 0.9456, "step": 2600 }, { "epoch": 0.3005306875865251, "grad_norm": 0.19426298141479492, "learning_rate": 0.00017648927221248264, "loss": 1.0145, "step": 2605 }, { "epoch": 0.30110752191970463, "grad_norm": 0.2108680009841919, "learning_rate": 0.00017635941072130268, "loss": 0.9791, "step": 2610 }, { "epoch": 0.30168435625288414, "grad_norm": 0.1892360895872116, "learning_rate": 0.00017622923962964672, "loss": 1.0363, "step": 2615 }, { "epoch": 0.30226119058606365, "grad_norm": 0.1932888627052307, "learning_rate": 0.0001760987594652956, "loss": 0.9866, "step": 2620 }, { "epoch": 0.3028380249192432, "grad_norm": 0.19689255952835083, "learning_rate": 0.00017596797075728322, "loss": 0.9844, "step": 2625 }, { "epoch": 0.30341485925242273, "grad_norm": 0.18802085518836975, "learning_rate": 0.00017583687403589454, "loss": 0.9317, "step": 2630 }, { "epoch": 0.30399169358560224, "grad_norm": 0.19586369395256042, "learning_rate": 0.0001757054698326634, "loss": 0.9983, "step": 2635 }, { "epoch": 0.30456852791878175, "grad_norm": 0.19697046279907227, "learning_rate": 0.00017557375868037026, "loss": 0.9707, "step": 2640 }, { "epoch": 0.30514536225196126, "grad_norm": 0.19013574719429016, "learning_rate": 0.0001754417411130401, "loss": 0.9764, "step": 2645 }, { "epoch": 0.30572219658514077, "grad_norm": 0.2056090384721756, "learning_rate": 0.0001753094176659403, "loss": 0.961, "step": 2650 }, { "epoch": 0.3062990309183203, "grad_norm": 0.18955689668655396, "learning_rate": 0.0001751767888755785, "loss": 0.9444, "step": 2655 }, { "epoch": 0.3068758652514998, "grad_norm": 0.20427672564983368, "learning_rate": 0.00017504385527970028, "loss": 1.0496, "step": 2660 }, { "epoch": 0.3074526995846793, "grad_norm": 0.20869238674640656, "learning_rate": 0.00017491061741728702, "loss": 0.9981, "step": 2665 }, { "epoch": 0.3080295339178588, "grad_norm": 0.2011020928621292, "learning_rate": 0.00017477707582855384, "loss": 0.9737, "step": 2670 }, { "epoch": 0.3086063682510383, "grad_norm": 0.20109429955482483, "learning_rate": 0.00017464323105494727, "loss": 0.9614, "step": 2675 }, { "epoch": 0.30918320258421783, "grad_norm": 0.1839284896850586, "learning_rate": 0.00017450908363914316, "loss": 1.0152, "step": 2680 }, { "epoch": 0.30976003691739734, "grad_norm": 0.24291379749774933, "learning_rate": 0.00017437463412504437, "loss": 0.9756, "step": 2685 }, { "epoch": 0.31033687125057685, "grad_norm": 0.20243053138256073, "learning_rate": 0.00017423988305777864, "loss": 0.9743, "step": 2690 }, { "epoch": 0.31091370558375636, "grad_norm": 0.20824618637561798, "learning_rate": 0.0001741048309836964, "loss": 0.9752, "step": 2695 }, { "epoch": 0.31149053991693587, "grad_norm": 0.19603955745697021, "learning_rate": 0.00017396947845036844, "loss": 0.9745, "step": 2700 }, { "epoch": 0.3120673742501154, "grad_norm": 0.2077784240245819, "learning_rate": 0.00017383382600658388, "loss": 0.9846, "step": 2705 }, { "epoch": 0.3126442085832949, "grad_norm": 0.21430762112140656, "learning_rate": 0.0001736978742023477, "loss": 1.0096, "step": 2710 }, { "epoch": 0.3132210429164744, "grad_norm": 0.18606549501419067, "learning_rate": 0.00017356162358887875, "loss": 0.9688, "step": 2715 }, { "epoch": 0.3137978772496539, "grad_norm": 0.1990072876214981, "learning_rate": 0.00017342507471860733, "loss": 0.9299, "step": 2720 }, { "epoch": 0.3143747115828334, "grad_norm": 0.191040500998497, "learning_rate": 0.0001732882281451731, "loss": 0.9946, "step": 2725 }, { "epoch": 0.31495154591601293, "grad_norm": 0.21565918624401093, "learning_rate": 0.0001731510844234227, "loss": 0.9862, "step": 2730 }, { "epoch": 0.31552838024919244, "grad_norm": 0.19910766184329987, "learning_rate": 0.0001730136441094076, "loss": 0.9582, "step": 2735 }, { "epoch": 0.31610521458237195, "grad_norm": 0.19215498864650726, "learning_rate": 0.00017287590776038177, "loss": 0.9837, "step": 2740 }, { "epoch": 0.31668204891555146, "grad_norm": 0.19923163950443268, "learning_rate": 0.0001727378759347995, "loss": 0.9536, "step": 2745 }, { "epoch": 0.31725888324873097, "grad_norm": 0.20635759830474854, "learning_rate": 0.0001725995491923131, "loss": 0.9745, "step": 2750 }, { "epoch": 0.3178357175819105, "grad_norm": 0.1964532434940338, "learning_rate": 0.00017246092809377058, "loss": 1.0143, "step": 2755 }, { "epoch": 0.31841255191509, "grad_norm": 0.21438036859035492, "learning_rate": 0.0001723220132012134, "loss": 0.9954, "step": 2760 }, { "epoch": 0.3189893862482695, "grad_norm": 0.2038785070180893, "learning_rate": 0.00017218280507787435, "loss": 1.0069, "step": 2765 }, { "epoch": 0.319566220581449, "grad_norm": 0.19554126262664795, "learning_rate": 0.00017204330428817496, "loss": 0.933, "step": 2770 }, { "epoch": 0.3201430549146285, "grad_norm": 0.20744913816452026, "learning_rate": 0.00017190351139772348, "loss": 0.9584, "step": 2775 }, { "epoch": 0.32071988924780803, "grad_norm": 0.20495720207691193, "learning_rate": 0.00017176342697331246, "loss": 0.9706, "step": 2780 }, { "epoch": 0.32129672358098754, "grad_norm": 0.20170055329799652, "learning_rate": 0.00017162305158291655, "loss": 0.9542, "step": 2785 }, { "epoch": 0.32187355791416705, "grad_norm": 0.18749262392520905, "learning_rate": 0.00017148238579568995, "loss": 0.8963, "step": 2790 }, { "epoch": 0.32245039224734656, "grad_norm": 0.2015407681465149, "learning_rate": 0.00017134143018196447, "loss": 0.9572, "step": 2795 }, { "epoch": 0.3230272265805261, "grad_norm": 0.197077214717865, "learning_rate": 0.00017120018531324689, "loss": 1.0026, "step": 2800 }, { "epoch": 0.3236040609137056, "grad_norm": 0.2059078812599182, "learning_rate": 0.00017105865176221684, "loss": 0.9523, "step": 2805 }, { "epoch": 0.3241808952468851, "grad_norm": 0.19224663078784943, "learning_rate": 0.00017091683010272447, "loss": 0.9887, "step": 2810 }, { "epoch": 0.3247577295800646, "grad_norm": 0.20333868265151978, "learning_rate": 0.00017077472090978798, "loss": 0.9438, "step": 2815 }, { "epoch": 0.3253345639132441, "grad_norm": 0.2038094699382782, "learning_rate": 0.00017063232475959133, "loss": 0.9884, "step": 2820 }, { "epoch": 0.3259113982464236, "grad_norm": 0.19815954566001892, "learning_rate": 0.00017048964222948217, "loss": 0.9506, "step": 2825 }, { "epoch": 0.32648823257960313, "grad_norm": 0.19894501566886902, "learning_rate": 0.00017034667389796904, "loss": 0.9787, "step": 2830 }, { "epoch": 0.32706506691278264, "grad_norm": 0.19535242021083832, "learning_rate": 0.00017020342034471944, "loss": 0.9805, "step": 2835 }, { "epoch": 0.32764190124596215, "grad_norm": 0.2212100327014923, "learning_rate": 0.00017005988215055718, "loss": 0.9826, "step": 2840 }, { "epoch": 0.32821873557914166, "grad_norm": 0.20346590876579285, "learning_rate": 0.00016991605989746025, "loss": 0.9496, "step": 2845 }, { "epoch": 0.3287955699123212, "grad_norm": 0.2145700603723526, "learning_rate": 0.00016977195416855828, "loss": 0.977, "step": 2850 }, { "epoch": 0.3293724042455007, "grad_norm": 0.18952029943466187, "learning_rate": 0.00016962756554813037, "loss": 0.8824, "step": 2855 }, { "epoch": 0.3299492385786802, "grad_norm": 0.2040802389383316, "learning_rate": 0.0001694828946216025, "loss": 0.9683, "step": 2860 }, { "epoch": 0.3305260729118597, "grad_norm": 0.19531820714473724, "learning_rate": 0.00016933794197554524, "loss": 1.005, "step": 2865 }, { "epoch": 0.3311029072450392, "grad_norm": 0.20256836712360382, "learning_rate": 0.00016919270819767152, "loss": 0.9877, "step": 2870 }, { "epoch": 0.3316797415782187, "grad_norm": 0.20027059316635132, "learning_rate": 0.00016904719387683407, "loss": 0.9758, "step": 2875 }, { "epoch": 0.33225657591139823, "grad_norm": 0.1954347789287567, "learning_rate": 0.00016890139960302304, "loss": 0.9962, "step": 2880 }, { "epoch": 0.33283341024457774, "grad_norm": 0.20312942564487457, "learning_rate": 0.00016875532596736373, "loss": 0.986, "step": 2885 }, { "epoch": 0.33341024457775725, "grad_norm": 0.20359157025814056, "learning_rate": 0.00016860897356211403, "loss": 0.9411, "step": 2890 }, { "epoch": 0.33398707891093676, "grad_norm": 0.19114482402801514, "learning_rate": 0.00016846234298066218, "loss": 0.9789, "step": 2895 }, { "epoch": 0.3345639132441163, "grad_norm": 0.19829359650611877, "learning_rate": 0.0001683154348175243, "loss": 0.9444, "step": 2900 }, { "epoch": 0.3351407475772958, "grad_norm": 0.18730616569519043, "learning_rate": 0.00016816824966834183, "loss": 0.9306, "step": 2905 }, { "epoch": 0.3357175819104753, "grad_norm": 0.2252754271030426, "learning_rate": 0.00016802078812987948, "loss": 0.9732, "step": 2910 }, { "epoch": 0.3362944162436548, "grad_norm": 0.20403608679771423, "learning_rate": 0.0001678730508000224, "loss": 0.9538, "step": 2915 }, { "epoch": 0.3368712505768343, "grad_norm": 0.23965264856815338, "learning_rate": 0.00016772503827777396, "loss": 0.9988, "step": 2920 }, { "epoch": 0.3374480849100138, "grad_norm": 0.20559944212436676, "learning_rate": 0.00016757675116325343, "loss": 0.9697, "step": 2925 }, { "epoch": 0.33802491924319333, "grad_norm": 0.19775480031967163, "learning_rate": 0.0001674281900576933, "loss": 0.9915, "step": 2930 }, { "epoch": 0.33860175357637284, "grad_norm": 0.20038728415966034, "learning_rate": 0.00016727935556343698, "loss": 0.9976, "step": 2935 }, { "epoch": 0.33917858790955235, "grad_norm": 0.20284847915172577, "learning_rate": 0.0001671302482839364, "loss": 1.0131, "step": 2940 }, { "epoch": 0.33975542224273186, "grad_norm": 0.18095138669013977, "learning_rate": 0.00016698086882374939, "loss": 0.9535, "step": 2945 }, { "epoch": 0.3403322565759114, "grad_norm": 0.18949155509471893, "learning_rate": 0.00016683121778853746, "loss": 0.9928, "step": 2950 }, { "epoch": 0.3409090909090909, "grad_norm": 0.18761120736598969, "learning_rate": 0.00016668129578506315, "loss": 0.968, "step": 2955 }, { "epoch": 0.3414859252422704, "grad_norm": 0.19716186821460724, "learning_rate": 0.00016653110342118764, "loss": 1.0469, "step": 2960 }, { "epoch": 0.3420627595754499, "grad_norm": 0.19194428622722626, "learning_rate": 0.0001663806413058684, "loss": 0.9824, "step": 2965 }, { "epoch": 0.3426395939086294, "grad_norm": 0.19304610788822174, "learning_rate": 0.00016622991004915645, "loss": 0.9739, "step": 2970 }, { "epoch": 0.343216428241809, "grad_norm": 0.21132609248161316, "learning_rate": 0.00016607891026219418, "loss": 1.0423, "step": 2975 }, { "epoch": 0.3437932625749885, "grad_norm": 0.21153788268566132, "learning_rate": 0.00016592764255721264, "loss": 0.9585, "step": 2980 }, { "epoch": 0.344370096908168, "grad_norm": 0.19157880544662476, "learning_rate": 0.00016577610754752925, "loss": 0.93, "step": 2985 }, { "epoch": 0.3449469312413475, "grad_norm": 0.2042781114578247, "learning_rate": 0.00016562430584754516, "loss": 0.9758, "step": 2990 }, { "epoch": 0.345523765574527, "grad_norm": 0.1973501592874527, "learning_rate": 0.00016547223807274287, "loss": 0.9822, "step": 2995 }, { "epoch": 0.34610059990770653, "grad_norm": 0.20457808673381805, "learning_rate": 0.00016531990483968357, "loss": 0.9275, "step": 3000 }, { "epoch": 0.34667743424088604, "grad_norm": 0.19708305597305298, "learning_rate": 0.00016516730676600493, "loss": 0.9457, "step": 3005 }, { "epoch": 0.34725426857406555, "grad_norm": 0.1883085072040558, "learning_rate": 0.00016501444447041824, "loss": 0.9429, "step": 3010 }, { "epoch": 0.34783110290724506, "grad_norm": 0.20550213754177094, "learning_rate": 0.00016486131857270628, "loss": 0.9335, "step": 3015 }, { "epoch": 0.34840793724042457, "grad_norm": 0.20632968842983246, "learning_rate": 0.00016470792969372039, "loss": 0.9335, "step": 3020 }, { "epoch": 0.3489847715736041, "grad_norm": 0.20688195526599884, "learning_rate": 0.00016455427845537835, "loss": 0.9474, "step": 3025 }, { "epoch": 0.3495616059067836, "grad_norm": 0.19943097233772278, "learning_rate": 0.0001644003654806616, "loss": 1.0254, "step": 3030 }, { "epoch": 0.3501384402399631, "grad_norm": 0.2087412327528, "learning_rate": 0.00016424619139361282, "loss": 1.0255, "step": 3035 }, { "epoch": 0.3507152745731426, "grad_norm": 0.1902855485677719, "learning_rate": 0.00016409175681933328, "loss": 0.9629, "step": 3040 }, { "epoch": 0.3512921089063221, "grad_norm": 0.19096340239048004, "learning_rate": 0.00016393706238398056, "loss": 0.9426, "step": 3045 }, { "epoch": 0.35186894323950163, "grad_norm": 0.20610526204109192, "learning_rate": 0.00016378210871476577, "loss": 0.8949, "step": 3050 }, { "epoch": 0.35244577757268114, "grad_norm": 0.20180396735668182, "learning_rate": 0.00016362689643995105, "loss": 0.9682, "step": 3055 }, { "epoch": 0.35302261190586065, "grad_norm": 0.2666982412338257, "learning_rate": 0.00016347142618884712, "loss": 0.999, "step": 3060 }, { "epoch": 0.35359944623904016, "grad_norm": 0.20752312242984772, "learning_rate": 0.00016331569859181062, "loss": 0.962, "step": 3065 }, { "epoch": 0.35417628057221967, "grad_norm": 0.21684733033180237, "learning_rate": 0.00016315971428024168, "loss": 0.9512, "step": 3070 }, { "epoch": 0.3547531149053992, "grad_norm": 0.20133435726165771, "learning_rate": 0.0001630034738865812, "loss": 0.9224, "step": 3075 }, { "epoch": 0.3553299492385787, "grad_norm": 0.1901036500930786, "learning_rate": 0.00016284697804430843, "loss": 0.928, "step": 3080 }, { "epoch": 0.3559067835717582, "grad_norm": 0.19342263042926788, "learning_rate": 0.00016269022738793832, "loss": 0.9801, "step": 3085 }, { "epoch": 0.3564836179049377, "grad_norm": 0.18987055122852325, "learning_rate": 0.00016253322255301887, "loss": 0.9403, "step": 3090 }, { "epoch": 0.3570604522381172, "grad_norm": 0.21176302433013916, "learning_rate": 0.0001623759641761289, "loss": 0.9854, "step": 3095 }, { "epoch": 0.35763728657129673, "grad_norm": 0.198608860373497, "learning_rate": 0.00016221845289487492, "loss": 0.9506, "step": 3100 }, { "epoch": 0.35821412090447624, "grad_norm": 0.1978042870759964, "learning_rate": 0.00016206068934788905, "loss": 0.9745, "step": 3105 }, { "epoch": 0.35879095523765575, "grad_norm": 0.20335493981838226, "learning_rate": 0.0001619026741748262, "loss": 0.9575, "step": 3110 }, { "epoch": 0.35936778957083526, "grad_norm": 0.2059904783964157, "learning_rate": 0.00016174440801636138, "loss": 0.9237, "step": 3115 }, { "epoch": 0.35994462390401477, "grad_norm": 0.20635630190372467, "learning_rate": 0.0001615858915141874, "loss": 0.9222, "step": 3120 }, { "epoch": 0.3605214582371943, "grad_norm": 0.2041216492652893, "learning_rate": 0.00016142712531101196, "loss": 0.9432, "step": 3125 }, { "epoch": 0.3610982925703738, "grad_norm": 0.2060927152633667, "learning_rate": 0.0001612681100505552, "loss": 0.9993, "step": 3130 }, { "epoch": 0.3616751269035533, "grad_norm": 0.21824884414672852, "learning_rate": 0.00016110884637754713, "loss": 0.9735, "step": 3135 }, { "epoch": 0.3622519612367328, "grad_norm": 0.20615214109420776, "learning_rate": 0.00016094933493772487, "loss": 1.022, "step": 3140 }, { "epoch": 0.3628287955699123, "grad_norm": 0.19122281670570374, "learning_rate": 0.00016078957637783017, "loss": 0.9451, "step": 3145 }, { "epoch": 0.36340562990309183, "grad_norm": 0.20327936112880707, "learning_rate": 0.00016062957134560675, "loss": 0.939, "step": 3150 }, { "epoch": 0.36398246423627134, "grad_norm": 0.18740370869636536, "learning_rate": 0.0001604693204897975, "loss": 0.9801, "step": 3155 }, { "epoch": 0.36455929856945085, "grad_norm": 0.21493248641490936, "learning_rate": 0.00016030882446014234, "loss": 1.0288, "step": 3160 }, { "epoch": 0.36513613290263036, "grad_norm": 0.21232721209526062, "learning_rate": 0.00016014808390737485, "loss": 0.9975, "step": 3165 }, { "epoch": 0.36571296723580987, "grad_norm": 0.1946122944355011, "learning_rate": 0.00015998709948322027, "loss": 0.9658, "step": 3170 }, { "epoch": 0.3662898015689894, "grad_norm": 0.18985234200954437, "learning_rate": 0.00015982587184039263, "loss": 0.9608, "step": 3175 }, { "epoch": 0.3668666359021689, "grad_norm": 0.21031589806079865, "learning_rate": 0.00015966440163259202, "loss": 0.9316, "step": 3180 }, { "epoch": 0.3674434702353484, "grad_norm": 0.19520461559295654, "learning_rate": 0.00015950268951450198, "loss": 0.9502, "step": 3185 }, { "epoch": 0.3680203045685279, "grad_norm": 0.20601095259189606, "learning_rate": 0.00015934073614178696, "loss": 0.9511, "step": 3190 }, { "epoch": 0.3685971389017074, "grad_norm": 0.19720254838466644, "learning_rate": 0.00015917854217108954, "loss": 0.9694, "step": 3195 }, { "epoch": 0.36917397323488693, "grad_norm": 0.19519665837287903, "learning_rate": 0.00015901610826002787, "loss": 0.9564, "step": 3200 }, { "epoch": 0.36975080756806644, "grad_norm": 0.21351169049739838, "learning_rate": 0.0001588534350671928, "loss": 0.9541, "step": 3205 }, { "epoch": 0.37032764190124595, "grad_norm": 0.214279443025589, "learning_rate": 0.00015869052325214554, "loss": 0.9811, "step": 3210 }, { "epoch": 0.37090447623442546, "grad_norm": 0.2172657698392868, "learning_rate": 0.00015852737347541465, "loss": 0.9756, "step": 3215 }, { "epoch": 0.37148131056760497, "grad_norm": 0.1879701018333435, "learning_rate": 0.00015836398639849355, "loss": 0.9628, "step": 3220 }, { "epoch": 0.3720581449007845, "grad_norm": 0.2050919383764267, "learning_rate": 0.00015820036268383785, "loss": 0.9715, "step": 3225 }, { "epoch": 0.372634979233964, "grad_norm": 0.18730495870113373, "learning_rate": 0.00015803650299486252, "loss": 0.9706, "step": 3230 }, { "epoch": 0.3732118135671435, "grad_norm": 0.19974547624588013, "learning_rate": 0.00015787240799593937, "loss": 0.9911, "step": 3235 }, { "epoch": 0.373788647900323, "grad_norm": 0.1979796588420868, "learning_rate": 0.00015770807835239424, "loss": 0.9154, "step": 3240 }, { "epoch": 0.3743654822335025, "grad_norm": 0.2047133445739746, "learning_rate": 0.00015754351473050435, "loss": 0.9593, "step": 3245 }, { "epoch": 0.37494231656668203, "grad_norm": 0.19649125635623932, "learning_rate": 0.0001573787177974956, "loss": 0.9831, "step": 3250 }, { "epoch": 0.37551915089986154, "grad_norm": 0.2003108412027359, "learning_rate": 0.00015721368822153986, "loss": 0.876, "step": 3255 }, { "epoch": 0.37609598523304105, "grad_norm": 0.21534393727779388, "learning_rate": 0.0001570484266717522, "loss": 1.0213, "step": 3260 }, { "epoch": 0.37667281956622056, "grad_norm": 0.19399525225162506, "learning_rate": 0.00015688293381818823, "loss": 0.9159, "step": 3265 }, { "epoch": 0.3772496538994001, "grad_norm": 0.19937078654766083, "learning_rate": 0.0001567172103318415, "loss": 0.9598, "step": 3270 }, { "epoch": 0.3778264882325796, "grad_norm": 0.1996837705373764, "learning_rate": 0.00015655125688464062, "loss": 0.9606, "step": 3275 }, { "epoch": 0.3784033225657591, "grad_norm": 0.2030685842037201, "learning_rate": 0.00015638507414944642, "loss": 0.9623, "step": 3280 }, { "epoch": 0.3789801568989386, "grad_norm": 0.20788177847862244, "learning_rate": 0.0001562186628000496, "loss": 1.0118, "step": 3285 }, { "epoch": 0.3795569912321181, "grad_norm": 0.19508282840251923, "learning_rate": 0.00015605202351116765, "loss": 0.9457, "step": 3290 }, { "epoch": 0.3801338255652976, "grad_norm": 0.19498229026794434, "learning_rate": 0.00015588515695844234, "loss": 0.9243, "step": 3295 }, { "epoch": 0.38071065989847713, "grad_norm": 0.2012222409248352, "learning_rate": 0.00015571806381843676, "loss": 0.9631, "step": 3300 }, { "epoch": 0.38128749423165664, "grad_norm": 0.1890476495027542, "learning_rate": 0.00015555074476863282, "loss": 0.931, "step": 3305 }, { "epoch": 0.38186432856483615, "grad_norm": 0.19449810683727264, "learning_rate": 0.00015538320048742835, "loss": 0.945, "step": 3310 }, { "epoch": 0.38244116289801566, "grad_norm": 0.19659358263015747, "learning_rate": 0.00015521543165413428, "loss": 0.9052, "step": 3315 }, { "epoch": 0.3830179972311952, "grad_norm": 0.1930277943611145, "learning_rate": 0.00015504743894897218, "loss": 0.9379, "step": 3320 }, { "epoch": 0.38359483156437474, "grad_norm": 0.1968124955892563, "learning_rate": 0.00015487922305307118, "loss": 0.9839, "step": 3325 }, { "epoch": 0.38417166589755425, "grad_norm": 0.20579907298088074, "learning_rate": 0.0001547107846484653, "loss": 0.9653, "step": 3330 }, { "epoch": 0.38474850023073376, "grad_norm": 0.193470761179924, "learning_rate": 0.00015454212441809095, "loss": 1.0168, "step": 3335 }, { "epoch": 0.38532533456391327, "grad_norm": 0.20029513537883759, "learning_rate": 0.00015437324304578363, "loss": 1.0071, "step": 3340 }, { "epoch": 0.3859021688970928, "grad_norm": 0.19662067294120789, "learning_rate": 0.00015420414121627575, "loss": 0.9741, "step": 3345 }, { "epoch": 0.3864790032302723, "grad_norm": 0.21813516318798065, "learning_rate": 0.00015403481961519334, "loss": 1.012, "step": 3350 }, { "epoch": 0.3870558375634518, "grad_norm": 0.19463011622428894, "learning_rate": 0.00015386527892905365, "loss": 0.9786, "step": 3355 }, { "epoch": 0.3876326718966313, "grad_norm": 0.19774432480335236, "learning_rate": 0.0001536955198452621, "loss": 0.9776, "step": 3360 }, { "epoch": 0.3882095062298108, "grad_norm": 0.2053624838590622, "learning_rate": 0.0001535255430521097, "loss": 0.9645, "step": 3365 }, { "epoch": 0.38878634056299033, "grad_norm": 0.20018287003040314, "learning_rate": 0.00015335534923877013, "loss": 0.9489, "step": 3370 }, { "epoch": 0.38936317489616984, "grad_norm": 0.1886557936668396, "learning_rate": 0.000153184939095297, "loss": 0.9604, "step": 3375 }, { "epoch": 0.38994000922934935, "grad_norm": 0.19467659294605255, "learning_rate": 0.00015301431331262095, "loss": 0.9485, "step": 3380 }, { "epoch": 0.39051684356252886, "grad_norm": 0.20318065583705902, "learning_rate": 0.00015284347258254704, "loss": 0.9876, "step": 3385 }, { "epoch": 0.39109367789570837, "grad_norm": 0.1873011291027069, "learning_rate": 0.0001526724175977518, "loss": 0.9538, "step": 3390 }, { "epoch": 0.3916705122288879, "grad_norm": 0.19449511170387268, "learning_rate": 0.0001525011490517805, "loss": 0.9369, "step": 3395 }, { "epoch": 0.3922473465620674, "grad_norm": 0.19777143001556396, "learning_rate": 0.00015232966763904416, "loss": 0.9926, "step": 3400 }, { "epoch": 0.3928241808952469, "grad_norm": 0.20093326270580292, "learning_rate": 0.00015215797405481704, "loss": 0.9737, "step": 3405 }, { "epoch": 0.3934010152284264, "grad_norm": 0.20396889746189117, "learning_rate": 0.00015198606899523352, "loss": 0.9654, "step": 3410 }, { "epoch": 0.3939778495616059, "grad_norm": 0.20018213987350464, "learning_rate": 0.00015181395315728554, "loss": 0.9835, "step": 3415 }, { "epoch": 0.39455468389478543, "grad_norm": 0.18661239743232727, "learning_rate": 0.00015164162723881947, "loss": 0.9946, "step": 3420 }, { "epoch": 0.39513151822796494, "grad_norm": 0.20598599314689636, "learning_rate": 0.00015146909193853363, "loss": 0.9495, "step": 3425 }, { "epoch": 0.39570835256114445, "grad_norm": 0.2064884752035141, "learning_rate": 0.0001512963479559752, "loss": 0.9524, "step": 3430 }, { "epoch": 0.39628518689432396, "grad_norm": 0.19914241135120392, "learning_rate": 0.00015112339599153746, "loss": 0.9643, "step": 3435 }, { "epoch": 0.39686202122750347, "grad_norm": 0.20408232510089874, "learning_rate": 0.00015095023674645698, "loss": 0.9757, "step": 3440 }, { "epoch": 0.397438855560683, "grad_norm": 0.1998940259218216, "learning_rate": 0.00015077687092281074, "loss": 0.9318, "step": 3445 }, { "epoch": 0.3980156898938625, "grad_norm": 0.20163275301456451, "learning_rate": 0.00015060329922351326, "loss": 0.9465, "step": 3450 }, { "epoch": 0.398592524227042, "grad_norm": 0.20353001356124878, "learning_rate": 0.0001504295223523139, "loss": 0.9631, "step": 3455 }, { "epoch": 0.3991693585602215, "grad_norm": 0.20348602533340454, "learning_rate": 0.00015025554101379379, "loss": 0.9685, "step": 3460 }, { "epoch": 0.399746192893401, "grad_norm": 0.19552728533744812, "learning_rate": 0.0001500813559133631, "loss": 0.9729, "step": 3465 }, { "epoch": 0.40032302722658053, "grad_norm": 0.19121238589286804, "learning_rate": 0.00014990696775725812, "loss": 0.9514, "step": 3470 }, { "epoch": 0.40089986155976004, "grad_norm": 0.18907782435417175, "learning_rate": 0.0001497323772525385, "loss": 0.9234, "step": 3475 }, { "epoch": 0.40147669589293955, "grad_norm": 0.19860269129276276, "learning_rate": 0.00014955758510708434, "loss": 0.9192, "step": 3480 }, { "epoch": 0.40205353022611906, "grad_norm": 0.19196364283561707, "learning_rate": 0.00014938259202959317, "loss": 0.9216, "step": 3485 }, { "epoch": 0.40263036455929857, "grad_norm": 0.20269255340099335, "learning_rate": 0.00014920739872957732, "loss": 0.9733, "step": 3490 }, { "epoch": 0.4032071988924781, "grad_norm": 0.20808875560760498, "learning_rate": 0.00014903200591736087, "loss": 0.9984, "step": 3495 }, { "epoch": 0.4037840332256576, "grad_norm": 0.20577386021614075, "learning_rate": 0.00014885641430407686, "loss": 0.9771, "step": 3500 }, { "epoch": 0.4043608675588371, "grad_norm": 0.20498362183570862, "learning_rate": 0.0001486806246016643, "loss": 0.9883, "step": 3505 }, { "epoch": 0.4049377018920166, "grad_norm": 0.1883656233549118, "learning_rate": 0.00014850463752286543, "loss": 0.9662, "step": 3510 }, { "epoch": 0.4055145362251961, "grad_norm": 0.18973584473133087, "learning_rate": 0.00014832845378122276, "loss": 0.9403, "step": 3515 }, { "epoch": 0.40609137055837563, "grad_norm": 0.20748434960842133, "learning_rate": 0.00014815207409107608, "loss": 0.945, "step": 3520 }, { "epoch": 0.40666820489155514, "grad_norm": 0.19455088675022125, "learning_rate": 0.00014797549916755975, "loss": 0.9646, "step": 3525 }, { "epoch": 0.40724503922473465, "grad_norm": 0.20486250519752502, "learning_rate": 0.0001477987297265997, "loss": 0.9901, "step": 3530 }, { "epoch": 0.40782187355791416, "grad_norm": 0.20417264103889465, "learning_rate": 0.0001476217664849105, "loss": 0.9385, "step": 3535 }, { "epoch": 0.40839870789109367, "grad_norm": 0.19334882497787476, "learning_rate": 0.00014744461015999248, "loss": 1.0049, "step": 3540 }, { "epoch": 0.4089755422242732, "grad_norm": 0.18688935041427612, "learning_rate": 0.00014726726147012889, "loss": 0.9574, "step": 3545 }, { "epoch": 0.4095523765574527, "grad_norm": 0.1947825849056244, "learning_rate": 0.00014708972113438285, "loss": 0.957, "step": 3550 }, { "epoch": 0.4101292108906322, "grad_norm": 0.20580242574214935, "learning_rate": 0.00014691198987259454, "loss": 0.9648, "step": 3555 }, { "epoch": 0.4107060452238117, "grad_norm": 0.1960141509771347, "learning_rate": 0.00014673406840537824, "loss": 0.9558, "step": 3560 }, { "epoch": 0.4112828795569912, "grad_norm": 0.188209667801857, "learning_rate": 0.00014655595745411955, "loss": 0.9761, "step": 3565 }, { "epoch": 0.41185971389017073, "grad_norm": 0.20472858846187592, "learning_rate": 0.00014637765774097206, "loss": 0.9597, "step": 3570 }, { "epoch": 0.41243654822335024, "grad_norm": 0.20903244614601135, "learning_rate": 0.000146199169988855, "loss": 0.9749, "step": 3575 }, { "epoch": 0.41301338255652975, "grad_norm": 0.19724762439727783, "learning_rate": 0.00014602049492144984, "loss": 0.9621, "step": 3580 }, { "epoch": 0.41359021688970926, "grad_norm": 0.190316304564476, "learning_rate": 0.00014584163326319754, "loss": 0.9804, "step": 3585 }, { "epoch": 0.41416705122288877, "grad_norm": 0.18492008745670319, "learning_rate": 0.00014566258573929557, "loss": 0.9791, "step": 3590 }, { "epoch": 0.4147438855560683, "grad_norm": 0.19851504266262054, "learning_rate": 0.0001454833530756951, "loss": 0.9497, "step": 3595 }, { "epoch": 0.4153207198892478, "grad_norm": 0.20571914315223694, "learning_rate": 0.0001453039359990979, "loss": 1.0079, "step": 3600 }, { "epoch": 0.4158975542224273, "grad_norm": 0.22042030096054077, "learning_rate": 0.00014512433523695332, "loss": 0.9887, "step": 3605 }, { "epoch": 0.4164743885556068, "grad_norm": 0.1839991807937622, "learning_rate": 0.0001449445515174557, "loss": 0.9404, "step": 3610 }, { "epoch": 0.4170512228887863, "grad_norm": 0.19041991233825684, "learning_rate": 0.000144764585569541, "loss": 0.9374, "step": 3615 }, { "epoch": 0.41762805722196583, "grad_norm": 0.19904352724552155, "learning_rate": 0.00014458443812288415, "loss": 0.9219, "step": 3620 }, { "epoch": 0.41820489155514534, "grad_norm": 0.19090399146080017, "learning_rate": 0.00014440410990789582, "loss": 0.9379, "step": 3625 }, { "epoch": 0.41878172588832485, "grad_norm": 0.18457092344760895, "learning_rate": 0.00014422360165571976, "loss": 0.9966, "step": 3630 }, { "epoch": 0.41935856022150436, "grad_norm": 0.20009112358093262, "learning_rate": 0.0001440429140982296, "loss": 0.9714, "step": 3635 }, { "epoch": 0.41993539455468387, "grad_norm": 0.19650514423847198, "learning_rate": 0.000143862047968026, "loss": 0.9502, "step": 3640 }, { "epoch": 0.4205122288878634, "grad_norm": 0.20459064841270447, "learning_rate": 0.00014368100399843366, "loss": 0.9208, "step": 3645 }, { "epoch": 0.4210890632210429, "grad_norm": 0.20869080722332, "learning_rate": 0.00014349978292349825, "loss": 0.9478, "step": 3650 }, { "epoch": 0.4216658975542224, "grad_norm": 0.21228952705860138, "learning_rate": 0.0001433183854779836, "loss": 0.957, "step": 3655 }, { "epoch": 0.4222427318874019, "grad_norm": 0.19315268099308014, "learning_rate": 0.00014313681239736865, "loss": 0.973, "step": 3660 }, { "epoch": 0.4228195662205814, "grad_norm": 0.19466190040111542, "learning_rate": 0.00014295506441784435, "loss": 1.0031, "step": 3665 }, { "epoch": 0.423396400553761, "grad_norm": 0.1872572898864746, "learning_rate": 0.00014277314227631086, "loss": 0.9398, "step": 3670 }, { "epoch": 0.4239732348869405, "grad_norm": 0.18912255764007568, "learning_rate": 0.00014259104671037452, "loss": 0.9261, "step": 3675 }, { "epoch": 0.42455006922012, "grad_norm": 0.18942441046237946, "learning_rate": 0.00014240877845834472, "loss": 0.9511, "step": 3680 }, { "epoch": 0.4251269035532995, "grad_norm": 0.2267604023218155, "learning_rate": 0.00014222633825923108, "loss": 0.9919, "step": 3685 }, { "epoch": 0.42570373788647903, "grad_norm": 0.19375835359096527, "learning_rate": 0.00014204372685274039, "loss": 0.9862, "step": 3690 }, { "epoch": 0.42628057221965854, "grad_norm": 0.19178146123886108, "learning_rate": 0.00014186094497927352, "loss": 0.9777, "step": 3695 }, { "epoch": 0.42685740655283805, "grad_norm": 0.198550745844841, "learning_rate": 0.00014167799337992258, "loss": 0.936, "step": 3700 }, { "epoch": 0.42743424088601756, "grad_norm": 0.2064967155456543, "learning_rate": 0.00014149487279646781, "loss": 0.9877, "step": 3705 }, { "epoch": 0.42801107521919707, "grad_norm": 0.20294541120529175, "learning_rate": 0.00014131158397137462, "loss": 0.9306, "step": 3710 }, { "epoch": 0.4285879095523766, "grad_norm": 0.21081644296646118, "learning_rate": 0.00014112812764779053, "loss": 0.9482, "step": 3715 }, { "epoch": 0.4291647438855561, "grad_norm": 0.20666174590587616, "learning_rate": 0.00014094450456954218, "loss": 0.9848, "step": 3720 }, { "epoch": 0.4297415782187356, "grad_norm": 0.19854900240898132, "learning_rate": 0.00014076071548113238, "loss": 0.9593, "step": 3725 }, { "epoch": 0.4303184125519151, "grad_norm": 0.2024005800485611, "learning_rate": 0.0001405767611277369, "loss": 0.9753, "step": 3730 }, { "epoch": 0.4308952468850946, "grad_norm": 0.19519449770450592, "learning_rate": 0.00014039264225520175, "loss": 0.9844, "step": 3735 }, { "epoch": 0.43147208121827413, "grad_norm": 0.19514302909374237, "learning_rate": 0.0001402083596100399, "loss": 0.991, "step": 3740 }, { "epoch": 0.43204891555145364, "grad_norm": 0.1931222528219223, "learning_rate": 0.00014002391393942826, "loss": 0.9693, "step": 3745 }, { "epoch": 0.43262574988463315, "grad_norm": 0.203064426779747, "learning_rate": 0.00013983930599120487, "loss": 0.9822, "step": 3750 }, { "epoch": 0.43320258421781266, "grad_norm": 0.19102463126182556, "learning_rate": 0.0001396545365138657, "loss": 0.9643, "step": 3755 }, { "epoch": 0.43377941855099217, "grad_norm": 0.18826338648796082, "learning_rate": 0.00013946960625656153, "loss": 0.9706, "step": 3760 }, { "epoch": 0.4343562528841717, "grad_norm": 0.20435942709445953, "learning_rate": 0.00013928451596909516, "loss": 0.9388, "step": 3765 }, { "epoch": 0.4349330872173512, "grad_norm": 0.18556763231754303, "learning_rate": 0.00013909926640191813, "loss": 0.9218, "step": 3770 }, { "epoch": 0.4355099215505307, "grad_norm": 0.1957630217075348, "learning_rate": 0.0001389138583061279, "loss": 1.0146, "step": 3775 }, { "epoch": 0.4360867558837102, "grad_norm": 0.19505439698696136, "learning_rate": 0.00013872829243346453, "loss": 1.0006, "step": 3780 }, { "epoch": 0.4366635902168897, "grad_norm": 0.20385412871837616, "learning_rate": 0.00013854256953630797, "loss": 0.997, "step": 3785 }, { "epoch": 0.43724042455006923, "grad_norm": 0.1895400434732437, "learning_rate": 0.00013835669036767466, "loss": 1.0025, "step": 3790 }, { "epoch": 0.43781725888324874, "grad_norm": 0.19983290135860443, "learning_rate": 0.00013817065568121477, "loss": 0.9643, "step": 3795 }, { "epoch": 0.43839409321642825, "grad_norm": 0.20115922391414642, "learning_rate": 0.00013798446623120893, "loss": 0.9225, "step": 3800 }, { "epoch": 0.43897092754960776, "grad_norm": 0.19573155045509338, "learning_rate": 0.00013779812277256537, "loss": 0.9408, "step": 3805 }, { "epoch": 0.43954776188278727, "grad_norm": 0.21077951788902283, "learning_rate": 0.0001376116260608166, "loss": 0.9573, "step": 3810 }, { "epoch": 0.4401245962159668, "grad_norm": 0.2025730162858963, "learning_rate": 0.0001374249768521166, "loss": 0.9674, "step": 3815 }, { "epoch": 0.4407014305491463, "grad_norm": 0.1951024830341339, "learning_rate": 0.0001372381759032377, "loss": 0.9782, "step": 3820 }, { "epoch": 0.4412782648823258, "grad_norm": 0.1892070323228836, "learning_rate": 0.00013705122397156727, "loss": 0.9297, "step": 3825 }, { "epoch": 0.4418550992155053, "grad_norm": 0.2148219794034958, "learning_rate": 0.00013686412181510504, "loss": 0.9735, "step": 3830 }, { "epoch": 0.4424319335486848, "grad_norm": 0.2071019560098648, "learning_rate": 0.0001366768701924598, "loss": 0.9767, "step": 3835 }, { "epoch": 0.44300876788186433, "grad_norm": 0.19552481174468994, "learning_rate": 0.0001364894698628462, "loss": 0.9675, "step": 3840 }, { "epoch": 0.44358560221504384, "grad_norm": 0.2387782335281372, "learning_rate": 0.00013630192158608202, "loss": 1.0083, "step": 3845 }, { "epoch": 0.44416243654822335, "grad_norm": 0.19381476938724518, "learning_rate": 0.00013611422612258477, "loss": 0.9669, "step": 3850 }, { "epoch": 0.44473927088140286, "grad_norm": 0.19321493804454803, "learning_rate": 0.00013592638423336875, "loss": 0.9885, "step": 3855 }, { "epoch": 0.44531610521458237, "grad_norm": 0.20854459702968597, "learning_rate": 0.00013573839668004202, "loss": 1.0008, "step": 3860 }, { "epoch": 0.4458929395477619, "grad_norm": 0.2074405997991562, "learning_rate": 0.00013555026422480313, "loss": 0.9222, "step": 3865 }, { "epoch": 0.4464697738809414, "grad_norm": 0.194743350148201, "learning_rate": 0.00013536198763043823, "loss": 0.9856, "step": 3870 }, { "epoch": 0.4470466082141209, "grad_norm": 0.20063389837741852, "learning_rate": 0.00013517356766031777, "loss": 1.0056, "step": 3875 }, { "epoch": 0.4476234425473004, "grad_norm": 0.1934989094734192, "learning_rate": 0.00013498500507839363, "loss": 0.9928, "step": 3880 }, { "epoch": 0.4482002768804799, "grad_norm": 0.20212046802043915, "learning_rate": 0.00013479630064919593, "loss": 0.8963, "step": 3885 }, { "epoch": 0.44877711121365943, "grad_norm": 0.21239322423934937, "learning_rate": 0.00013460745513782976, "loss": 0.9812, "step": 3890 }, { "epoch": 0.44935394554683894, "grad_norm": 0.2325180619955063, "learning_rate": 0.0001344184693099724, "loss": 0.9476, "step": 3895 }, { "epoch": 0.44993077988001845, "grad_norm": 0.19209115207195282, "learning_rate": 0.00013422934393186994, "loss": 0.9289, "step": 3900 }, { "epoch": 0.45050761421319796, "grad_norm": 0.19307377934455872, "learning_rate": 0.0001340400797703343, "loss": 0.983, "step": 3905 }, { "epoch": 0.45108444854637747, "grad_norm": 0.19258156418800354, "learning_rate": 0.00013385067759274014, "loss": 0.986, "step": 3910 }, { "epoch": 0.451661282879557, "grad_norm": 0.1946646273136139, "learning_rate": 0.00013366113816702164, "loss": 0.9962, "step": 3915 }, { "epoch": 0.4522381172127365, "grad_norm": 0.20190544426441193, "learning_rate": 0.0001334714622616695, "loss": 0.9591, "step": 3920 }, { "epoch": 0.452814951545916, "grad_norm": 0.1982085108757019, "learning_rate": 0.0001332816506457278, "loss": 0.9545, "step": 3925 }, { "epoch": 0.4533917858790955, "grad_norm": 0.18541787564754486, "learning_rate": 0.0001330917040887908, "loss": 0.9652, "step": 3930 }, { "epoch": 0.453968620212275, "grad_norm": 0.19790363311767578, "learning_rate": 0.00013290162336099996, "loss": 0.923, "step": 3935 }, { "epoch": 0.45454545454545453, "grad_norm": 0.21102024614810944, "learning_rate": 0.00013271140923304064, "loss": 0.9701, "step": 3940 }, { "epoch": 0.45512228887863404, "grad_norm": 0.1868615597486496, "learning_rate": 0.00013252106247613914, "loss": 0.9216, "step": 3945 }, { "epoch": 0.45569912321181355, "grad_norm": 0.19408555328845978, "learning_rate": 0.00013233058386205948, "loss": 0.9397, "step": 3950 }, { "epoch": 0.45627595754499306, "grad_norm": 0.19673417508602142, "learning_rate": 0.00013213997416310034, "loss": 0.9116, "step": 3955 }, { "epoch": 0.45685279187817257, "grad_norm": 0.2141309231519699, "learning_rate": 0.00013194923415209183, "loss": 0.9748, "step": 3960 }, { "epoch": 0.4574296262113521, "grad_norm": 0.19085197150707245, "learning_rate": 0.00013175836460239243, "loss": 1.0119, "step": 3965 }, { "epoch": 0.4580064605445316, "grad_norm": 0.19188842177391052, "learning_rate": 0.00013156736628788584, "loss": 0.9487, "step": 3970 }, { "epoch": 0.4585832948777111, "grad_norm": 0.19430477917194366, "learning_rate": 0.00013137623998297785, "loss": 0.9753, "step": 3975 }, { "epoch": 0.4591601292108906, "grad_norm": 0.1962517946958542, "learning_rate": 0.00013118498646259323, "loss": 0.9655, "step": 3980 }, { "epoch": 0.4597369635440701, "grad_norm": 0.20422694087028503, "learning_rate": 0.0001309936065021724, "loss": 1.0045, "step": 3985 }, { "epoch": 0.46031379787724963, "grad_norm": 0.19590309262275696, "learning_rate": 0.0001308021008776686, "loss": 0.9858, "step": 3990 }, { "epoch": 0.46089063221042914, "grad_norm": 0.19435778260231018, "learning_rate": 0.00013061047036554444, "loss": 0.9605, "step": 3995 }, { "epoch": 0.46146746654360865, "grad_norm": 0.2044801414012909, "learning_rate": 0.00013041871574276905, "loss": 0.9802, "step": 4000 }, { "epoch": 0.46204430087678816, "grad_norm": 0.21514268219470978, "learning_rate": 0.00013022683778681458, "loss": 0.9468, "step": 4005 }, { "epoch": 0.46262113520996767, "grad_norm": 0.19823044538497925, "learning_rate": 0.00013003483727565344, "loss": 0.9975, "step": 4010 }, { "epoch": 0.4631979695431472, "grad_norm": 0.1900324672460556, "learning_rate": 0.00012984271498775473, "loss": 0.9505, "step": 4015 }, { "epoch": 0.46377480387632675, "grad_norm": 0.20325466990470886, "learning_rate": 0.00012965047170208145, "loss": 0.958, "step": 4020 }, { "epoch": 0.46435163820950626, "grad_norm": 0.18988734483718872, "learning_rate": 0.00012945810819808715, "loss": 0.9684, "step": 4025 }, { "epoch": 0.46492847254268577, "grad_norm": 0.19148334860801697, "learning_rate": 0.00012926562525571273, "loss": 0.953, "step": 4030 }, { "epoch": 0.4655053068758653, "grad_norm": 0.19238923490047455, "learning_rate": 0.00012907302365538348, "loss": 0.9194, "step": 4035 }, { "epoch": 0.4660821412090448, "grad_norm": 0.1843547224998474, "learning_rate": 0.0001288803041780057, "loss": 0.9375, "step": 4040 }, { "epoch": 0.4666589755422243, "grad_norm": 0.20036543905735016, "learning_rate": 0.0001286874676049637, "loss": 0.9753, "step": 4045 }, { "epoch": 0.4672358098754038, "grad_norm": 0.19511838257312775, "learning_rate": 0.00012849451471811643, "loss": 0.9217, "step": 4050 }, { "epoch": 0.4678126442085833, "grad_norm": 0.2069857269525528, "learning_rate": 0.00012830144629979456, "loss": 0.9803, "step": 4055 }, { "epoch": 0.4683894785417628, "grad_norm": 0.19766615331172943, "learning_rate": 0.00012810826313279717, "loss": 0.9806, "step": 4060 }, { "epoch": 0.46896631287494234, "grad_norm": 0.19592691957950592, "learning_rate": 0.00012791496600038854, "loss": 0.9848, "step": 4065 }, { "epoch": 0.46954314720812185, "grad_norm": 0.19588027894496918, "learning_rate": 0.00012772155568629499, "loss": 0.9274, "step": 4070 }, { "epoch": 0.47011998154130136, "grad_norm": 0.2110724300146103, "learning_rate": 0.00012752803297470187, "loss": 0.9989, "step": 4075 }, { "epoch": 0.47069681587448087, "grad_norm": 0.2043410688638687, "learning_rate": 0.00012733439865025012, "loss": 0.9706, "step": 4080 }, { "epoch": 0.4712736502076604, "grad_norm": 0.22159966826438904, "learning_rate": 0.0001271406534980333, "loss": 0.9409, "step": 4085 }, { "epoch": 0.4718504845408399, "grad_norm": 0.19468598067760468, "learning_rate": 0.0001269467983035943, "loss": 0.9749, "step": 4090 }, { "epoch": 0.4724273188740194, "grad_norm": 0.1822662055492401, "learning_rate": 0.00012675283385292212, "loss": 0.9994, "step": 4095 }, { "epoch": 0.4730041532071989, "grad_norm": 0.19376279413700104, "learning_rate": 0.00012655876093244878, "loss": 0.9757, "step": 4100 }, { "epoch": 0.4735809875403784, "grad_norm": 0.18976148962974548, "learning_rate": 0.00012636458032904617, "loss": 1.0159, "step": 4105 }, { "epoch": 0.4741578218735579, "grad_norm": 0.20439419150352478, "learning_rate": 0.00012617029283002265, "loss": 1.0269, "step": 4110 }, { "epoch": 0.47473465620673744, "grad_norm": 0.18871116638183594, "learning_rate": 0.00012597589922312008, "loss": 0.944, "step": 4115 }, { "epoch": 0.47531149053991695, "grad_norm": 0.19103854894638062, "learning_rate": 0.00012578140029651053, "loss": 0.9384, "step": 4120 }, { "epoch": 0.47588832487309646, "grad_norm": 0.1954331398010254, "learning_rate": 0.00012558679683879301, "loss": 0.9838, "step": 4125 }, { "epoch": 0.47646515920627597, "grad_norm": 0.20008032023906708, "learning_rate": 0.0001253920896389905, "loss": 1.012, "step": 4130 }, { "epoch": 0.4770419935394555, "grad_norm": 0.18915073573589325, "learning_rate": 0.00012519727948654642, "loss": 0.965, "step": 4135 }, { "epoch": 0.477618827872635, "grad_norm": 0.19700497388839722, "learning_rate": 0.00012500236717132178, "loss": 1.0043, "step": 4140 }, { "epoch": 0.4781956622058145, "grad_norm": 0.23750852048397064, "learning_rate": 0.0001248073534835917, "loss": 0.955, "step": 4145 }, { "epoch": 0.478772496538994, "grad_norm": 0.1965513378381729, "learning_rate": 0.0001246122392140424, "loss": 0.9257, "step": 4150 }, { "epoch": 0.4793493308721735, "grad_norm": 0.20664075016975403, "learning_rate": 0.00012441702515376786, "loss": 0.9276, "step": 4155 }, { "epoch": 0.47992616520535303, "grad_norm": 0.20796158909797668, "learning_rate": 0.0001242217120942666, "loss": 0.9813, "step": 4160 }, { "epoch": 0.48050299953853254, "grad_norm": 0.19869232177734375, "learning_rate": 0.00012402630082743868, "loss": 0.9262, "step": 4165 }, { "epoch": 0.48107983387171205, "grad_norm": 0.1941554695367813, "learning_rate": 0.00012383079214558227, "loss": 0.9682, "step": 4170 }, { "epoch": 0.48165666820489156, "grad_norm": 0.19581513106822968, "learning_rate": 0.00012363518684139043, "loss": 0.952, "step": 4175 }, { "epoch": 0.48223350253807107, "grad_norm": 0.18992619216442108, "learning_rate": 0.00012343948570794815, "loss": 0.9541, "step": 4180 }, { "epoch": 0.4828103368712506, "grad_norm": 0.19012705981731415, "learning_rate": 0.00012324368953872883, "loss": 0.9461, "step": 4185 }, { "epoch": 0.4833871712044301, "grad_norm": 0.24560825526714325, "learning_rate": 0.00012304779912759118, "loss": 0.9679, "step": 4190 }, { "epoch": 0.4839640055376096, "grad_norm": 0.19769693911075592, "learning_rate": 0.00012285181526877615, "loss": 0.926, "step": 4195 }, { "epoch": 0.4845408398707891, "grad_norm": 0.20233696699142456, "learning_rate": 0.00012265573875690344, "loss": 0.9854, "step": 4200 }, { "epoch": 0.4851176742039686, "grad_norm": 0.1947474330663681, "learning_rate": 0.0001224595703869685, "loss": 0.9606, "step": 4205 }, { "epoch": 0.48569450853714813, "grad_norm": 0.1961849331855774, "learning_rate": 0.0001222633109543392, "loss": 1.0326, "step": 4210 }, { "epoch": 0.48627134287032764, "grad_norm": 0.1963188499212265, "learning_rate": 0.00012206696125475249, "loss": 0.979, "step": 4215 }, { "epoch": 0.48684817720350715, "grad_norm": 0.19063878059387207, "learning_rate": 0.00012187052208431158, "loss": 0.9483, "step": 4220 }, { "epoch": 0.48742501153668666, "grad_norm": 0.2054065316915512, "learning_rate": 0.0001216739942394822, "loss": 0.9725, "step": 4225 }, { "epoch": 0.48800184586986617, "grad_norm": 0.19815371930599213, "learning_rate": 0.00012147737851708973, "loss": 0.9445, "step": 4230 }, { "epoch": 0.4885786802030457, "grad_norm": 0.19612999260425568, "learning_rate": 0.00012128067571431583, "loss": 0.9167, "step": 4235 }, { "epoch": 0.4891555145362252, "grad_norm": 0.18682846426963806, "learning_rate": 0.00012108388662869519, "loss": 0.9596, "step": 4240 }, { "epoch": 0.4897323488694047, "grad_norm": 0.19077306985855103, "learning_rate": 0.0001208870120581124, "loss": 0.9491, "step": 4245 }, { "epoch": 0.4903091832025842, "grad_norm": 0.1911584734916687, "learning_rate": 0.00012069005280079862, "loss": 0.9399, "step": 4250 }, { "epoch": 0.4908860175357637, "grad_norm": 0.18746261298656464, "learning_rate": 0.00012049300965532832, "loss": 0.9954, "step": 4255 }, { "epoch": 0.49146285186894323, "grad_norm": 0.19653597474098206, "learning_rate": 0.00012029588342061621, "loss": 0.9635, "step": 4260 }, { "epoch": 0.49203968620212274, "grad_norm": 0.1984453648328781, "learning_rate": 0.00012009867489591377, "loss": 0.901, "step": 4265 }, { "epoch": 0.49261652053530225, "grad_norm": 0.20906962454319, "learning_rate": 0.00011990138488080622, "loss": 0.9282, "step": 4270 }, { "epoch": 0.49319335486848176, "grad_norm": 0.2029707431793213, "learning_rate": 0.00011970401417520913, "loss": 0.9731, "step": 4275 }, { "epoch": 0.49377018920166127, "grad_norm": 0.19050882756710052, "learning_rate": 0.00011950656357936525, "loss": 0.9431, "step": 4280 }, { "epoch": 0.4943470235348408, "grad_norm": 0.19275051355361938, "learning_rate": 0.00011930903389384123, "loss": 0.9576, "step": 4285 }, { "epoch": 0.4949238578680203, "grad_norm": 0.21345672011375427, "learning_rate": 0.00011911142591952437, "loss": 0.9696, "step": 4290 }, { "epoch": 0.4955006922011998, "grad_norm": 0.1972283273935318, "learning_rate": 0.0001189137404576195, "loss": 0.9669, "step": 4295 }, { "epoch": 0.4960775265343793, "grad_norm": 0.19907104969024658, "learning_rate": 0.00011871597830964551, "loss": 0.9477, "step": 4300 }, { "epoch": 0.4966543608675588, "grad_norm": 0.1972326934337616, "learning_rate": 0.00011851814027743223, "loss": 0.9962, "step": 4305 }, { "epoch": 0.49723119520073833, "grad_norm": 0.18552403151988983, "learning_rate": 0.00011832022716311722, "loss": 0.9556, "step": 4310 }, { "epoch": 0.49780802953391784, "grad_norm": 0.18974804878234863, "learning_rate": 0.00011812223976914243, "loss": 0.9446, "step": 4315 }, { "epoch": 0.49838486386709735, "grad_norm": 0.2123657763004303, "learning_rate": 0.00011792417889825094, "loss": 0.9444, "step": 4320 }, { "epoch": 0.49896169820027686, "grad_norm": 0.1999022513628006, "learning_rate": 0.00011772604535348382, "loss": 0.9767, "step": 4325 }, { "epoch": 0.49953853253345637, "grad_norm": 0.19423572719097137, "learning_rate": 0.00011752783993817675, "loss": 0.9548, "step": 4330 }, { "epoch": 0.5001153668666359, "grad_norm": 0.19109071791172028, "learning_rate": 0.00011732956345595682, "loss": 0.9343, "step": 4335 }, { "epoch": 0.5006922011998154, "grad_norm": 0.18442274630069733, "learning_rate": 0.00011713121671073924, "loss": 0.9759, "step": 4340 }, { "epoch": 0.501269035532995, "grad_norm": 0.20228290557861328, "learning_rate": 0.00011693280050672417, "loss": 0.9676, "step": 4345 }, { "epoch": 0.5018458698661744, "grad_norm": 0.19034543633460999, "learning_rate": 0.00011673431564839327, "loss": 0.87, "step": 4350 }, { "epoch": 0.502422704199354, "grad_norm": 0.2001582235097885, "learning_rate": 0.0001165357629405067, "loss": 0.9489, "step": 4355 }, { "epoch": 0.5029995385325334, "grad_norm": 0.18779706954956055, "learning_rate": 0.00011633714318809962, "loss": 0.9755, "step": 4360 }, { "epoch": 0.503576372865713, "grad_norm": 0.19904857873916626, "learning_rate": 0.00011613845719647909, "loss": 0.9586, "step": 4365 }, { "epoch": 0.5041532071988925, "grad_norm": 0.20691759884357452, "learning_rate": 0.00011593970577122067, "loss": 0.9716, "step": 4370 }, { "epoch": 0.504730041532072, "grad_norm": 0.19286218285560608, "learning_rate": 0.00011574088971816523, "loss": 0.9424, "step": 4375 }, { "epoch": 0.5053068758652515, "grad_norm": 0.1918749362230301, "learning_rate": 0.00011554200984341577, "loss": 0.9339, "step": 4380 }, { "epoch": 0.505883710198431, "grad_norm": 0.20204107463359833, "learning_rate": 0.00011534306695333395, "loss": 1.0028, "step": 4385 }, { "epoch": 0.5064605445316105, "grad_norm": 0.21450471878051758, "learning_rate": 0.00011514406185453692, "loss": 0.95, "step": 4390 }, { "epoch": 0.5070373788647901, "grad_norm": 0.22814354300498962, "learning_rate": 0.00011494499535389418, "loss": 0.9179, "step": 4395 }, { "epoch": 0.5076142131979695, "grad_norm": 0.19863393902778625, "learning_rate": 0.00011474586825852405, "loss": 0.9664, "step": 4400 }, { "epoch": 0.5081910475311491, "grad_norm": 0.21064569056034088, "learning_rate": 0.00011454668137579059, "loss": 0.9269, "step": 4405 }, { "epoch": 0.5087678818643285, "grad_norm": 0.19144834578037262, "learning_rate": 0.00011434743551330028, "loss": 0.9448, "step": 4410 }, { "epoch": 0.5093447161975081, "grad_norm": 0.19969388842582703, "learning_rate": 0.00011414813147889868, "loss": 0.9967, "step": 4415 }, { "epoch": 0.5099215505306876, "grad_norm": 0.1932876855134964, "learning_rate": 0.00011394877008066731, "loss": 0.9726, "step": 4420 }, { "epoch": 0.5104983848638671, "grad_norm": 0.2045474648475647, "learning_rate": 0.00011374935212692018, "loss": 0.9356, "step": 4425 }, { "epoch": 0.5110752191970466, "grad_norm": 0.2248217910528183, "learning_rate": 0.00011354987842620061, "loss": 0.9842, "step": 4430 }, { "epoch": 0.5116520535302261, "grad_norm": 0.18337203562259674, "learning_rate": 0.000113350349787278, "loss": 1.0268, "step": 4435 }, { "epoch": 0.5122288878634056, "grad_norm": 0.21334248781204224, "learning_rate": 0.00011315076701914449, "loss": 0.952, "step": 4440 }, { "epoch": 0.5128057221965852, "grad_norm": 0.20142047107219696, "learning_rate": 0.00011295113093101162, "loss": 0.9348, "step": 4445 }, { "epoch": 0.5133825565297646, "grad_norm": 0.19259627163410187, "learning_rate": 0.0001127514423323072, "loss": 0.9867, "step": 4450 }, { "epoch": 0.5139593908629442, "grad_norm": 0.2004094421863556, "learning_rate": 0.00011255170203267186, "loss": 0.9208, "step": 4455 }, { "epoch": 0.5145362251961236, "grad_norm": 0.18530438840389252, "learning_rate": 0.000112351910841956, "loss": 0.9743, "step": 4460 }, { "epoch": 0.5151130595293032, "grad_norm": 0.20678523182868958, "learning_rate": 0.00011215206957021618, "loss": 0.9476, "step": 4465 }, { "epoch": 0.5156898938624827, "grad_norm": 0.20912997424602509, "learning_rate": 0.00011195217902771212, "loss": 0.9338, "step": 4470 }, { "epoch": 0.5162667281956622, "grad_norm": 0.1984136551618576, "learning_rate": 0.0001117522400249033, "loss": 0.9563, "step": 4475 }, { "epoch": 0.5168435625288417, "grad_norm": 0.21053320169448853, "learning_rate": 0.00011155225337244562, "loss": 0.9753, "step": 4480 }, { "epoch": 0.5174203968620212, "grad_norm": 0.19541747868061066, "learning_rate": 0.00011135221988118825, "loss": 0.9495, "step": 4485 }, { "epoch": 0.5179972311952007, "grad_norm": 0.1899503469467163, "learning_rate": 0.00011115214036217026, "loss": 0.9259, "step": 4490 }, { "epoch": 0.5185740655283803, "grad_norm": 0.1909545511007309, "learning_rate": 0.0001109520156266173, "loss": 0.9308, "step": 4495 }, { "epoch": 0.5191508998615597, "grad_norm": 0.18733327090740204, "learning_rate": 0.00011075184648593838, "loss": 0.989, "step": 4500 }, { "epoch": 0.5197277341947393, "grad_norm": 0.1994892954826355, "learning_rate": 0.00011055163375172257, "loss": 0.9611, "step": 4505 }, { "epoch": 0.5203045685279187, "grad_norm": 0.1912652552127838, "learning_rate": 0.00011035137823573561, "loss": 0.9785, "step": 4510 }, { "epoch": 0.5208814028610983, "grad_norm": 0.20350737869739532, "learning_rate": 0.0001101510807499168, "loss": 0.9672, "step": 4515 }, { "epoch": 0.5214582371942778, "grad_norm": 0.2018175572156906, "learning_rate": 0.00010995074210637557, "loss": 0.9362, "step": 4520 }, { "epoch": 0.5220350715274573, "grad_norm": 0.20151209831237793, "learning_rate": 0.00010975036311738818, "loss": 0.9485, "step": 4525 }, { "epoch": 0.5226119058606368, "grad_norm": 0.21562981605529785, "learning_rate": 0.00010954994459539452, "loss": 0.9553, "step": 4530 }, { "epoch": 0.5231887401938163, "grad_norm": 0.19124732911586761, "learning_rate": 0.00010934948735299475, "loss": 0.9422, "step": 4535 }, { "epoch": 0.5237655745269958, "grad_norm": 0.20208267867565155, "learning_rate": 0.00010914899220294607, "loss": 0.9729, "step": 4540 }, { "epoch": 0.5243424088601754, "grad_norm": 0.20815403759479523, "learning_rate": 0.00010894845995815928, "loss": 0.98, "step": 4545 }, { "epoch": 0.5249192431933549, "grad_norm": 0.19528432190418243, "learning_rate": 0.00010874789143169568, "loss": 0.9301, "step": 4550 }, { "epoch": 0.5254960775265344, "grad_norm": 0.20784156024456024, "learning_rate": 0.00010854728743676362, "loss": 0.9553, "step": 4555 }, { "epoch": 0.5260729118597139, "grad_norm": 0.20081031322479248, "learning_rate": 0.00010834664878671525, "loss": 0.943, "step": 4560 }, { "epoch": 0.5266497461928934, "grad_norm": 0.19570979475975037, "learning_rate": 0.00010814597629504324, "loss": 0.9876, "step": 4565 }, { "epoch": 0.527226580526073, "grad_norm": 0.1984855979681015, "learning_rate": 0.00010794527077537755, "loss": 1.0065, "step": 4570 }, { "epoch": 0.5278034148592524, "grad_norm": 0.18974007666110992, "learning_rate": 0.00010774453304148192, "loss": 0.881, "step": 4575 }, { "epoch": 0.528380249192432, "grad_norm": 0.21458855271339417, "learning_rate": 0.00010754376390725074, "loss": 0.922, "step": 4580 }, { "epoch": 0.5289570835256114, "grad_norm": 0.20850373804569244, "learning_rate": 0.00010734296418670582, "loss": 0.9884, "step": 4585 }, { "epoch": 0.529533917858791, "grad_norm": 0.20355555415153503, "learning_rate": 0.00010714213469399283, "loss": 0.9743, "step": 4590 }, { "epoch": 0.5301107521919705, "grad_norm": 0.20082198083400726, "learning_rate": 0.00010694127624337826, "loss": 0.9368, "step": 4595 }, { "epoch": 0.53068758652515, "grad_norm": 0.1965116560459137, "learning_rate": 0.00010674038964924597, "loss": 0.9374, "step": 4600 }, { "epoch": 0.5312644208583295, "grad_norm": 0.1953085958957672, "learning_rate": 0.00010653947572609393, "loss": 0.9168, "step": 4605 }, { "epoch": 0.531841255191509, "grad_norm": 0.18610374629497528, "learning_rate": 0.0001063385352885309, "loss": 0.9191, "step": 4610 }, { "epoch": 0.5324180895246885, "grad_norm": 0.18412019312381744, "learning_rate": 0.00010613756915127319, "loss": 0.9549, "step": 4615 }, { "epoch": 0.5329949238578681, "grad_norm": 0.20428583025932312, "learning_rate": 0.00010593657812914129, "loss": 0.9849, "step": 4620 }, { "epoch": 0.5335717581910475, "grad_norm": 0.1892022341489792, "learning_rate": 0.00010573556303705652, "loss": 0.9892, "step": 4625 }, { "epoch": 0.5341485925242271, "grad_norm": 0.19151116907596588, "learning_rate": 0.00010553452469003789, "loss": 0.9291, "step": 4630 }, { "epoch": 0.5347254268574065, "grad_norm": 0.20716647803783417, "learning_rate": 0.00010533346390319867, "loss": 0.9391, "step": 4635 }, { "epoch": 0.5353022611905861, "grad_norm": 0.19385290145874023, "learning_rate": 0.00010513238149174304, "loss": 1.0, "step": 4640 }, { "epoch": 0.5358790955237656, "grad_norm": 0.20355677604675293, "learning_rate": 0.00010493127827096298, "loss": 0.9311, "step": 4645 }, { "epoch": 0.5364559298569451, "grad_norm": 0.2063637375831604, "learning_rate": 0.00010473015505623477, "loss": 0.9521, "step": 4650 }, { "epoch": 0.5370327641901246, "grad_norm": 0.201882466673851, "learning_rate": 0.00010452901266301574, "loss": 0.9302, "step": 4655 }, { "epoch": 0.5376095985233041, "grad_norm": 0.19823043048381805, "learning_rate": 0.000104327851906841, "loss": 1.0078, "step": 4660 }, { "epoch": 0.5381864328564836, "grad_norm": 0.19284914433956146, "learning_rate": 0.00010412667360332013, "loss": 0.9246, "step": 4665 }, { "epoch": 0.5387632671896632, "grad_norm": 0.19816389679908752, "learning_rate": 0.00010392547856813384, "loss": 0.9565, "step": 4670 }, { "epoch": 0.5393401015228426, "grad_norm": 0.2038826197385788, "learning_rate": 0.00010372426761703067, "loss": 0.9511, "step": 4675 }, { "epoch": 0.5399169358560222, "grad_norm": 0.2072780877351761, "learning_rate": 0.00010352304156582376, "loss": 0.9883, "step": 4680 }, { "epoch": 0.5404937701892016, "grad_norm": 0.2100173830986023, "learning_rate": 0.0001033218012303873, "loss": 0.9626, "step": 4685 }, { "epoch": 0.5410706045223812, "grad_norm": 0.1984483003616333, "learning_rate": 0.00010312054742665362, "loss": 0.9235, "step": 4690 }, { "epoch": 0.5416474388555607, "grad_norm": 0.19214801490306854, "learning_rate": 0.0001029192809706095, "loss": 0.9675, "step": 4695 }, { "epoch": 0.5422242731887402, "grad_norm": 0.19624173641204834, "learning_rate": 0.00010271800267829308, "loss": 0.9274, "step": 4700 }, { "epoch": 0.5428011075219197, "grad_norm": 0.19869272410869598, "learning_rate": 0.00010251671336579048, "loss": 0.9557, "step": 4705 }, { "epoch": 0.5433779418550992, "grad_norm": 0.191603422164917, "learning_rate": 0.00010231541384923248, "loss": 0.9239, "step": 4710 }, { "epoch": 0.5439547761882787, "grad_norm": 0.18842673301696777, "learning_rate": 0.0001021141049447913, "loss": 0.9652, "step": 4715 }, { "epoch": 0.5445316105214583, "grad_norm": 0.1997225433588028, "learning_rate": 0.00010191278746867714, "loss": 0.964, "step": 4720 }, { "epoch": 0.5451084448546377, "grad_norm": 0.22433891892433167, "learning_rate": 0.00010171146223713496, "loss": 0.9204, "step": 4725 }, { "epoch": 0.5456852791878173, "grad_norm": 0.20622776448726654, "learning_rate": 0.00010151013006644128, "loss": 0.9701, "step": 4730 }, { "epoch": 0.5462621135209967, "grad_norm": 0.19883517920970917, "learning_rate": 0.00010130879177290061, "loss": 0.9816, "step": 4735 }, { "epoch": 0.5468389478541763, "grad_norm": 0.1920338273048401, "learning_rate": 0.00010110744817284232, "loss": 0.9579, "step": 4740 }, { "epoch": 0.5474157821873558, "grad_norm": 0.18235036730766296, "learning_rate": 0.00010090610008261738, "loss": 0.9488, "step": 4745 }, { "epoch": 0.5479926165205353, "grad_norm": 0.2129899263381958, "learning_rate": 0.00010070474831859486, "loss": 1.0436, "step": 4750 }, { "epoch": 0.5485694508537148, "grad_norm": 0.19054663181304932, "learning_rate": 0.0001005033936971588, "loss": 0.9736, "step": 4755 }, { "epoch": 0.5491462851868943, "grad_norm": 0.199018195271492, "learning_rate": 0.00010030203703470477, "loss": 0.9589, "step": 4760 }, { "epoch": 0.5497231195200738, "grad_norm": 0.20436997711658478, "learning_rate": 0.00010010067914763668, "loss": 0.9825, "step": 4765 }, { "epoch": 0.5502999538532534, "grad_norm": 0.19828738272190094, "learning_rate": 9.989932085236334e-05, "loss": 0.9739, "step": 4770 }, { "epoch": 0.5508767881864328, "grad_norm": 0.19354400038719177, "learning_rate": 9.969796296529525e-05, "loss": 0.968, "step": 4775 }, { "epoch": 0.5514536225196124, "grad_norm": 0.2065267264842987, "learning_rate": 9.949660630284122e-05, "loss": 0.9612, "step": 4780 }, { "epoch": 0.5520304568527918, "grad_norm": 0.18790316581726074, "learning_rate": 9.929525168140516e-05, "loss": 0.9412, "step": 4785 }, { "epoch": 0.5526072911859714, "grad_norm": 0.19820088148117065, "learning_rate": 9.909389991738263e-05, "loss": 0.9354, "step": 4790 }, { "epoch": 0.5531841255191509, "grad_norm": 0.1902572065591812, "learning_rate": 9.889255182715769e-05, "loss": 0.9176, "step": 4795 }, { "epoch": 0.5537609598523304, "grad_norm": 0.18741828203201294, "learning_rate": 9.869120822709946e-05, "loss": 0.9258, "step": 4800 }, { "epoch": 0.5543377941855099, "grad_norm": 0.18927207589149475, "learning_rate": 9.848986993355877e-05, "loss": 0.9675, "step": 4805 }, { "epoch": 0.5549146285186894, "grad_norm": 0.20099502801895142, "learning_rate": 9.828853776286505e-05, "loss": 0.937, "step": 4810 }, { "epoch": 0.5554914628518689, "grad_norm": 0.19836010038852692, "learning_rate": 9.808721253132289e-05, "loss": 0.9674, "step": 4815 }, { "epoch": 0.5560682971850485, "grad_norm": 0.18979863822460175, "learning_rate": 9.78858950552087e-05, "loss": 0.9916, "step": 4820 }, { "epoch": 0.5566451315182279, "grad_norm": 0.191952183842659, "learning_rate": 9.768458615076751e-05, "loss": 0.9519, "step": 4825 }, { "epoch": 0.5572219658514075, "grad_norm": 0.19669634103775024, "learning_rate": 9.748328663420952e-05, "loss": 0.9389, "step": 4830 }, { "epoch": 0.5577988001845869, "grad_norm": 0.20401979982852936, "learning_rate": 9.728199732170696e-05, "loss": 0.9875, "step": 4835 }, { "epoch": 0.5583756345177665, "grad_norm": 0.2189038097858429, "learning_rate": 9.708071902939054e-05, "loss": 0.9388, "step": 4840 }, { "epoch": 0.558952468850946, "grad_norm": 0.2018914520740509, "learning_rate": 9.687945257334641e-05, "loss": 1.0281, "step": 4845 }, { "epoch": 0.5595293031841255, "grad_norm": 0.20172595977783203, "learning_rate": 9.667819876961272e-05, "loss": 0.9957, "step": 4850 }, { "epoch": 0.560106137517305, "grad_norm": 0.2063581645488739, "learning_rate": 9.647695843417628e-05, "loss": 0.9723, "step": 4855 }, { "epoch": 0.5606829718504845, "grad_norm": 0.2113044112920761, "learning_rate": 9.627573238296933e-05, "loss": 0.9577, "step": 4860 }, { "epoch": 0.561259806183664, "grad_norm": 0.20113973319530487, "learning_rate": 9.60745214318662e-05, "loss": 0.9239, "step": 4865 }, { "epoch": 0.5618366405168436, "grad_norm": 0.20177756249904633, "learning_rate": 9.58733263966799e-05, "loss": 1.0015, "step": 4870 }, { "epoch": 0.562413474850023, "grad_norm": 0.19315795600414276, "learning_rate": 9.567214809315903e-05, "loss": 0.9231, "step": 4875 }, { "epoch": 0.5629903091832026, "grad_norm": 0.19903239607810974, "learning_rate": 9.547098733698428e-05, "loss": 0.9608, "step": 4880 }, { "epoch": 0.563567143516382, "grad_norm": 0.19510619342327118, "learning_rate": 9.526984494376524e-05, "loss": 1.0001, "step": 4885 }, { "epoch": 0.5641439778495616, "grad_norm": 0.1909225881099701, "learning_rate": 9.5068721729037e-05, "loss": 0.9288, "step": 4890 }, { "epoch": 0.5647208121827412, "grad_norm": 0.19778591394424438, "learning_rate": 9.486761850825694e-05, "loss": 0.9509, "step": 4895 }, { "epoch": 0.5652976465159206, "grad_norm": 0.19866321980953217, "learning_rate": 9.466653609680137e-05, "loss": 0.9815, "step": 4900 }, { "epoch": 0.5658744808491002, "grad_norm": 0.19185493886470795, "learning_rate": 9.446547530996214e-05, "loss": 0.9667, "step": 4905 }, { "epoch": 0.5664513151822796, "grad_norm": 0.22153504192829132, "learning_rate": 9.426443696294351e-05, "loss": 0.9367, "step": 4910 }, { "epoch": 0.5670281495154592, "grad_norm": 0.18705500662326813, "learning_rate": 9.406342187085875e-05, "loss": 0.9668, "step": 4915 }, { "epoch": 0.5676049838486387, "grad_norm": 0.19349828362464905, "learning_rate": 9.386243084872682e-05, "loss": 0.892, "step": 4920 }, { "epoch": 0.5681818181818182, "grad_norm": 0.21577003598213196, "learning_rate": 9.36614647114691e-05, "loss": 0.9722, "step": 4925 }, { "epoch": 0.5687586525149977, "grad_norm": 0.2050502598285675, "learning_rate": 9.34605242739061e-05, "loss": 0.9489, "step": 4930 }, { "epoch": 0.5693354868481773, "grad_norm": 0.19747060537338257, "learning_rate": 9.325961035075405e-05, "loss": 0.9862, "step": 4935 }, { "epoch": 0.5699123211813567, "grad_norm": 0.2020748257637024, "learning_rate": 9.305872375662176e-05, "loss": 1.017, "step": 4940 }, { "epoch": 0.5704891555145363, "grad_norm": 0.19945184886455536, "learning_rate": 9.285786530600718e-05, "loss": 0.9749, "step": 4945 }, { "epoch": 0.5710659898477157, "grad_norm": 0.20474183559417725, "learning_rate": 9.26570358132942e-05, "loss": 0.93, "step": 4950 }, { "epoch": 0.5716428241808953, "grad_norm": 0.1998247355222702, "learning_rate": 9.245623609274928e-05, "loss": 0.9278, "step": 4955 }, { "epoch": 0.5722196585140747, "grad_norm": 0.19674921035766602, "learning_rate": 9.225546695851815e-05, "loss": 0.9899, "step": 4960 }, { "epoch": 0.5727964928472543, "grad_norm": 0.19790132343769073, "learning_rate": 9.20547292246225e-05, "loss": 0.9343, "step": 4965 }, { "epoch": 0.5733733271804338, "grad_norm": 0.20167304575443268, "learning_rate": 9.185402370495677e-05, "loss": 0.9547, "step": 4970 }, { "epoch": 0.5739501615136133, "grad_norm": 0.19420599937438965, "learning_rate": 9.165335121328477e-05, "loss": 0.9824, "step": 4975 }, { "epoch": 0.5745269958467928, "grad_norm": 0.18731547892093658, "learning_rate": 9.14527125632364e-05, "loss": 0.9304, "step": 4980 }, { "epoch": 0.5751038301799724, "grad_norm": 0.19297674298286438, "learning_rate": 9.125210856830433e-05, "loss": 0.9895, "step": 4985 }, { "epoch": 0.5756806645131518, "grad_norm": 0.19416366517543793, "learning_rate": 9.105154004184071e-05, "loss": 0.9606, "step": 4990 }, { "epoch": 0.5762574988463314, "grad_norm": 0.19022150337696075, "learning_rate": 9.085100779705398e-05, "loss": 0.9269, "step": 4995 }, { "epoch": 0.5768343331795108, "grad_norm": 0.19337041676044464, "learning_rate": 9.065051264700527e-05, "loss": 0.9502, "step": 5000 }, { "epoch": 0.5774111675126904, "grad_norm": 0.19673341512680054, "learning_rate": 9.045005540460552e-05, "loss": 0.9453, "step": 5005 }, { "epoch": 0.5779880018458698, "grad_norm": 0.19977766275405884, "learning_rate": 9.024963688261186e-05, "loss": 0.9208, "step": 5010 }, { "epoch": 0.5785648361790494, "grad_norm": 0.19135598838329315, "learning_rate": 9.004925789362446e-05, "loss": 0.961, "step": 5015 }, { "epoch": 0.5791416705122289, "grad_norm": 0.1971130669116974, "learning_rate": 8.984891925008321e-05, "loss": 0.945, "step": 5020 }, { "epoch": 0.5797185048454084, "grad_norm": 0.19552946090698242, "learning_rate": 8.964862176426443e-05, "loss": 0.9618, "step": 5025 }, { "epoch": 0.5802953391785879, "grad_norm": 0.18302129209041595, "learning_rate": 8.944836624827748e-05, "loss": 0.956, "step": 5030 }, { "epoch": 0.5808721735117675, "grad_norm": 0.2028164565563202, "learning_rate": 8.924815351406163e-05, "loss": 1.0094, "step": 5035 }, { "epoch": 0.5814490078449469, "grad_norm": 0.19891835749149323, "learning_rate": 8.904798437338272e-05, "loss": 0.9727, "step": 5040 }, { "epoch": 0.5820258421781265, "grad_norm": 0.1948157548904419, "learning_rate": 8.884785963782975e-05, "loss": 1.0068, "step": 5045 }, { "epoch": 0.5826026765113059, "grad_norm": 0.19020549952983856, "learning_rate": 8.864778011881175e-05, "loss": 0.9164, "step": 5050 }, { "epoch": 0.5831795108444855, "grad_norm": 0.20140686631202698, "learning_rate": 8.84477466275544e-05, "loss": 0.949, "step": 5055 }, { "epoch": 0.583756345177665, "grad_norm": 0.203651562333107, "learning_rate": 8.824775997509675e-05, "loss": 0.9788, "step": 5060 }, { "epoch": 0.5843331795108445, "grad_norm": 0.20070117712020874, "learning_rate": 8.80478209722879e-05, "loss": 0.9566, "step": 5065 }, { "epoch": 0.584910013844024, "grad_norm": 0.2062043994665146, "learning_rate": 8.784793042978384e-05, "loss": 0.9331, "step": 5070 }, { "epoch": 0.5854868481772035, "grad_norm": 0.19828034937381744, "learning_rate": 8.764808915804401e-05, "loss": 0.9926, "step": 5075 }, { "epoch": 0.586063682510383, "grad_norm": 0.2180069088935852, "learning_rate": 8.744829796732812e-05, "loss": 1.0008, "step": 5080 }, { "epoch": 0.5866405168435626, "grad_norm": 0.2032460868358612, "learning_rate": 8.724855766769282e-05, "loss": 1.017, "step": 5085 }, { "epoch": 0.587217351176742, "grad_norm": 0.21724148094654083, "learning_rate": 8.70488690689884e-05, "loss": 0.9026, "step": 5090 }, { "epoch": 0.5877941855099216, "grad_norm": 0.21417102217674255, "learning_rate": 8.684923298085555e-05, "loss": 0.9439, "step": 5095 }, { "epoch": 0.588371019843101, "grad_norm": 0.19951827824115753, "learning_rate": 8.6649650212722e-05, "loss": 0.9587, "step": 5100 }, { "epoch": 0.5889478541762806, "grad_norm": 0.20338566601276398, "learning_rate": 8.645012157379941e-05, "loss": 1.0392, "step": 5105 }, { "epoch": 0.58952468850946, "grad_norm": 0.18534879386425018, "learning_rate": 8.625064787307986e-05, "loss": 0.975, "step": 5110 }, { "epoch": 0.5901015228426396, "grad_norm": 0.20185095071792603, "learning_rate": 8.605122991933271e-05, "loss": 0.9446, "step": 5115 }, { "epoch": 0.5906783571758191, "grad_norm": 0.19836807250976562, "learning_rate": 8.585186852110134e-05, "loss": 0.9678, "step": 5120 }, { "epoch": 0.5912551915089986, "grad_norm": 0.20136135816574097, "learning_rate": 8.565256448669976e-05, "loss": 0.9662, "step": 5125 }, { "epoch": 0.5918320258421781, "grad_norm": 0.1838986873626709, "learning_rate": 8.545331862420944e-05, "loss": 0.9133, "step": 5130 }, { "epoch": 0.5924088601753577, "grad_norm": 0.19178135693073273, "learning_rate": 8.525413174147598e-05, "loss": 0.9451, "step": 5135 }, { "epoch": 0.5929856945085371, "grad_norm": 0.20643360912799835, "learning_rate": 8.505500464610584e-05, "loss": 0.9355, "step": 5140 }, { "epoch": 0.5935625288417167, "grad_norm": 0.20259740948677063, "learning_rate": 8.485593814546307e-05, "loss": 0.9678, "step": 5145 }, { "epoch": 0.5941393631748961, "grad_norm": 0.19384346902370453, "learning_rate": 8.465693304666606e-05, "loss": 0.966, "step": 5150 }, { "epoch": 0.5947161975080757, "grad_norm": 0.2989238202571869, "learning_rate": 8.445799015658427e-05, "loss": 0.9356, "step": 5155 }, { "epoch": 0.5952930318412551, "grad_norm": 0.212250217795372, "learning_rate": 8.425911028183479e-05, "loss": 0.9423, "step": 5160 }, { "epoch": 0.5958698661744347, "grad_norm": 0.19762969017028809, "learning_rate": 8.406029422877937e-05, "loss": 0.9727, "step": 5165 }, { "epoch": 0.5964467005076142, "grad_norm": 0.18385392427444458, "learning_rate": 8.386154280352094e-05, "loss": 0.9671, "step": 5170 }, { "epoch": 0.5970235348407937, "grad_norm": 0.21260400116443634, "learning_rate": 8.366285681190039e-05, "loss": 0.9678, "step": 5175 }, { "epoch": 0.5976003691739732, "grad_norm": 0.20906022191047668, "learning_rate": 8.34642370594933e-05, "loss": 0.9252, "step": 5180 }, { "epoch": 0.5981772035071528, "grad_norm": 0.18937109410762787, "learning_rate": 8.326568435160677e-05, "loss": 1.0374, "step": 5185 }, { "epoch": 0.5987540378403322, "grad_norm": 0.19254235923290253, "learning_rate": 8.306719949327588e-05, "loss": 0.9621, "step": 5190 }, { "epoch": 0.5993308721735118, "grad_norm": 0.1981174647808075, "learning_rate": 8.286878328926077e-05, "loss": 0.9276, "step": 5195 }, { "epoch": 0.5999077065066912, "grad_norm": 0.2028452455997467, "learning_rate": 8.26704365440432e-05, "loss": 0.9824, "step": 5200 }, { "epoch": 0.6004845408398708, "grad_norm": 0.2027309387922287, "learning_rate": 8.247216006182326e-05, "loss": 0.9204, "step": 5205 }, { "epoch": 0.6010613751730502, "grad_norm": 0.19740572571754456, "learning_rate": 8.227395464651618e-05, "loss": 0.9799, "step": 5210 }, { "epoch": 0.6016382095062298, "grad_norm": 0.19563211500644684, "learning_rate": 8.20758211017491e-05, "loss": 0.9518, "step": 5215 }, { "epoch": 0.6022150438394093, "grad_norm": 0.19438865780830383, "learning_rate": 8.187776023085762e-05, "loss": 1.0022, "step": 5220 }, { "epoch": 0.6027918781725888, "grad_norm": 0.1977875679731369, "learning_rate": 8.167977283688282e-05, "loss": 0.997, "step": 5225 }, { "epoch": 0.6033687125057683, "grad_norm": 0.22014763951301575, "learning_rate": 8.148185972256778e-05, "loss": 0.9436, "step": 5230 }, { "epoch": 0.6039455468389479, "grad_norm": 0.19123868644237518, "learning_rate": 8.128402169035451e-05, "loss": 0.971, "step": 5235 }, { "epoch": 0.6045223811721273, "grad_norm": 0.18875616788864136, "learning_rate": 8.108625954238051e-05, "loss": 0.9399, "step": 5240 }, { "epoch": 0.6050992155053069, "grad_norm": 0.20804201066493988, "learning_rate": 8.088857408047562e-05, "loss": 0.9634, "step": 5245 }, { "epoch": 0.6056760498384864, "grad_norm": 0.1881391406059265, "learning_rate": 8.06909661061588e-05, "loss": 0.9885, "step": 5250 }, { "epoch": 0.6062528841716659, "grad_norm": 0.19537098705768585, "learning_rate": 8.049343642063477e-05, "loss": 0.952, "step": 5255 }, { "epoch": 0.6068297185048455, "grad_norm": 0.18685182929039001, "learning_rate": 8.029598582479088e-05, "loss": 0.9603, "step": 5260 }, { "epoch": 0.6074065528380249, "grad_norm": 0.2082066535949707, "learning_rate": 8.00986151191938e-05, "loss": 0.8972, "step": 5265 }, { "epoch": 0.6079833871712045, "grad_norm": 0.20010879635810852, "learning_rate": 7.990132510408625e-05, "loss": 0.934, "step": 5270 }, { "epoch": 0.6085602215043839, "grad_norm": 0.19527243077754974, "learning_rate": 7.970411657938381e-05, "loss": 0.9687, "step": 5275 }, { "epoch": 0.6091370558375635, "grad_norm": 0.2064102441072464, "learning_rate": 7.95069903446717e-05, "loss": 0.9407, "step": 5280 }, { "epoch": 0.609713890170743, "grad_norm": 0.19495844841003418, "learning_rate": 7.930994719920142e-05, "loss": 0.9628, "step": 5285 }, { "epoch": 0.6102907245039225, "grad_norm": 0.21486780047416687, "learning_rate": 7.911298794188761e-05, "loss": 0.9869, "step": 5290 }, { "epoch": 0.610867558837102, "grad_norm": 0.19653169810771942, "learning_rate": 7.891611337130482e-05, "loss": 0.9998, "step": 5295 }, { "epoch": 0.6114443931702815, "grad_norm": 0.21410493552684784, "learning_rate": 7.871932428568418e-05, "loss": 0.9142, "step": 5300 }, { "epoch": 0.612021227503461, "grad_norm": 0.18754735589027405, "learning_rate": 7.852262148291028e-05, "loss": 1.0069, "step": 5305 }, { "epoch": 0.6125980618366406, "grad_norm": 0.19620005786418915, "learning_rate": 7.832600576051779e-05, "loss": 0.9078, "step": 5310 }, { "epoch": 0.61317489616982, "grad_norm": 0.20171235501766205, "learning_rate": 7.812947791568845e-05, "loss": 0.9696, "step": 5315 }, { "epoch": 0.6137517305029996, "grad_norm": 0.20567715167999268, "learning_rate": 7.793303874524752e-05, "loss": 0.9431, "step": 5320 }, { "epoch": 0.614328564836179, "grad_norm": 0.19829270243644714, "learning_rate": 7.773668904566085e-05, "loss": 0.9473, "step": 5325 }, { "epoch": 0.6149053991693586, "grad_norm": 0.1850794106721878, "learning_rate": 7.75404296130315e-05, "loss": 0.9487, "step": 5330 }, { "epoch": 0.6154822335025381, "grad_norm": 0.19462130963802338, "learning_rate": 7.734426124309656e-05, "loss": 0.9599, "step": 5335 }, { "epoch": 0.6160590678357176, "grad_norm": 0.20998431742191315, "learning_rate": 7.714818473122385e-05, "loss": 0.9605, "step": 5340 }, { "epoch": 0.6166359021688971, "grad_norm": 0.18975423276424408, "learning_rate": 7.695220087240885e-05, "loss": 0.9829, "step": 5345 }, { "epoch": 0.6172127365020766, "grad_norm": 0.19463218748569489, "learning_rate": 7.675631046127123e-05, "loss": 0.9586, "step": 5350 }, { "epoch": 0.6177895708352561, "grad_norm": 0.19710615277290344, "learning_rate": 7.656051429205188e-05, "loss": 0.9812, "step": 5355 }, { "epoch": 0.6183664051684357, "grad_norm": 0.19291462004184723, "learning_rate": 7.636481315860958e-05, "loss": 0.9615, "step": 5360 }, { "epoch": 0.6189432395016151, "grad_norm": 0.19099998474121094, "learning_rate": 7.616920785441777e-05, "loss": 0.9017, "step": 5365 }, { "epoch": 0.6195200738347947, "grad_norm": 0.1884920597076416, "learning_rate": 7.597369917256132e-05, "loss": 0.9232, "step": 5370 }, { "epoch": 0.6200969081679741, "grad_norm": 0.20266391336917877, "learning_rate": 7.577828790573345e-05, "loss": 0.973, "step": 5375 }, { "epoch": 0.6206737425011537, "grad_norm": 0.19208677113056183, "learning_rate": 7.55829748462322e-05, "loss": 0.966, "step": 5380 }, { "epoch": 0.6212505768343332, "grad_norm": 0.1974397599697113, "learning_rate": 7.538776078595762e-05, "loss": 0.9816, "step": 5385 }, { "epoch": 0.6218274111675127, "grad_norm": 0.1980104148387909, "learning_rate": 7.519264651640829e-05, "loss": 0.9531, "step": 5390 }, { "epoch": 0.6224042455006922, "grad_norm": 0.19935230910778046, "learning_rate": 7.499763282867823e-05, "loss": 0.9754, "step": 5395 }, { "epoch": 0.6229810798338717, "grad_norm": 0.19416543841362, "learning_rate": 7.480272051345358e-05, "loss": 0.9571, "step": 5400 }, { "epoch": 0.6235579141670512, "grad_norm": 0.21511389315128326, "learning_rate": 7.460791036100952e-05, "loss": 0.9454, "step": 5405 }, { "epoch": 0.6241347485002308, "grad_norm": 0.1961875706911087, "learning_rate": 7.4413203161207e-05, "loss": 0.9851, "step": 5410 }, { "epoch": 0.6247115828334102, "grad_norm": 0.20345531404018402, "learning_rate": 7.421859970348949e-05, "loss": 0.9334, "step": 5415 }, { "epoch": 0.6252884171665898, "grad_norm": 0.18896692991256714, "learning_rate": 7.402410077687993e-05, "loss": 0.9288, "step": 5420 }, { "epoch": 0.6258652514997692, "grad_norm": 0.21268202364444733, "learning_rate": 7.382970716997736e-05, "loss": 0.9789, "step": 5425 }, { "epoch": 0.6264420858329488, "grad_norm": 0.20382317900657654, "learning_rate": 7.363541967095387e-05, "loss": 0.9449, "step": 5430 }, { "epoch": 0.6270189201661283, "grad_norm": 0.1883535534143448, "learning_rate": 7.344123906755124e-05, "loss": 0.9609, "step": 5435 }, { "epoch": 0.6275957544993078, "grad_norm": 0.2033308893442154, "learning_rate": 7.324716614707793e-05, "loss": 0.9712, "step": 5440 }, { "epoch": 0.6281725888324873, "grad_norm": 0.1854039430618286, "learning_rate": 7.305320169640575e-05, "loss": 0.9199, "step": 5445 }, { "epoch": 0.6287494231656668, "grad_norm": 0.19366517663002014, "learning_rate": 7.285934650196672e-05, "loss": 0.9421, "step": 5450 }, { "epoch": 0.6293262574988463, "grad_norm": 0.1920800656080246, "learning_rate": 7.266560134974989e-05, "loss": 0.9357, "step": 5455 }, { "epoch": 0.6299030918320259, "grad_norm": 0.1931942254304886, "learning_rate": 7.247196702529815e-05, "loss": 0.9787, "step": 5460 }, { "epoch": 0.6304799261652053, "grad_norm": 0.20151135325431824, "learning_rate": 7.227844431370502e-05, "loss": 1.0103, "step": 5465 }, { "epoch": 0.6310567604983849, "grad_norm": 0.19090351462364197, "learning_rate": 7.208503399961149e-05, "loss": 0.9166, "step": 5470 }, { "epoch": 0.6316335948315643, "grad_norm": 0.19098447263240814, "learning_rate": 7.189173686720287e-05, "loss": 0.9626, "step": 5475 }, { "epoch": 0.6322104291647439, "grad_norm": 0.20782425999641418, "learning_rate": 7.169855370020547e-05, "loss": 1.0002, "step": 5480 }, { "epoch": 0.6327872634979234, "grad_norm": 0.20794013142585754, "learning_rate": 7.15054852818836e-05, "loss": 0.9396, "step": 5485 }, { "epoch": 0.6333640978311029, "grad_norm": 0.2069510966539383, "learning_rate": 7.131253239503635e-05, "loss": 0.9651, "step": 5490 }, { "epoch": 0.6339409321642824, "grad_norm": 0.19113782048225403, "learning_rate": 7.111969582199431e-05, "loss": 0.9243, "step": 5495 }, { "epoch": 0.6345177664974619, "grad_norm": 0.19338031113147736, "learning_rate": 7.092697634461654e-05, "loss": 0.9379, "step": 5500 }, { "epoch": 0.6350946008306414, "grad_norm": 0.214319109916687, "learning_rate": 7.073437474428732e-05, "loss": 0.99, "step": 5505 }, { "epoch": 0.635671435163821, "grad_norm": 0.19014938175678253, "learning_rate": 7.05418918019129e-05, "loss": 0.9966, "step": 5510 }, { "epoch": 0.6362482694970004, "grad_norm": 0.20483070611953735, "learning_rate": 7.034952829791858e-05, "loss": 0.9126, "step": 5515 }, { "epoch": 0.63682510383018, "grad_norm": 0.19414497911930084, "learning_rate": 7.01572850122453e-05, "loss": 0.959, "step": 5520 }, { "epoch": 0.6374019381633594, "grad_norm": 0.20513789355754852, "learning_rate": 6.996516272434658e-05, "loss": 0.9301, "step": 5525 }, { "epoch": 0.637978772496539, "grad_norm": 0.19759726524353027, "learning_rate": 6.97731622131854e-05, "loss": 0.9727, "step": 5530 }, { "epoch": 0.6385556068297185, "grad_norm": 0.20151971280574799, "learning_rate": 6.9581284257231e-05, "loss": 0.9625, "step": 5535 }, { "epoch": 0.639132441162898, "grad_norm": 0.2085186392068863, "learning_rate": 6.938952963445559e-05, "loss": 0.9663, "step": 5540 }, { "epoch": 0.6397092754960775, "grad_norm": 0.19066348671913147, "learning_rate": 6.919789912233146e-05, "loss": 0.9397, "step": 5545 }, { "epoch": 0.640286109829257, "grad_norm": 0.2034011334180832, "learning_rate": 6.900639349782762e-05, "loss": 0.975, "step": 5550 }, { "epoch": 0.6408629441624365, "grad_norm": 0.19911380112171173, "learning_rate": 6.88150135374068e-05, "loss": 1.0096, "step": 5555 }, { "epoch": 0.6414397784956161, "grad_norm": 0.19491539895534515, "learning_rate": 6.862376001702213e-05, "loss": 0.9999, "step": 5560 }, { "epoch": 0.6420166128287955, "grad_norm": 0.18677499890327454, "learning_rate": 6.843263371211414e-05, "loss": 0.9005, "step": 5565 }, { "epoch": 0.6425934471619751, "grad_norm": 0.19133366644382477, "learning_rate": 6.824163539760759e-05, "loss": 0.9202, "step": 5570 }, { "epoch": 0.6431702814951545, "grad_norm": 0.19170129299163818, "learning_rate": 6.805076584790818e-05, "loss": 0.955, "step": 5575 }, { "epoch": 0.6437471158283341, "grad_norm": 0.18898151814937592, "learning_rate": 6.786002583689968e-05, "loss": 0.9515, "step": 5580 }, { "epoch": 0.6443239501615136, "grad_norm": 0.20018716156482697, "learning_rate": 6.766941613794053e-05, "loss": 0.9429, "step": 5585 }, { "epoch": 0.6449007844946931, "grad_norm": 0.20243898034095764, "learning_rate": 6.747893752386088e-05, "loss": 0.9879, "step": 5590 }, { "epoch": 0.6454776188278727, "grad_norm": 0.18688704073429108, "learning_rate": 6.728859076695938e-05, "loss": 0.9039, "step": 5595 }, { "epoch": 0.6460544531610521, "grad_norm": 0.2041657567024231, "learning_rate": 6.709837663900007e-05, "loss": 0.9449, "step": 5600 }, { "epoch": 0.6466312874942317, "grad_norm": 0.17731288075447083, "learning_rate": 6.690829591120922e-05, "loss": 0.8981, "step": 5605 }, { "epoch": 0.6472081218274112, "grad_norm": 0.19162966310977936, "learning_rate": 6.671834935427222e-05, "loss": 0.9003, "step": 5610 }, { "epoch": 0.6477849561605907, "grad_norm": 0.19198764860630035, "learning_rate": 6.652853773833052e-05, "loss": 0.9338, "step": 5615 }, { "epoch": 0.6483617904937702, "grad_norm": 0.1949075311422348, "learning_rate": 6.633886183297838e-05, "loss": 0.9595, "step": 5620 }, { "epoch": 0.6489386248269498, "grad_norm": 0.18270482122898102, "learning_rate": 6.614932240725989e-05, "loss": 0.9107, "step": 5625 }, { "epoch": 0.6495154591601292, "grad_norm": 0.19540977478027344, "learning_rate": 6.595992022966571e-05, "loss": 0.9186, "step": 5630 }, { "epoch": 0.6500922934933088, "grad_norm": 0.19692561030387878, "learning_rate": 6.577065606813011e-05, "loss": 0.9674, "step": 5635 }, { "epoch": 0.6506691278264882, "grad_norm": 0.19444316625595093, "learning_rate": 6.558153069002764e-05, "loss": 0.998, "step": 5640 }, { "epoch": 0.6512459621596678, "grad_norm": 0.19267070293426514, "learning_rate": 6.539254486217026e-05, "loss": 0.9694, "step": 5645 }, { "epoch": 0.6518227964928472, "grad_norm": 0.1884683221578598, "learning_rate": 6.520369935080411e-05, "loss": 0.9626, "step": 5650 }, { "epoch": 0.6523996308260268, "grad_norm": 0.19955401122570038, "learning_rate": 6.501499492160636e-05, "loss": 0.9644, "step": 5655 }, { "epoch": 0.6529764651592063, "grad_norm": 0.18771077692508698, "learning_rate": 6.482643233968224e-05, "loss": 0.9485, "step": 5660 }, { "epoch": 0.6535532994923858, "grad_norm": 0.2017538845539093, "learning_rate": 6.463801236956184e-05, "loss": 0.9341, "step": 5665 }, { "epoch": 0.6541301338255653, "grad_norm": 0.2100268006324768, "learning_rate": 6.44497357751969e-05, "loss": 0.9625, "step": 5670 }, { "epoch": 0.6547069681587449, "grad_norm": 0.1906213015317917, "learning_rate": 6.426160331995801e-05, "loss": 0.9099, "step": 5675 }, { "epoch": 0.6552838024919243, "grad_norm": 0.18438196182250977, "learning_rate": 6.407361576663124e-05, "loss": 0.8879, "step": 5680 }, { "epoch": 0.6558606368251039, "grad_norm": 0.18689702451229095, "learning_rate": 6.388577387741524e-05, "loss": 0.9613, "step": 5685 }, { "epoch": 0.6564374711582833, "grad_norm": 0.19837065041065216, "learning_rate": 6.369807841391798e-05, "loss": 0.9303, "step": 5690 }, { "epoch": 0.6570143054914629, "grad_norm": 0.19491511583328247, "learning_rate": 6.351053013715383e-05, "loss": 0.9777, "step": 5695 }, { "epoch": 0.6575911398246423, "grad_norm": 0.19005633890628815, "learning_rate": 6.332312980754025e-05, "loss": 0.9305, "step": 5700 }, { "epoch": 0.6581679741578219, "grad_norm": 0.19769403338432312, "learning_rate": 6.313587818489497e-05, "loss": 0.9505, "step": 5705 }, { "epoch": 0.6587448084910014, "grad_norm": 0.22214584052562714, "learning_rate": 6.294877602843275e-05, "loss": 0.9718, "step": 5710 }, { "epoch": 0.6593216428241809, "grad_norm": 0.18468452990055084, "learning_rate": 6.276182409676234e-05, "loss": 0.9605, "step": 5715 }, { "epoch": 0.6598984771573604, "grad_norm": 0.19918876886367798, "learning_rate": 6.25750231478834e-05, "loss": 0.9539, "step": 5720 }, { "epoch": 0.66047531149054, "grad_norm": 0.18475128710269928, "learning_rate": 6.238837393918341e-05, "loss": 0.9419, "step": 5725 }, { "epoch": 0.6610521458237194, "grad_norm": 0.20297956466674805, "learning_rate": 6.220187722743466e-05, "loss": 0.96, "step": 5730 }, { "epoch": 0.661628980156899, "grad_norm": 0.19551438093185425, "learning_rate": 6.201553376879108e-05, "loss": 0.95, "step": 5735 }, { "epoch": 0.6622058144900784, "grad_norm": 0.20556505024433136, "learning_rate": 6.182934431878526e-05, "loss": 0.9811, "step": 5740 }, { "epoch": 0.662782648823258, "grad_norm": 0.19358351826667786, "learning_rate": 6.164330963232535e-05, "loss": 0.9539, "step": 5745 }, { "epoch": 0.6633594831564374, "grad_norm": 0.18913935124874115, "learning_rate": 6.145743046369205e-05, "loss": 0.9677, "step": 5750 }, { "epoch": 0.663936317489617, "grad_norm": 0.1924053579568863, "learning_rate": 6.127170756653546e-05, "loss": 0.9302, "step": 5755 }, { "epoch": 0.6645131518227965, "grad_norm": 0.1916627734899521, "learning_rate": 6.108614169387215e-05, "loss": 0.9588, "step": 5760 }, { "epoch": 0.665089986155976, "grad_norm": 0.21800926327705383, "learning_rate": 6.090073359808188e-05, "loss": 0.9329, "step": 5765 }, { "epoch": 0.6656668204891555, "grad_norm": 0.19843755662441254, "learning_rate": 6.071548403090488e-05, "loss": 0.9722, "step": 5770 }, { "epoch": 0.666243654822335, "grad_norm": 0.1899397373199463, "learning_rate": 6.053039374343849e-05, "loss": 0.9167, "step": 5775 }, { "epoch": 0.6668204891555145, "grad_norm": 0.19542363286018372, "learning_rate": 6.0345463486134325e-05, "loss": 0.9741, "step": 5780 }, { "epoch": 0.6673973234886941, "grad_norm": 0.1900932788848877, "learning_rate": 6.0160694008795114e-05, "loss": 0.9137, "step": 5785 }, { "epoch": 0.6679741578218735, "grad_norm": 0.192066490650177, "learning_rate": 5.9976086060571765e-05, "loss": 0.947, "step": 5790 }, { "epoch": 0.6685509921550531, "grad_norm": 0.1989891529083252, "learning_rate": 5.979164038996015e-05, "loss": 0.9692, "step": 5795 }, { "epoch": 0.6691278264882325, "grad_norm": 0.18487831950187683, "learning_rate": 5.960735774479826e-05, "loss": 0.9288, "step": 5800 }, { "epoch": 0.6697046608214121, "grad_norm": 0.18360459804534912, "learning_rate": 5.942323887226311e-05, "loss": 0.966, "step": 5805 }, { "epoch": 0.6702814951545916, "grad_norm": 0.1812918782234192, "learning_rate": 5.923928451886767e-05, "loss": 0.907, "step": 5810 }, { "epoch": 0.6708583294877711, "grad_norm": 0.19430682063102722, "learning_rate": 5.905549543045783e-05, "loss": 0.962, "step": 5815 }, { "epoch": 0.6714351638209506, "grad_norm": 0.1931554228067398, "learning_rate": 5.887187235220948e-05, "loss": 0.957, "step": 5820 }, { "epoch": 0.6720119981541302, "grad_norm": 0.19421172142028809, "learning_rate": 5.868841602862541e-05, "loss": 0.9318, "step": 5825 }, { "epoch": 0.6725888324873096, "grad_norm": 0.18080562353134155, "learning_rate": 5.8505127203532216e-05, "loss": 0.9359, "step": 5830 }, { "epoch": 0.6731656668204892, "grad_norm": 0.19878889620304108, "learning_rate": 5.8322006620077426e-05, "loss": 0.9495, "step": 5835 }, { "epoch": 0.6737425011536686, "grad_norm": 0.18784356117248535, "learning_rate": 5.8139055020726494e-05, "loss": 0.9684, "step": 5840 }, { "epoch": 0.6743193354868482, "grad_norm": 0.17742076516151428, "learning_rate": 5.7956273147259645e-05, "loss": 0.9648, "step": 5845 }, { "epoch": 0.6748961698200276, "grad_norm": 0.19306622445583344, "learning_rate": 5.77736617407689e-05, "loss": 0.9388, "step": 5850 }, { "epoch": 0.6754730041532072, "grad_norm": 0.20553186535835266, "learning_rate": 5.7591221541655285e-05, "loss": 0.9764, "step": 5855 }, { "epoch": 0.6760498384863867, "grad_norm": 0.20302440226078033, "learning_rate": 5.74089532896255e-05, "loss": 0.9554, "step": 5860 }, { "epoch": 0.6766266728195662, "grad_norm": 0.19851718842983246, "learning_rate": 5.722685772368912e-05, "loss": 0.9692, "step": 5865 }, { "epoch": 0.6772035071527457, "grad_norm": 0.20243926346302032, "learning_rate": 5.704493558215567e-05, "loss": 0.967, "step": 5870 }, { "epoch": 0.6777803414859253, "grad_norm": 0.19658678770065308, "learning_rate": 5.6863187602631354e-05, "loss": 0.9167, "step": 5875 }, { "epoch": 0.6783571758191047, "grad_norm": 0.18363118171691895, "learning_rate": 5.668161452201639e-05, "loss": 0.9346, "step": 5880 }, { "epoch": 0.6789340101522843, "grad_norm": 0.2019900381565094, "learning_rate": 5.650021707650173e-05, "loss": 0.9602, "step": 5885 }, { "epoch": 0.6795108444854637, "grad_norm": 0.18240347504615784, "learning_rate": 5.6318996001566384e-05, "loss": 0.9663, "step": 5890 }, { "epoch": 0.6800876788186433, "grad_norm": 0.19030514359474182, "learning_rate": 5.613795203197401e-05, "loss": 0.9382, "step": 5895 }, { "epoch": 0.6806645131518227, "grad_norm": 0.18463625013828278, "learning_rate": 5.5957085901770424e-05, "loss": 0.9487, "step": 5900 }, { "epoch": 0.6812413474850023, "grad_norm": 0.18482211232185364, "learning_rate": 5.577639834428026e-05, "loss": 0.96, "step": 5905 }, { "epoch": 0.6818181818181818, "grad_norm": 0.19854898750782013, "learning_rate": 5.559589009210421e-05, "loss": 0.9632, "step": 5910 }, { "epoch": 0.6823950161513613, "grad_norm": 0.19982431828975677, "learning_rate": 5.5415561877115876e-05, "loss": 0.9312, "step": 5915 }, { "epoch": 0.6829718504845408, "grad_norm": 0.19143183529376984, "learning_rate": 5.523541443045904e-05, "loss": 0.9736, "step": 5920 }, { "epoch": 0.6835486848177204, "grad_norm": 0.20476052165031433, "learning_rate": 5.505544848254432e-05, "loss": 0.9658, "step": 5925 }, { "epoch": 0.6841255191508998, "grad_norm": 0.19165083765983582, "learning_rate": 5.4875664763046705e-05, "loss": 0.9917, "step": 5930 }, { "epoch": 0.6847023534840794, "grad_norm": 0.19160091876983643, "learning_rate": 5.4696064000902146e-05, "loss": 0.952, "step": 5935 }, { "epoch": 0.6852791878172588, "grad_norm": 0.22697453200817108, "learning_rate": 5.451664692430493e-05, "loss": 0.959, "step": 5940 }, { "epoch": 0.6858560221504384, "grad_norm": 0.21347078680992126, "learning_rate": 5.433741426070442e-05, "loss": 0.9489, "step": 5945 }, { "epoch": 0.686432856483618, "grad_norm": 0.19163408875465393, "learning_rate": 5.415836673680253e-05, "loss": 0.9566, "step": 5950 }, { "epoch": 0.6870096908167974, "grad_norm": 0.19686076045036316, "learning_rate": 5.3979505078550184e-05, "loss": 0.9389, "step": 5955 }, { "epoch": 0.687586525149977, "grad_norm": 0.1912279576063156, "learning_rate": 5.380083001114503e-05, "loss": 0.9249, "step": 5960 }, { "epoch": 0.6881633594831564, "grad_norm": 0.20188121497631073, "learning_rate": 5.362234225902794e-05, "loss": 0.9683, "step": 5965 }, { "epoch": 0.688740193816336, "grad_norm": 0.20273169875144958, "learning_rate": 5.3444042545880514e-05, "loss": 0.9125, "step": 5970 }, { "epoch": 0.6893170281495155, "grad_norm": 0.2052476704120636, "learning_rate": 5.3265931594621756e-05, "loss": 0.9327, "step": 5975 }, { "epoch": 0.689893862482695, "grad_norm": 0.19370770454406738, "learning_rate": 5.3088010127405496e-05, "loss": 0.9664, "step": 5980 }, { "epoch": 0.6904706968158745, "grad_norm": 0.19764676690101624, "learning_rate": 5.29102788656172e-05, "loss": 0.9241, "step": 5985 }, { "epoch": 0.691047531149054, "grad_norm": 0.2198079228401184, "learning_rate": 5.273273852987113e-05, "loss": 0.9722, "step": 5990 }, { "epoch": 0.6916243654822335, "grad_norm": 0.1917632520198822, "learning_rate": 5.255538984000753e-05, "loss": 0.9572, "step": 5995 }, { "epoch": 0.6922011998154131, "grad_norm": 0.19673052430152893, "learning_rate": 5.237823351508953e-05, "loss": 0.9546, "step": 6000 }, { "epoch": 0.6927780341485925, "grad_norm": 0.18753381073474884, "learning_rate": 5.2201270273400296e-05, "loss": 0.9448, "step": 6005 }, { "epoch": 0.6933548684817721, "grad_norm": 0.18277138471603394, "learning_rate": 5.202450083244026e-05, "loss": 0.9748, "step": 6010 }, { "epoch": 0.6939317028149515, "grad_norm": 0.1970137506723404, "learning_rate": 5.184792590892397e-05, "loss": 0.9961, "step": 6015 }, { "epoch": 0.6945085371481311, "grad_norm": 0.19395023584365845, "learning_rate": 5.167154621877728e-05, "loss": 0.9396, "step": 6020 }, { "epoch": 0.6950853714813106, "grad_norm": 0.1935676783323288, "learning_rate": 5.14953624771346e-05, "loss": 0.9277, "step": 6025 }, { "epoch": 0.6956622058144901, "grad_norm": 0.21159899234771729, "learning_rate": 5.131937539833571e-05, "loss": 0.9511, "step": 6030 }, { "epoch": 0.6962390401476696, "grad_norm": 0.1984950304031372, "learning_rate": 5.1143585695923166e-05, "loss": 0.9859, "step": 6035 }, { "epoch": 0.6968158744808491, "grad_norm": 0.19086718559265137, "learning_rate": 5.09679940826391e-05, "loss": 0.9942, "step": 6040 }, { "epoch": 0.6973927088140286, "grad_norm": 0.197454035282135, "learning_rate": 5.079260127042267e-05, "loss": 0.9919, "step": 6045 }, { "epoch": 0.6979695431472082, "grad_norm": 0.20797502994537354, "learning_rate": 5.061740797040684e-05, "loss": 0.9213, "step": 6050 }, { "epoch": 0.6985463774803876, "grad_norm": 0.18882066011428833, "learning_rate": 5.044241489291569e-05, "loss": 0.9691, "step": 6055 }, { "epoch": 0.6991232118135672, "grad_norm": 0.19341005384922028, "learning_rate": 5.0267622747461487e-05, "loss": 0.9267, "step": 6060 }, { "epoch": 0.6997000461467466, "grad_norm": 0.20529086887836456, "learning_rate": 5.009303224274191e-05, "loss": 0.9937, "step": 6065 }, { "epoch": 0.7002768804799262, "grad_norm": 0.1887139081954956, "learning_rate": 4.991864408663692e-05, "loss": 0.9477, "step": 6070 }, { "epoch": 0.7008537148131057, "grad_norm": 0.19098982214927673, "learning_rate": 4.974445898620622e-05, "loss": 0.9689, "step": 6075 }, { "epoch": 0.7014305491462852, "grad_norm": 0.19821666181087494, "learning_rate": 4.957047764768612e-05, "loss": 0.9333, "step": 6080 }, { "epoch": 0.7020073834794647, "grad_norm": 0.1881197690963745, "learning_rate": 4.939670077648676e-05, "loss": 0.9504, "step": 6085 }, { "epoch": 0.7025842178126442, "grad_norm": 0.19426091015338898, "learning_rate": 4.922312907718929e-05, "loss": 1.0074, "step": 6090 }, { "epoch": 0.7031610521458237, "grad_norm": 0.18885859847068787, "learning_rate": 4.9049763253543054e-05, "loss": 0.9706, "step": 6095 }, { "epoch": 0.7037378864790033, "grad_norm": 0.20289480686187744, "learning_rate": 4.8876604008462554e-05, "loss": 0.9547, "step": 6100 }, { "epoch": 0.7043147208121827, "grad_norm": 0.2023826539516449, "learning_rate": 4.870365204402483e-05, "loss": 1.005, "step": 6105 }, { "epoch": 0.7048915551453623, "grad_norm": 0.18636111915111542, "learning_rate": 4.8530908061466404e-05, "loss": 0.9311, "step": 6110 }, { "epoch": 0.7054683894785417, "grad_norm": 0.18160183727741241, "learning_rate": 4.835837276118058e-05, "loss": 0.9579, "step": 6115 }, { "epoch": 0.7060452238117213, "grad_norm": 0.19551381468772888, "learning_rate": 4.8186046842714504e-05, "loss": 0.8944, "step": 6120 }, { "epoch": 0.7066220581449008, "grad_norm": 0.18871080875396729, "learning_rate": 4.801393100476651e-05, "loss": 1.0176, "step": 6125 }, { "epoch": 0.7071988924780803, "grad_norm": 0.1864621639251709, "learning_rate": 4.784202594518298e-05, "loss": 0.945, "step": 6130 }, { "epoch": 0.7077757268112598, "grad_norm": 0.1970556527376175, "learning_rate": 4.767033236095585e-05, "loss": 0.9587, "step": 6135 }, { "epoch": 0.7083525611444393, "grad_norm": 0.19122757017612457, "learning_rate": 4.749885094821951e-05, "loss": 0.9473, "step": 6140 }, { "epoch": 0.7089293954776188, "grad_norm": 0.1870802640914917, "learning_rate": 4.732758240224818e-05, "loss": 0.965, "step": 6145 }, { "epoch": 0.7095062298107984, "grad_norm": 0.20964084565639496, "learning_rate": 4.715652741745298e-05, "loss": 0.9074, "step": 6150 }, { "epoch": 0.7100830641439778, "grad_norm": 0.1921871155500412, "learning_rate": 4.6985686687379103e-05, "loss": 0.9391, "step": 6155 }, { "epoch": 0.7106598984771574, "grad_norm": 0.205277681350708, "learning_rate": 4.6815060904703046e-05, "loss": 0.9321, "step": 6160 }, { "epoch": 0.7112367328103368, "grad_norm": 0.18848411738872528, "learning_rate": 4.664465076122991e-05, "loss": 0.9028, "step": 6165 }, { "epoch": 0.7118135671435164, "grad_norm": 0.18404066562652588, "learning_rate": 4.647445694789032e-05, "loss": 0.9967, "step": 6170 }, { "epoch": 0.7123904014766959, "grad_norm": 0.18522439897060394, "learning_rate": 4.630448015473794e-05, "loss": 0.9022, "step": 6175 }, { "epoch": 0.7129672358098754, "grad_norm": 0.1914357990026474, "learning_rate": 4.613472107094641e-05, "loss": 0.9313, "step": 6180 }, { "epoch": 0.7135440701430549, "grad_norm": 0.18472833931446075, "learning_rate": 4.596518038480667e-05, "loss": 0.9544, "step": 6185 }, { "epoch": 0.7141209044762344, "grad_norm": 0.1849919557571411, "learning_rate": 4.579585878372428e-05, "loss": 0.9521, "step": 6190 }, { "epoch": 0.7146977388094139, "grad_norm": 0.2014801949262619, "learning_rate": 4.562675695421634e-05, "loss": 0.9397, "step": 6195 }, { "epoch": 0.7152745731425935, "grad_norm": 0.19745898246765137, "learning_rate": 4.545787558190907e-05, "loss": 0.9671, "step": 6200 }, { "epoch": 0.7158514074757729, "grad_norm": 0.2034626305103302, "learning_rate": 4.5289215351534666e-05, "loss": 1.0031, "step": 6205 }, { "epoch": 0.7164282418089525, "grad_norm": 0.19451607763767242, "learning_rate": 4.512077694692888e-05, "loss": 0.924, "step": 6210 }, { "epoch": 0.7170050761421319, "grad_norm": 0.19638855755329132, "learning_rate": 4.495256105102784e-05, "loss": 0.9372, "step": 6215 }, { "epoch": 0.7175819104753115, "grad_norm": 0.18954864144325256, "learning_rate": 4.478456834586574e-05, "loss": 0.9775, "step": 6220 }, { "epoch": 0.718158744808491, "grad_norm": 0.1834801435470581, "learning_rate": 4.4616799512571675e-05, "loss": 0.946, "step": 6225 }, { "epoch": 0.7187355791416705, "grad_norm": 0.2009892612695694, "learning_rate": 4.4449255231367183e-05, "loss": 0.9131, "step": 6230 }, { "epoch": 0.71931241347485, "grad_norm": 0.1912214159965515, "learning_rate": 4.428193618156322e-05, "loss": 0.9248, "step": 6235 }, { "epoch": 0.7198892478080295, "grad_norm": 0.19237951934337616, "learning_rate": 4.411484304155771e-05, "loss": 0.9305, "step": 6240 }, { "epoch": 0.720466082141209, "grad_norm": 0.1901046186685562, "learning_rate": 4.394797648883236e-05, "loss": 0.9259, "step": 6245 }, { "epoch": 0.7210429164743886, "grad_norm": 0.19405438005924225, "learning_rate": 4.378133719995044e-05, "loss": 0.9308, "step": 6250 }, { "epoch": 0.721619750807568, "grad_norm": 0.19073431193828583, "learning_rate": 4.36149258505536e-05, "loss": 0.9325, "step": 6255 }, { "epoch": 0.7221965851407476, "grad_norm": 0.18729381263256073, "learning_rate": 4.344874311535944e-05, "loss": 0.9674, "step": 6260 }, { "epoch": 0.722773419473927, "grad_norm": 0.19104944169521332, "learning_rate": 4.3282789668158476e-05, "loss": 0.9328, "step": 6265 }, { "epoch": 0.7233502538071066, "grad_norm": 0.20195722579956055, "learning_rate": 4.31170661818118e-05, "loss": 0.9785, "step": 6270 }, { "epoch": 0.7239270881402861, "grad_norm": 0.18547381460666656, "learning_rate": 4.295157332824785e-05, "loss": 0.9534, "step": 6275 }, { "epoch": 0.7245039224734656, "grad_norm": 0.18389153480529785, "learning_rate": 4.27863117784602e-05, "loss": 0.9408, "step": 6280 }, { "epoch": 0.7250807568066451, "grad_norm": 0.1920400708913803, "learning_rate": 4.262128220250441e-05, "loss": 0.9413, "step": 6285 }, { "epoch": 0.7256575911398246, "grad_norm": 0.18754833936691284, "learning_rate": 4.245648526949567e-05, "loss": 0.9961, "step": 6290 }, { "epoch": 0.7262344254730042, "grad_norm": 0.19839036464691162, "learning_rate": 4.229192164760576e-05, "loss": 0.961, "step": 6295 }, { "epoch": 0.7268112598061837, "grad_norm": 0.20525625348091125, "learning_rate": 4.212759200406065e-05, "loss": 0.9767, "step": 6300 }, { "epoch": 0.7273880941393632, "grad_norm": 0.20254795253276825, "learning_rate": 4.1963497005137516e-05, "loss": 0.9062, "step": 6305 }, { "epoch": 0.7279649284725427, "grad_norm": 0.20592284202575684, "learning_rate": 4.179963731616221e-05, "loss": 0.9707, "step": 6310 }, { "epoch": 0.7285417628057222, "grad_norm": 0.1987779587507248, "learning_rate": 4.163601360150646e-05, "loss": 0.9244, "step": 6315 }, { "epoch": 0.7291185971389017, "grad_norm": 0.1847141832113266, "learning_rate": 4.147262652458539e-05, "loss": 0.9771, "step": 6320 }, { "epoch": 0.7296954314720813, "grad_norm": 0.18493050336837769, "learning_rate": 4.130947674785447e-05, "loss": 0.9005, "step": 6325 }, { "epoch": 0.7302722658052607, "grad_norm": 0.18646039068698883, "learning_rate": 4.114656493280721e-05, "loss": 0.938, "step": 6330 }, { "epoch": 0.7308491001384403, "grad_norm": 0.2044810801744461, "learning_rate": 4.098389173997218e-05, "loss": 0.9636, "step": 6335 }, { "epoch": 0.7314259344716197, "grad_norm": 0.20317162573337555, "learning_rate": 4.082145782891046e-05, "loss": 0.9499, "step": 6340 }, { "epoch": 0.7320027688047993, "grad_norm": 0.19699914753437042, "learning_rate": 4.065926385821307e-05, "loss": 0.9391, "step": 6345 }, { "epoch": 0.7325796031379788, "grad_norm": 0.18906693160533905, "learning_rate": 4.049731048549804e-05, "loss": 0.9166, "step": 6350 }, { "epoch": 0.7331564374711583, "grad_norm": 0.20645971596240997, "learning_rate": 4.033559836740801e-05, "loss": 0.9581, "step": 6355 }, { "epoch": 0.7337332718043378, "grad_norm": 0.18750208616256714, "learning_rate": 4.017412815960735e-05, "loss": 0.9418, "step": 6360 }, { "epoch": 0.7343101061375173, "grad_norm": 0.18464316427707672, "learning_rate": 4.001290051677975e-05, "loss": 0.9937, "step": 6365 }, { "epoch": 0.7348869404706968, "grad_norm": 0.19745340943336487, "learning_rate": 3.985191609262519e-05, "loss": 0.9188, "step": 6370 }, { "epoch": 0.7354637748038764, "grad_norm": 0.2214672714471817, "learning_rate": 3.969117553985772e-05, "loss": 0.9521, "step": 6375 }, { "epoch": 0.7360406091370558, "grad_norm": 0.1932508647441864, "learning_rate": 3.9530679510202476e-05, "loss": 0.9164, "step": 6380 }, { "epoch": 0.7366174434702354, "grad_norm": 0.1878437101840973, "learning_rate": 3.9370428654393296e-05, "loss": 0.9119, "step": 6385 }, { "epoch": 0.7371942778034148, "grad_norm": 0.19011224806308746, "learning_rate": 3.921042362216983e-05, "loss": 0.9232, "step": 6390 }, { "epoch": 0.7377711121365944, "grad_norm": 0.17758631706237793, "learning_rate": 3.905066506227515e-05, "loss": 0.937, "step": 6395 }, { "epoch": 0.7383479464697739, "grad_norm": 0.19590388238430023, "learning_rate": 3.8891153622452904e-05, "loss": 0.8993, "step": 6400 }, { "epoch": 0.7389247808029534, "grad_norm": 0.1892813891172409, "learning_rate": 3.873188994944483e-05, "loss": 0.9759, "step": 6405 }, { "epoch": 0.7395016151361329, "grad_norm": 0.19297149777412415, "learning_rate": 3.857287468898806e-05, "loss": 0.9918, "step": 6410 }, { "epoch": 0.7400784494693124, "grad_norm": 0.1931808739900589, "learning_rate": 3.8414108485812613e-05, "loss": 0.9332, "step": 6415 }, { "epoch": 0.7406552838024919, "grad_norm": 0.18963101506233215, "learning_rate": 3.825559198363861e-05, "loss": 0.9274, "step": 6420 }, { "epoch": 0.7412321181356715, "grad_norm": 0.18943539261817932, "learning_rate": 3.8097325825173826e-05, "loss": 0.9751, "step": 6425 }, { "epoch": 0.7418089524688509, "grad_norm": 0.1900118887424469, "learning_rate": 3.793931065211096e-05, "loss": 0.9527, "step": 6430 }, { "epoch": 0.7423857868020305, "grad_norm": 0.1904015690088272, "learning_rate": 3.778154710512512e-05, "loss": 0.9884, "step": 6435 }, { "epoch": 0.7429626211352099, "grad_norm": 0.19013242423534393, "learning_rate": 3.762403582387114e-05, "loss": 0.9467, "step": 6440 }, { "epoch": 0.7435394554683895, "grad_norm": 0.18729767203330994, "learning_rate": 3.746677744698114e-05, "loss": 0.9803, "step": 6445 }, { "epoch": 0.744116289801569, "grad_norm": 0.20547720789909363, "learning_rate": 3.730977261206171e-05, "loss": 0.9168, "step": 6450 }, { "epoch": 0.7446931241347485, "grad_norm": 0.20597174763679504, "learning_rate": 3.715302195569159e-05, "loss": 0.9868, "step": 6455 }, { "epoch": 0.745269958467928, "grad_norm": 0.1934199035167694, "learning_rate": 3.69965261134188e-05, "loss": 1.036, "step": 6460 }, { "epoch": 0.7458467928011075, "grad_norm": 0.19105862081050873, "learning_rate": 3.684028571975836e-05, "loss": 0.9528, "step": 6465 }, { "epoch": 0.746423627134287, "grad_norm": 0.19618894159793854, "learning_rate": 3.6684301408189406e-05, "loss": 1.0373, "step": 6470 }, { "epoch": 0.7470004614674666, "grad_norm": 0.18884247541427612, "learning_rate": 3.652857381115293e-05, "loss": 0.9981, "step": 6475 }, { "epoch": 0.747577295800646, "grad_norm": 0.1874423772096634, "learning_rate": 3.637310356004897e-05, "loss": 0.964, "step": 6480 }, { "epoch": 0.7481541301338256, "grad_norm": 0.19521760940551758, "learning_rate": 3.6217891285234265e-05, "loss": 0.9497, "step": 6485 }, { "epoch": 0.748730964467005, "grad_norm": 0.19494710862636566, "learning_rate": 3.6062937616019433e-05, "loss": 0.9687, "step": 6490 }, { "epoch": 0.7493077988001846, "grad_norm": 0.18842960894107819, "learning_rate": 3.5908243180666734e-05, "loss": 0.9522, "step": 6495 }, { "epoch": 0.7498846331333641, "grad_norm": 0.19816794991493225, "learning_rate": 3.5753808606387226e-05, "loss": 0.9548, "step": 6500 }, { "epoch": 0.7504614674665436, "grad_norm": 0.19967734813690186, "learning_rate": 3.55996345193384e-05, "loss": 1.0035, "step": 6505 }, { "epoch": 0.7510383017997231, "grad_norm": 0.19166813790798187, "learning_rate": 3.544572154462165e-05, "loss": 0.9374, "step": 6510 }, { "epoch": 0.7516151361329027, "grad_norm": 0.2017005980014801, "learning_rate": 3.529207030627959e-05, "loss": 1.0029, "step": 6515 }, { "epoch": 0.7521919704660821, "grad_norm": 0.19286920130252838, "learning_rate": 3.513868142729373e-05, "loss": 0.9457, "step": 6520 }, { "epoch": 0.7527688047992617, "grad_norm": 0.20132075250148773, "learning_rate": 3.498555552958176e-05, "loss": 0.9899, "step": 6525 }, { "epoch": 0.7533456391324411, "grad_norm": 0.22670696675777435, "learning_rate": 3.483269323399512e-05, "loss": 0.9568, "step": 6530 }, { "epoch": 0.7539224734656207, "grad_norm": 0.1912250965833664, "learning_rate": 3.468009516031644e-05, "loss": 0.9871, "step": 6535 }, { "epoch": 0.7544993077988001, "grad_norm": 0.1899501532316208, "learning_rate": 3.452776192725717e-05, "loss": 0.9634, "step": 6540 }, { "epoch": 0.7550761421319797, "grad_norm": 0.1889965683221817, "learning_rate": 3.437569415245483e-05, "loss": 0.9733, "step": 6545 }, { "epoch": 0.7556529764651592, "grad_norm": 0.19462092220783234, "learning_rate": 3.422389245247075e-05, "loss": 0.951, "step": 6550 }, { "epoch": 0.7562298107983387, "grad_norm": 0.1985846608877182, "learning_rate": 3.407235744278734e-05, "loss": 0.9462, "step": 6555 }, { "epoch": 0.7568066451315182, "grad_norm": 0.187138170003891, "learning_rate": 3.3921089737805866e-05, "loss": 0.9234, "step": 6560 }, { "epoch": 0.7573834794646978, "grad_norm": 0.188985213637352, "learning_rate": 3.3770089950843564e-05, "loss": 0.9074, "step": 6565 }, { "epoch": 0.7579603137978772, "grad_norm": 0.20506353676319122, "learning_rate": 3.361935869413163e-05, "loss": 0.9758, "step": 6570 }, { "epoch": 0.7585371481310568, "grad_norm": 0.18956059217453003, "learning_rate": 3.3468896578812344e-05, "loss": 0.9461, "step": 6575 }, { "epoch": 0.7591139824642362, "grad_norm": 0.2013210505247116, "learning_rate": 3.331870421493688e-05, "loss": 0.9575, "step": 6580 }, { "epoch": 0.7596908167974158, "grad_norm": 0.19527879357337952, "learning_rate": 3.316878221146253e-05, "loss": 0.9456, "step": 6585 }, { "epoch": 0.7602676511305952, "grad_norm": 0.1831914484500885, "learning_rate": 3.301913117625065e-05, "loss": 0.9144, "step": 6590 }, { "epoch": 0.7608444854637748, "grad_norm": 0.18544113636016846, "learning_rate": 3.286975171606362e-05, "loss": 0.9525, "step": 6595 }, { "epoch": 0.7614213197969543, "grad_norm": 0.20716486871242523, "learning_rate": 3.272064443656303e-05, "loss": 0.9571, "step": 6600 }, { "epoch": 0.7619981541301338, "grad_norm": 0.18836161494255066, "learning_rate": 3.257180994230671e-05, "loss": 0.9857, "step": 6605 }, { "epoch": 0.7625749884633133, "grad_norm": 0.19700607657432556, "learning_rate": 3.2423248836746575e-05, "loss": 0.9054, "step": 6610 }, { "epoch": 0.7631518227964929, "grad_norm": 0.18617579340934753, "learning_rate": 3.227496172222603e-05, "loss": 0.9512, "step": 6615 }, { "epoch": 0.7637286571296723, "grad_norm": 0.18982037901878357, "learning_rate": 3.212694919997764e-05, "loss": 0.9649, "step": 6620 }, { "epoch": 0.7643054914628519, "grad_norm": 0.19119331240653992, "learning_rate": 3.197921187012055e-05, "loss": 0.9482, "step": 6625 }, { "epoch": 0.7648823257960313, "grad_norm": 0.2058209329843521, "learning_rate": 3.1831750331658196e-05, "loss": 0.9741, "step": 6630 }, { "epoch": 0.7654591601292109, "grad_norm": 0.19063866138458252, "learning_rate": 3.168456518247575e-05, "loss": 0.9556, "step": 6635 }, { "epoch": 0.7660359944623903, "grad_norm": 0.1826866716146469, "learning_rate": 3.153765701933784e-05, "loss": 0.9362, "step": 6640 }, { "epoch": 0.7666128287955699, "grad_norm": 0.20662148296833038, "learning_rate": 3.1391026437885984e-05, "loss": 0.9941, "step": 6645 }, { "epoch": 0.7671896631287495, "grad_norm": 0.19351264834403992, "learning_rate": 3.12446740326363e-05, "loss": 0.9544, "step": 6650 }, { "epoch": 0.7677664974619289, "grad_norm": 0.19532477855682373, "learning_rate": 3.109860039697699e-05, "loss": 1.0098, "step": 6655 }, { "epoch": 0.7683433317951085, "grad_norm": 0.19506360590457916, "learning_rate": 3.0952806123165945e-05, "loss": 0.9201, "step": 6660 }, { "epoch": 0.768920166128288, "grad_norm": 0.19203807413578033, "learning_rate": 3.0807291802328494e-05, "loss": 0.9102, "step": 6665 }, { "epoch": 0.7694970004614675, "grad_norm": 0.19363170862197876, "learning_rate": 3.066205802445477e-05, "loss": 0.9485, "step": 6670 }, { "epoch": 0.770073834794647, "grad_norm": 0.19659185409545898, "learning_rate": 3.0517105378397536e-05, "loss": 0.9946, "step": 6675 }, { "epoch": 0.7706506691278265, "grad_norm": 0.20665878057479858, "learning_rate": 3.037243445186965e-05, "loss": 0.9688, "step": 6680 }, { "epoch": 0.771227503461006, "grad_norm": 0.1891080141067505, "learning_rate": 3.0228045831441733e-05, "loss": 0.9905, "step": 6685 }, { "epoch": 0.7718043377941856, "grad_norm": 0.19424353539943695, "learning_rate": 3.0083940102539763e-05, "loss": 0.9682, "step": 6690 }, { "epoch": 0.772381172127365, "grad_norm": 0.1884242296218872, "learning_rate": 2.994011784944284e-05, "loss": 1.0159, "step": 6695 }, { "epoch": 0.7729580064605446, "grad_norm": 0.191155344247818, "learning_rate": 2.9796579655280576e-05, "loss": 0.9149, "step": 6700 }, { "epoch": 0.773534840793724, "grad_norm": 0.20115402340888977, "learning_rate": 2.9653326102030964e-05, "loss": 0.9818, "step": 6705 }, { "epoch": 0.7741116751269036, "grad_norm": 0.19099047780036926, "learning_rate": 2.9510357770517825e-05, "loss": 1.0363, "step": 6710 }, { "epoch": 0.774688509460083, "grad_norm": 0.18843354284763336, "learning_rate": 2.9367675240408654e-05, "loss": 0.9309, "step": 6715 }, { "epoch": 0.7752653437932626, "grad_norm": 0.19343096017837524, "learning_rate": 2.9225279090212067e-05, "loss": 0.9773, "step": 6720 }, { "epoch": 0.7758421781264421, "grad_norm": 0.19770221412181854, "learning_rate": 2.9083169897275552e-05, "loss": 0.9453, "step": 6725 }, { "epoch": 0.7764190124596216, "grad_norm": 0.1835956573486328, "learning_rate": 2.894134823778315e-05, "loss": 0.9375, "step": 6730 }, { "epoch": 0.7769958467928011, "grad_norm": 0.18945716321468353, "learning_rate": 2.8799814686753134e-05, "loss": 0.9738, "step": 6735 }, { "epoch": 0.7775726811259807, "grad_norm": 0.18679681420326233, "learning_rate": 2.8658569818035542e-05, "loss": 0.9195, "step": 6740 }, { "epoch": 0.7781495154591601, "grad_norm": 0.19722817838191986, "learning_rate": 2.851761420431006e-05, "loss": 0.9791, "step": 6745 }, { "epoch": 0.7787263497923397, "grad_norm": 0.1932711899280548, "learning_rate": 2.8376948417083483e-05, "loss": 0.9314, "step": 6750 }, { "epoch": 0.7793031841255191, "grad_norm": 0.2086237072944641, "learning_rate": 2.823657302668755e-05, "loss": 0.9436, "step": 6755 }, { "epoch": 0.7798800184586987, "grad_norm": 0.18653713166713715, "learning_rate": 2.8096488602276528e-05, "loss": 0.9361, "step": 6760 }, { "epoch": 0.7804568527918782, "grad_norm": 0.1907120645046234, "learning_rate": 2.7956695711825075e-05, "loss": 0.9439, "step": 6765 }, { "epoch": 0.7810336871250577, "grad_norm": 0.18436580896377563, "learning_rate": 2.7817194922125666e-05, "loss": 0.9436, "step": 6770 }, { "epoch": 0.7816105214582372, "grad_norm": 0.18907365202903748, "learning_rate": 2.7677986798786615e-05, "loss": 0.9657, "step": 6775 }, { "epoch": 0.7821873557914167, "grad_norm": 0.19230635464191437, "learning_rate": 2.753907190622944e-05, "loss": 0.9517, "step": 6780 }, { "epoch": 0.7827641901245962, "grad_norm": 0.19662320613861084, "learning_rate": 2.7400450807686938e-05, "loss": 0.9558, "step": 6785 }, { "epoch": 0.7833410244577758, "grad_norm": 0.18257470428943634, "learning_rate": 2.726212406520051e-05, "loss": 0.9132, "step": 6790 }, { "epoch": 0.7839178587909552, "grad_norm": 0.19373179972171783, "learning_rate": 2.712409223961826e-05, "loss": 0.9496, "step": 6795 }, { "epoch": 0.7844946931241348, "grad_norm": 0.19530361890792847, "learning_rate": 2.698635589059242e-05, "loss": 0.9682, "step": 6800 }, { "epoch": 0.7850715274573142, "grad_norm": 0.18772435188293457, "learning_rate": 2.6848915576577317e-05, "loss": 0.9552, "step": 6805 }, { "epoch": 0.7856483617904938, "grad_norm": 0.19148732721805573, "learning_rate": 2.6711771854826905e-05, "loss": 0.9047, "step": 6810 }, { "epoch": 0.7862251961236733, "grad_norm": 0.19749705493450165, "learning_rate": 2.657492528139268e-05, "loss": 0.9442, "step": 6815 }, { "epoch": 0.7868020304568528, "grad_norm": 0.18885494768619537, "learning_rate": 2.643837641112128e-05, "loss": 0.9223, "step": 6820 }, { "epoch": 0.7873788647900323, "grad_norm": 0.19046206772327423, "learning_rate": 2.630212579765231e-05, "loss": 0.9801, "step": 6825 }, { "epoch": 0.7879556991232118, "grad_norm": 0.19383732974529266, "learning_rate": 2.6166173993416154e-05, "loss": 1.0104, "step": 6830 }, { "epoch": 0.7885325334563913, "grad_norm": 0.20124277472496033, "learning_rate": 2.603052154963158e-05, "loss": 1.008, "step": 6835 }, { "epoch": 0.7891093677895709, "grad_norm": 0.20482531189918518, "learning_rate": 2.5895169016303623e-05, "loss": 0.8936, "step": 6840 }, { "epoch": 0.7896862021227503, "grad_norm": 0.1819208562374115, "learning_rate": 2.576011694222139e-05, "loss": 0.9452, "step": 6845 }, { "epoch": 0.7902630364559299, "grad_norm": 0.18517275154590607, "learning_rate": 2.5625365874955674e-05, "loss": 0.8879, "step": 6850 }, { "epoch": 0.7908398707891093, "grad_norm": 0.18698491156101227, "learning_rate": 2.5490916360856853e-05, "loss": 0.929, "step": 6855 }, { "epoch": 0.7914167051222889, "grad_norm": 0.18061292171478271, "learning_rate": 2.5356768945052745e-05, "loss": 0.9385, "step": 6860 }, { "epoch": 0.7919935394554684, "grad_norm": 0.1996062994003296, "learning_rate": 2.522292417144617e-05, "loss": 0.9852, "step": 6865 }, { "epoch": 0.7925703737886479, "grad_norm": 0.1814109832048416, "learning_rate": 2.5089382582712994e-05, "loss": 0.9543, "step": 6870 }, { "epoch": 0.7931472081218274, "grad_norm": 0.19972620904445648, "learning_rate": 2.4956144720299712e-05, "loss": 0.9139, "step": 6875 }, { "epoch": 0.7937240424550069, "grad_norm": 0.19318068027496338, "learning_rate": 2.482321112442151e-05, "loss": 1.0012, "step": 6880 }, { "epoch": 0.7943008767881864, "grad_norm": 0.18499110639095306, "learning_rate": 2.4690582334059685e-05, "loss": 0.9307, "step": 6885 }, { "epoch": 0.794877711121366, "grad_norm": 0.1849159598350525, "learning_rate": 2.455825888695994e-05, "loss": 0.9354, "step": 6890 }, { "epoch": 0.7954545454545454, "grad_norm": 0.1951223909854889, "learning_rate": 2.4426241319629772e-05, "loss": 0.9596, "step": 6895 }, { "epoch": 0.796031379787725, "grad_norm": 0.19276456534862518, "learning_rate": 2.4294530167336615e-05, "loss": 0.9273, "step": 6900 }, { "epoch": 0.7966082141209044, "grad_norm": 0.1889081597328186, "learning_rate": 2.4163125964105448e-05, "loss": 0.979, "step": 6905 }, { "epoch": 0.797185048454084, "grad_norm": 0.19000983238220215, "learning_rate": 2.4032029242716826e-05, "loss": 0.9286, "step": 6910 }, { "epoch": 0.7977618827872635, "grad_norm": 0.19312219321727753, "learning_rate": 2.390124053470443e-05, "loss": 0.9655, "step": 6915 }, { "epoch": 0.798338717120443, "grad_norm": 0.19184233248233795, "learning_rate": 2.3770760370353294e-05, "loss": 0.964, "step": 6920 }, { "epoch": 0.7989155514536225, "grad_norm": 0.19048276543617249, "learning_rate": 2.364058927869732e-05, "loss": 0.9578, "step": 6925 }, { "epoch": 0.799492385786802, "grad_norm": 0.19102397561073303, "learning_rate": 2.3510727787517382e-05, "loss": 0.9342, "step": 6930 }, { "epoch": 0.8000692201199815, "grad_norm": 0.2003382444381714, "learning_rate": 2.3381176423338956e-05, "loss": 0.9429, "step": 6935 }, { "epoch": 0.8006460544531611, "grad_norm": 0.1823520064353943, "learning_rate": 2.325193571143024e-05, "loss": 0.9617, "step": 6940 }, { "epoch": 0.8012228887863405, "grad_norm": 0.18638677895069122, "learning_rate": 2.31230061757997e-05, "loss": 0.9731, "step": 6945 }, { "epoch": 0.8017997231195201, "grad_norm": 0.19570128619670868, "learning_rate": 2.299438833919432e-05, "loss": 0.9684, "step": 6950 }, { "epoch": 0.8023765574526995, "grad_norm": 0.18794956803321838, "learning_rate": 2.286608272309716e-05, "loss": 0.9115, "step": 6955 }, { "epoch": 0.8029533917858791, "grad_norm": 0.20243585109710693, "learning_rate": 2.2738089847725497e-05, "loss": 0.9751, "step": 6960 }, { "epoch": 0.8035302261190586, "grad_norm": 0.18690462410449982, "learning_rate": 2.2610410232028467e-05, "loss": 0.9299, "step": 6965 }, { "epoch": 0.8041070604522381, "grad_norm": 0.1870652735233307, "learning_rate": 2.2483044393685215e-05, "loss": 0.9366, "step": 6970 }, { "epoch": 0.8046838947854176, "grad_norm": 0.2046535462141037, "learning_rate": 2.235599284910258e-05, "loss": 0.9911, "step": 6975 }, { "epoch": 0.8052607291185971, "grad_norm": 0.18992750346660614, "learning_rate": 2.2229256113413087e-05, "loss": 0.9751, "step": 6980 }, { "epoch": 0.8058375634517766, "grad_norm": 0.19411560893058777, "learning_rate": 2.210283470047296e-05, "loss": 0.937, "step": 6985 }, { "epoch": 0.8064143977849562, "grad_norm": 0.1944170445203781, "learning_rate": 2.1976729122859864e-05, "loss": 0.9326, "step": 6990 }, { "epoch": 0.8069912321181357, "grad_norm": 0.18680644035339355, "learning_rate": 2.185093989187087e-05, "loss": 0.936, "step": 6995 }, { "epoch": 0.8075680664513152, "grad_norm": 0.18400752544403076, "learning_rate": 2.1725467517520526e-05, "loss": 0.9861, "step": 7000 }, { "epoch": 0.8081449007844947, "grad_norm": 0.19621169567108154, "learning_rate": 2.1600312508538602e-05, "loss": 0.9284, "step": 7005 }, { "epoch": 0.8087217351176742, "grad_norm": 0.18958927690982819, "learning_rate": 2.1475475372368094e-05, "loss": 0.9258, "step": 7010 }, { "epoch": 0.8092985694508538, "grad_norm": 0.20696218311786652, "learning_rate": 2.1350956615163254e-05, "loss": 0.9426, "step": 7015 }, { "epoch": 0.8098754037840332, "grad_norm": 0.20199459791183472, "learning_rate": 2.1226756741787356e-05, "loss": 0.9625, "step": 7020 }, { "epoch": 0.8104522381172128, "grad_norm": 0.1961347460746765, "learning_rate": 2.1102876255810887e-05, "loss": 0.9664, "step": 7025 }, { "epoch": 0.8110290724503922, "grad_norm": 0.19894124567508698, "learning_rate": 2.0979315659509223e-05, "loss": 0.9508, "step": 7030 }, { "epoch": 0.8116059067835718, "grad_norm": 0.1998283416032791, "learning_rate": 2.085607545386088e-05, "loss": 0.9117, "step": 7035 }, { "epoch": 0.8121827411167513, "grad_norm": 0.18765446543693542, "learning_rate": 2.0733156138545252e-05, "loss": 0.9908, "step": 7040 }, { "epoch": 0.8127595754499308, "grad_norm": 0.18497121334075928, "learning_rate": 2.0610558211940702e-05, "loss": 0.9453, "step": 7045 }, { "epoch": 0.8133364097831103, "grad_norm": 0.20545774698257446, "learning_rate": 2.0488282171122498e-05, "loss": 1.0136, "step": 7050 }, { "epoch": 0.8139132441162898, "grad_norm": 0.18799108266830444, "learning_rate": 2.036632851186091e-05, "loss": 0.9834, "step": 7055 }, { "epoch": 0.8144900784494693, "grad_norm": 0.17781130969524384, "learning_rate": 2.0244697728618966e-05, "loss": 0.906, "step": 7060 }, { "epoch": 0.8150669127826489, "grad_norm": 0.1909274309873581, "learning_rate": 2.0123390314550717e-05, "loss": 0.9443, "step": 7065 }, { "epoch": 0.8156437471158283, "grad_norm": 0.1991771012544632, "learning_rate": 2.000240676149904e-05, "loss": 0.9451, "step": 7070 }, { "epoch": 0.8162205814490079, "grad_norm": 0.18566997349262238, "learning_rate": 1.9881747559993703e-05, "loss": 0.9015, "step": 7075 }, { "epoch": 0.8167974157821873, "grad_norm": 0.19024524092674255, "learning_rate": 1.976141319924939e-05, "loss": 0.9458, "step": 7080 }, { "epoch": 0.8173742501153669, "grad_norm": 0.19629044830799103, "learning_rate": 1.964140416716379e-05, "loss": 0.9608, "step": 7085 }, { "epoch": 0.8179510844485464, "grad_norm": 0.185940220952034, "learning_rate": 1.9521720950315403e-05, "loss": 0.8914, "step": 7090 }, { "epoch": 0.8185279187817259, "grad_norm": 0.18228422105312347, "learning_rate": 1.940236403396186e-05, "loss": 0.9645, "step": 7095 }, { "epoch": 0.8191047531149054, "grad_norm": 0.18275770545005798, "learning_rate": 1.9283333902037694e-05, "loss": 0.9554, "step": 7100 }, { "epoch": 0.819681587448085, "grad_norm": 0.19047953188419342, "learning_rate": 1.9164631037152513e-05, "loss": 0.9352, "step": 7105 }, { "epoch": 0.8202584217812644, "grad_norm": 0.211176335811615, "learning_rate": 1.9046255920588985e-05, "loss": 0.9209, "step": 7110 }, { "epoch": 0.820835256114444, "grad_norm": 0.19080542027950287, "learning_rate": 1.8928209032301013e-05, "loss": 0.9404, "step": 7115 }, { "epoch": 0.8214120904476234, "grad_norm": 0.20228640735149384, "learning_rate": 1.8810490850911577e-05, "loss": 0.9741, "step": 7120 }, { "epoch": 0.821988924780803, "grad_norm": 0.19433507323265076, "learning_rate": 1.8693101853711004e-05, "loss": 0.9596, "step": 7125 }, { "epoch": 0.8225657591139824, "grad_norm": 0.20206230878829956, "learning_rate": 1.857604251665487e-05, "loss": 0.9728, "step": 7130 }, { "epoch": 0.823142593447162, "grad_norm": 0.1768370121717453, "learning_rate": 1.845931331436219e-05, "loss": 0.9523, "step": 7135 }, { "epoch": 0.8237194277803415, "grad_norm": 0.19058267772197723, "learning_rate": 1.8342914720113404e-05, "loss": 0.982, "step": 7140 }, { "epoch": 0.824296262113521, "grad_norm": 0.1983305811882019, "learning_rate": 1.822684720584852e-05, "loss": 1.0006, "step": 7145 }, { "epoch": 0.8248730964467005, "grad_norm": 0.19151514768600464, "learning_rate": 1.8111111242165124e-05, "loss": 1.0013, "step": 7150 }, { "epoch": 0.82544993077988, "grad_norm": 0.1844472587108612, "learning_rate": 1.7995707298316632e-05, "loss": 0.9621, "step": 7155 }, { "epoch": 0.8260267651130595, "grad_norm": 0.1923513561487198, "learning_rate": 1.788063584221017e-05, "loss": 0.8956, "step": 7160 }, { "epoch": 0.8266035994462391, "grad_norm": 0.1910182386636734, "learning_rate": 1.776589734040487e-05, "loss": 0.9845, "step": 7165 }, { "epoch": 0.8271804337794185, "grad_norm": 0.1840658187866211, "learning_rate": 1.7651492258109835e-05, "loss": 0.9264, "step": 7170 }, { "epoch": 0.8277572681125981, "grad_norm": 0.21792177855968475, "learning_rate": 1.7537421059182314e-05, "loss": 0.9866, "step": 7175 }, { "epoch": 0.8283341024457775, "grad_norm": 0.19659163057804108, "learning_rate": 1.74236842061259e-05, "loss": 0.9331, "step": 7180 }, { "epoch": 0.8289109367789571, "grad_norm": 0.1791463941335678, "learning_rate": 1.7310282160088465e-05, "loss": 0.9154, "step": 7185 }, { "epoch": 0.8294877711121366, "grad_norm": 0.19773773849010468, "learning_rate": 1.7197215380860497e-05, "loss": 0.9588, "step": 7190 }, { "epoch": 0.8300646054453161, "grad_norm": 0.2065158188343048, "learning_rate": 1.7084484326873062e-05, "loss": 0.9579, "step": 7195 }, { "epoch": 0.8306414397784956, "grad_norm": 0.19158004224300385, "learning_rate": 1.6972089455196115e-05, "loss": 0.9358, "step": 7200 }, { "epoch": 0.8312182741116751, "grad_norm": 0.2142983227968216, "learning_rate": 1.6860031221536398e-05, "loss": 0.9572, "step": 7205 }, { "epoch": 0.8317951084448546, "grad_norm": 0.1989503800868988, "learning_rate": 1.674831008023594e-05, "loss": 0.9698, "step": 7210 }, { "epoch": 0.8323719427780342, "grad_norm": 0.19422230124473572, "learning_rate": 1.6636926484269855e-05, "loss": 0.936, "step": 7215 }, { "epoch": 0.8329487771112136, "grad_norm": 0.22429165244102478, "learning_rate": 1.6525880885244815e-05, "loss": 0.9515, "step": 7220 }, { "epoch": 0.8335256114443932, "grad_norm": 0.19105808436870575, "learning_rate": 1.641517373339696e-05, "loss": 0.9732, "step": 7225 }, { "epoch": 0.8341024457775726, "grad_norm": 0.20830240845680237, "learning_rate": 1.6304805477590312e-05, "loss": 0.9794, "step": 7230 }, { "epoch": 0.8346792801107522, "grad_norm": 0.20146532356739044, "learning_rate": 1.6194776565314672e-05, "loss": 0.9492, "step": 7235 }, { "epoch": 0.8352561144439317, "grad_norm": 0.18800169229507446, "learning_rate": 1.6085087442684122e-05, "loss": 0.92, "step": 7240 }, { "epoch": 0.8358329487771112, "grad_norm": 0.19800886511802673, "learning_rate": 1.597573855443497e-05, "loss": 0.9912, "step": 7245 }, { "epoch": 0.8364097831102907, "grad_norm": 0.18179026246070862, "learning_rate": 1.5866730343924085e-05, "loss": 0.9121, "step": 7250 }, { "epoch": 0.8369866174434702, "grad_norm": 0.19229067862033844, "learning_rate": 1.575806325312702e-05, "loss": 0.9529, "step": 7255 }, { "epoch": 0.8375634517766497, "grad_norm": 0.1789880245923996, "learning_rate": 1.5649737722636315e-05, "loss": 0.926, "step": 7260 }, { "epoch": 0.8381402861098293, "grad_norm": 0.1741928905248642, "learning_rate": 1.554175419165951e-05, "loss": 0.9237, "step": 7265 }, { "epoch": 0.8387171204430087, "grad_norm": 0.20456916093826294, "learning_rate": 1.5434113098017667e-05, "loss": 0.9821, "step": 7270 }, { "epoch": 0.8392939547761883, "grad_norm": 0.1989426463842392, "learning_rate": 1.5326814878143304e-05, "loss": 0.9187, "step": 7275 }, { "epoch": 0.8398707891093677, "grad_norm": 0.18545162677764893, "learning_rate": 1.5219859967078854e-05, "loss": 0.945, "step": 7280 }, { "epoch": 0.8404476234425473, "grad_norm": 0.17884254455566406, "learning_rate": 1.5113248798474689e-05, "loss": 0.9181, "step": 7285 }, { "epoch": 0.8410244577757268, "grad_norm": 0.18553291261196136, "learning_rate": 1.5006981804587595e-05, "loss": 0.9737, "step": 7290 }, { "epoch": 0.8416012921089063, "grad_norm": 0.1832725703716278, "learning_rate": 1.4901059416278806e-05, "loss": 0.9121, "step": 7295 }, { "epoch": 0.8421781264420858, "grad_norm": 0.18451160192489624, "learning_rate": 1.4795482063012367e-05, "loss": 0.9595, "step": 7300 }, { "epoch": 0.8427549607752653, "grad_norm": 0.1967657506465912, "learning_rate": 1.4690250172853348e-05, "loss": 0.9664, "step": 7305 }, { "epoch": 0.8433317951084448, "grad_norm": 0.1973174512386322, "learning_rate": 1.4585364172466231e-05, "loss": 0.9763, "step": 7310 }, { "epoch": 0.8439086294416244, "grad_norm": 0.1908571720123291, "learning_rate": 1.4480824487112943e-05, "loss": 0.9536, "step": 7315 }, { "epoch": 0.8444854637748038, "grad_norm": 0.17997972667217255, "learning_rate": 1.437663154065142e-05, "loss": 0.977, "step": 7320 }, { "epoch": 0.8450622981079834, "grad_norm": 0.19456464052200317, "learning_rate": 1.4272785755533601e-05, "loss": 0.96, "step": 7325 }, { "epoch": 0.8456391324411628, "grad_norm": 0.18535476922988892, "learning_rate": 1.4169287552803923e-05, "loss": 0.9358, "step": 7330 }, { "epoch": 0.8462159667743424, "grad_norm": 0.19435110688209534, "learning_rate": 1.4066137352097575e-05, "loss": 0.9578, "step": 7335 }, { "epoch": 0.846792801107522, "grad_norm": 0.196323961019516, "learning_rate": 1.396333557163868e-05, "loss": 0.9587, "step": 7340 }, { "epoch": 0.8473696354407014, "grad_norm": 0.18497958779335022, "learning_rate": 1.3860882628238781e-05, "loss": 0.8837, "step": 7345 }, { "epoch": 0.847946469773881, "grad_norm": 0.1857413649559021, "learning_rate": 1.3758778937294947e-05, "loss": 0.9882, "step": 7350 }, { "epoch": 0.8485233041070604, "grad_norm": 0.18033544719219208, "learning_rate": 1.365702491278833e-05, "loss": 0.9649, "step": 7355 }, { "epoch": 0.84910013844024, "grad_norm": 0.17830757796764374, "learning_rate": 1.3555620967282235e-05, "loss": 0.9306, "step": 7360 }, { "epoch": 0.8496769727734195, "grad_norm": 0.18326257169246674, "learning_rate": 1.3454567511920634e-05, "loss": 0.9583, "step": 7365 }, { "epoch": 0.850253807106599, "grad_norm": 0.1927419751882553, "learning_rate": 1.3353864956426366e-05, "loss": 0.9199, "step": 7370 }, { "epoch": 0.8508306414397785, "grad_norm": 0.20454275608062744, "learning_rate": 1.3253513709099652e-05, "loss": 0.9859, "step": 7375 }, { "epoch": 0.8514074757729581, "grad_norm": 0.1943024843931198, "learning_rate": 1.3153514176816195e-05, "loss": 0.9491, "step": 7380 }, { "epoch": 0.8519843101061375, "grad_norm": 0.196246936917305, "learning_rate": 1.305386676502578e-05, "loss": 0.9904, "step": 7385 }, { "epoch": 0.8525611444393171, "grad_norm": 0.1911761462688446, "learning_rate": 1.2954571877750443e-05, "loss": 0.9533, "step": 7390 }, { "epoch": 0.8531379787724965, "grad_norm": 0.193502277135849, "learning_rate": 1.2855629917582935e-05, "loss": 0.9714, "step": 7395 }, { "epoch": 0.8537148131056761, "grad_norm": 0.193160742521286, "learning_rate": 1.2757041285685011e-05, "loss": 0.9481, "step": 7400 }, { "epoch": 0.8542916474388556, "grad_norm": 0.19016680121421814, "learning_rate": 1.2658806381785926e-05, "loss": 0.9405, "step": 7405 }, { "epoch": 0.8548684817720351, "grad_norm": 0.19033581018447876, "learning_rate": 1.2560925604180673e-05, "loss": 0.9857, "step": 7410 }, { "epoch": 0.8554453161052146, "grad_norm": 0.17935208976268768, "learning_rate": 1.2463399349728488e-05, "loss": 0.9586, "step": 7415 }, { "epoch": 0.8560221504383941, "grad_norm": 0.18908259272575378, "learning_rate": 1.2366228013851156e-05, "loss": 0.9478, "step": 7420 }, { "epoch": 0.8565989847715736, "grad_norm": 0.18578480184078217, "learning_rate": 1.2269411990531421e-05, "loss": 0.9384, "step": 7425 }, { "epoch": 0.8571758191047532, "grad_norm": 0.18580469489097595, "learning_rate": 1.2172951672311427e-05, "loss": 0.9289, "step": 7430 }, { "epoch": 0.8577526534379326, "grad_norm": 0.20112477242946625, "learning_rate": 1.207684745029114e-05, "loss": 0.9331, "step": 7435 }, { "epoch": 0.8583294877711122, "grad_norm": 0.19187471270561218, "learning_rate": 1.1981099714126654e-05, "loss": 0.9518, "step": 7440 }, { "epoch": 0.8589063221042916, "grad_norm": 0.1830911487340927, "learning_rate": 1.1885708852028777e-05, "loss": 0.9235, "step": 7445 }, { "epoch": 0.8594831564374712, "grad_norm": 0.1817176192998886, "learning_rate": 1.1790675250761263e-05, "loss": 0.9221, "step": 7450 }, { "epoch": 0.8600599907706507, "grad_norm": 0.19020894169807434, "learning_rate": 1.1695999295639459e-05, "loss": 0.9953, "step": 7455 }, { "epoch": 0.8606368251038302, "grad_norm": 0.191711887717247, "learning_rate": 1.1601681370528484e-05, "loss": 0.9635, "step": 7460 }, { "epoch": 0.8612136594370097, "grad_norm": 0.19259481132030487, "learning_rate": 1.150772185784198e-05, "loss": 0.9135, "step": 7465 }, { "epoch": 0.8617904937701892, "grad_norm": 0.1911952793598175, "learning_rate": 1.1414121138540279e-05, "loss": 0.9416, "step": 7470 }, { "epoch": 0.8623673281033687, "grad_norm": 0.2062252312898636, "learning_rate": 1.1320879592129052e-05, "loss": 0.9167, "step": 7475 }, { "epoch": 0.8629441624365483, "grad_norm": 0.1907137632369995, "learning_rate": 1.1227997596657636e-05, "loss": 0.9375, "step": 7480 }, { "epoch": 0.8635209967697277, "grad_norm": 0.19318221509456635, "learning_rate": 1.1135475528717642e-05, "loss": 0.936, "step": 7485 }, { "epoch": 0.8640978311029073, "grad_norm": 0.18050383031368256, "learning_rate": 1.1043313763441277e-05, "loss": 0.9388, "step": 7490 }, { "epoch": 0.8646746654360867, "grad_norm": 0.18808601796627045, "learning_rate": 1.0951512674499898e-05, "loss": 0.9033, "step": 7495 }, { "epoch": 0.8652514997692663, "grad_norm": 0.18681325018405914, "learning_rate": 1.0860072634102569e-05, "loss": 0.9511, "step": 7500 }, { "epoch": 0.8658283341024458, "grad_norm": 0.18992042541503906, "learning_rate": 1.0768994012994371e-05, "loss": 0.9316, "step": 7505 }, { "epoch": 0.8664051684356253, "grad_norm": 0.19558371603488922, "learning_rate": 1.0678277180455109e-05, "loss": 0.9203, "step": 7510 }, { "epoch": 0.8669820027688048, "grad_norm": 0.18919962644577026, "learning_rate": 1.0587922504297642e-05, "loss": 0.939, "step": 7515 }, { "epoch": 0.8675588371019843, "grad_norm": 0.18517306447029114, "learning_rate": 1.049793035086647e-05, "loss": 0.9253, "step": 7520 }, { "epoch": 0.8681356714351638, "grad_norm": 0.1969662606716156, "learning_rate": 1.040830108503622e-05, "loss": 0.9644, "step": 7525 }, { "epoch": 0.8687125057683434, "grad_norm": 0.18453893065452576, "learning_rate": 1.031903507021027e-05, "loss": 0.9617, "step": 7530 }, { "epoch": 0.8692893401015228, "grad_norm": 0.18227744102478027, "learning_rate": 1.0230132668319082e-05, "loss": 0.9707, "step": 7535 }, { "epoch": 0.8698661744347024, "grad_norm": 0.19960415363311768, "learning_rate": 1.014159423981893e-05, "loss": 0.9484, "step": 7540 }, { "epoch": 0.8704430087678818, "grad_norm": 0.1987922042608261, "learning_rate": 1.0053420143690284e-05, "loss": 0.931, "step": 7545 }, { "epoch": 0.8710198431010614, "grad_norm": 0.1911085695028305, "learning_rate": 9.965610737436515e-06, "loss": 0.9723, "step": 7550 }, { "epoch": 0.8715966774342409, "grad_norm": 0.1827116310596466, "learning_rate": 9.87816637708221e-06, "loss": 0.9181, "step": 7555 }, { "epoch": 0.8721735117674204, "grad_norm": 0.19509339332580566, "learning_rate": 9.791087417172019e-06, "loss": 0.971, "step": 7560 }, { "epoch": 0.8727503461005999, "grad_norm": 0.18567214906215668, "learning_rate": 9.704374210768952e-06, "loss": 0.9254, "step": 7565 }, { "epoch": 0.8733271804337794, "grad_norm": 0.20758579671382904, "learning_rate": 9.618027109453176e-06, "loss": 0.9661, "step": 7570 }, { "epoch": 0.8739040147669589, "grad_norm": 0.20255382359027863, "learning_rate": 9.532046463320365e-06, "loss": 0.9802, "step": 7575 }, { "epoch": 0.8744808491001385, "grad_norm": 0.18845497071743011, "learning_rate": 9.446432620980517e-06, "loss": 0.94, "step": 7580 }, { "epoch": 0.8750576834333179, "grad_norm": 0.19557009637355804, "learning_rate": 9.361185929556282e-06, "loss": 0.9853, "step": 7585 }, { "epoch": 0.8756345177664975, "grad_norm": 0.1927575021982193, "learning_rate": 9.276306734681805e-06, "loss": 0.966, "step": 7590 }, { "epoch": 0.8762113520996769, "grad_norm": 0.18559756875038147, "learning_rate": 9.191795380501134e-06, "loss": 0.9768, "step": 7595 }, { "epoch": 0.8767881864328565, "grad_norm": 0.18745888769626617, "learning_rate": 9.107652209666973e-06, "loss": 0.9522, "step": 7600 }, { "epoch": 0.877365020766036, "grad_norm": 0.1900642067193985, "learning_rate": 9.023877563339134e-06, "loss": 0.8757, "step": 7605 }, { "epoch": 0.8779418550992155, "grad_norm": 0.1792660653591156, "learning_rate": 8.940471781183335e-06, "loss": 0.9486, "step": 7610 }, { "epoch": 0.878518689432395, "grad_norm": 0.19872300326824188, "learning_rate": 8.857435201369645e-06, "loss": 0.955, "step": 7615 }, { "epoch": 0.8790955237655745, "grad_norm": 0.2203512042760849, "learning_rate": 8.774768160571257e-06, "loss": 0.9289, "step": 7620 }, { "epoch": 0.879672358098754, "grad_norm": 0.19183123111724854, "learning_rate": 8.692470993962987e-06, "loss": 0.9636, "step": 7625 }, { "epoch": 0.8802491924319336, "grad_norm": 0.1879664957523346, "learning_rate": 8.610544035220103e-06, "loss": 0.9431, "step": 7630 }, { "epoch": 0.880826026765113, "grad_norm": 0.19013233482837677, "learning_rate": 8.528987616516748e-06, "loss": 0.8901, "step": 7635 }, { "epoch": 0.8814028610982926, "grad_norm": 0.18684031069278717, "learning_rate": 8.44780206852478e-06, "loss": 0.9107, "step": 7640 }, { "epoch": 0.881979695431472, "grad_norm": 0.24778002500534058, "learning_rate": 8.366987720412322e-06, "loss": 0.9398, "step": 7645 }, { "epoch": 0.8825565297646516, "grad_norm": 0.20799873769283295, "learning_rate": 8.286544899842441e-06, "loss": 0.9893, "step": 7650 }, { "epoch": 0.883133364097831, "grad_norm": 0.18839897215366364, "learning_rate": 8.206473932971903e-06, "loss": 1.0032, "step": 7655 }, { "epoch": 0.8837101984310106, "grad_norm": 0.1880505084991455, "learning_rate": 8.126775144449705e-06, "loss": 0.9634, "step": 7660 }, { "epoch": 0.8842870327641901, "grad_norm": 0.1813763827085495, "learning_rate": 8.04744885741593e-06, "loss": 0.9001, "step": 7665 }, { "epoch": 0.8848638670973696, "grad_norm": 0.18489223718643188, "learning_rate": 7.968495393500285e-06, "loss": 0.9576, "step": 7670 }, { "epoch": 0.8854407014305491, "grad_norm": 0.19136987626552582, "learning_rate": 7.889915072820874e-06, "loss": 0.9586, "step": 7675 }, { "epoch": 0.8860175357637287, "grad_norm": 0.18882869184017181, "learning_rate": 7.811708213982883e-06, "loss": 0.938, "step": 7680 }, { "epoch": 0.8865943700969081, "grad_norm": 0.19385488331317902, "learning_rate": 7.733875134077307e-06, "loss": 0.9481, "step": 7685 }, { "epoch": 0.8871712044300877, "grad_norm": 0.2002614289522171, "learning_rate": 7.656416148679612e-06, "loss": 0.9657, "step": 7690 }, { "epoch": 0.8877480387632672, "grad_norm": 0.19226016104221344, "learning_rate": 7.579331571848569e-06, "loss": 1.0032, "step": 7695 }, { "epoch": 0.8883248730964467, "grad_norm": 0.192755326628685, "learning_rate": 7.502621716124791e-06, "loss": 0.9508, "step": 7700 }, { "epoch": 0.8889017074296263, "grad_norm": 0.19249974191188812, "learning_rate": 7.4262868925296995e-06, "loss": 0.9289, "step": 7705 }, { "epoch": 0.8894785417628057, "grad_norm": 0.19001740217208862, "learning_rate": 7.35032741056404e-06, "loss": 0.9426, "step": 7710 }, { "epoch": 0.8900553760959853, "grad_norm": 0.20642143487930298, "learning_rate": 7.274743578206788e-06, "loss": 0.9962, "step": 7715 }, { "epoch": 0.8906322104291647, "grad_norm": 0.1881352961063385, "learning_rate": 7.199535701913806e-06, "loss": 0.92, "step": 7720 }, { "epoch": 0.8912090447623443, "grad_norm": 0.18823300302028656, "learning_rate": 7.124704086616684e-06, "loss": 0.9823, "step": 7725 }, { "epoch": 0.8917858790955238, "grad_norm": 0.19650129973888397, "learning_rate": 7.05024903572139e-06, "loss": 0.9415, "step": 7730 }, { "epoch": 0.8923627134287033, "grad_norm": 0.18661227822303772, "learning_rate": 6.976170851107178e-06, "loss": 0.986, "step": 7735 }, { "epoch": 0.8929395477618828, "grad_norm": 0.1886250525712967, "learning_rate": 6.902469833125236e-06, "loss": 0.9679, "step": 7740 }, { "epoch": 0.8935163820950623, "grad_norm": 0.1884276568889618, "learning_rate": 6.8291462805975535e-06, "loss": 0.9508, "step": 7745 }, { "epoch": 0.8940932164282418, "grad_norm": 0.18795393407344818, "learning_rate": 6.756200490815645e-06, "loss": 0.9148, "step": 7750 }, { "epoch": 0.8946700507614214, "grad_norm": 0.19203375279903412, "learning_rate": 6.683632759539449e-06, "loss": 0.9604, "step": 7755 }, { "epoch": 0.8952468850946008, "grad_norm": 0.19676725566387177, "learning_rate": 6.611443380995963e-06, "loss": 0.964, "step": 7760 }, { "epoch": 0.8958237194277804, "grad_norm": 0.17924650013446808, "learning_rate": 6.5396326478782465e-06, "loss": 0.8975, "step": 7765 }, { "epoch": 0.8964005537609598, "grad_norm": 0.18147966265678406, "learning_rate": 6.468200851344042e-06, "loss": 0.9611, "step": 7770 }, { "epoch": 0.8969773880941394, "grad_norm": 0.17754757404327393, "learning_rate": 6.397148281014798e-06, "loss": 0.9375, "step": 7775 }, { "epoch": 0.8975542224273189, "grad_norm": 0.192587211728096, "learning_rate": 6.326475224974249e-06, "loss": 0.9408, "step": 7780 }, { "epoch": 0.8981310567604984, "grad_norm": 0.1927655041217804, "learning_rate": 6.256181969767505e-06, "loss": 0.9238, "step": 7785 }, { "epoch": 0.8987078910936779, "grad_norm": 0.18646924197673798, "learning_rate": 6.186268800399675e-06, "loss": 0.9693, "step": 7790 }, { "epoch": 0.8992847254268574, "grad_norm": 0.18755947053432465, "learning_rate": 6.116736000334888e-06, "loss": 0.9422, "step": 7795 }, { "epoch": 0.8998615597600369, "grad_norm": 0.19362567365169525, "learning_rate": 6.047583851494965e-06, "loss": 0.9084, "step": 7800 }, { "epoch": 0.9004383940932165, "grad_norm": 0.1956445276737213, "learning_rate": 5.978812634258468e-06, "loss": 0.9196, "step": 7805 }, { "epoch": 0.9010152284263959, "grad_norm": 0.20970386266708374, "learning_rate": 5.910422627459411e-06, "loss": 0.968, "step": 7810 }, { "epoch": 0.9015920627595755, "grad_norm": 0.19140861928462982, "learning_rate": 5.842414108386151e-06, "loss": 0.9587, "step": 7815 }, { "epoch": 0.9021688970927549, "grad_norm": 0.18535487353801727, "learning_rate": 5.774787352780387e-06, "loss": 0.9869, "step": 7820 }, { "epoch": 0.9027457314259345, "grad_norm": 0.1941797137260437, "learning_rate": 5.707542634835883e-06, "loss": 0.983, "step": 7825 }, { "epoch": 0.903322565759114, "grad_norm": 0.1803128570318222, "learning_rate": 5.640680227197426e-06, "loss": 0.9323, "step": 7830 }, { "epoch": 0.9038994000922935, "grad_norm": 0.18415096402168274, "learning_rate": 5.574200400959773e-06, "loss": 0.9442, "step": 7835 }, { "epoch": 0.904476234425473, "grad_norm": 0.19649024307727814, "learning_rate": 5.5081034256664445e-06, "loss": 1.0359, "step": 7840 }, { "epoch": 0.9050530687586525, "grad_norm": 0.19169114530086517, "learning_rate": 5.442389569308703e-06, "loss": 0.9368, "step": 7845 }, { "epoch": 0.905629903091832, "grad_norm": 0.19056066870689392, "learning_rate": 5.377059098324455e-06, "loss": 0.9447, "step": 7850 }, { "epoch": 0.9062067374250116, "grad_norm": 0.18303756415843964, "learning_rate": 5.312112277597159e-06, "loss": 0.8908, "step": 7855 }, { "epoch": 0.906783571758191, "grad_norm": 0.20311231911182404, "learning_rate": 5.247549370454763e-06, "loss": 0.9742, "step": 7860 }, { "epoch": 0.9073604060913706, "grad_norm": 0.19544216990470886, "learning_rate": 5.183370638668616e-06, "loss": 1.0013, "step": 7865 }, { "epoch": 0.90793724042455, "grad_norm": 0.19310228526592255, "learning_rate": 5.119576342452459e-06, "loss": 0.9131, "step": 7870 }, { "epoch": 0.9085140747577296, "grad_norm": 0.2016146332025528, "learning_rate": 5.056166740461265e-06, "loss": 0.9889, "step": 7875 }, { "epoch": 0.9090909090909091, "grad_norm": 0.25115054845809937, "learning_rate": 4.993142089790337e-06, "loss": 0.9626, "step": 7880 }, { "epoch": 0.9096677434240886, "grad_norm": 0.19337384402751923, "learning_rate": 4.9305026459741224e-06, "loss": 0.9716, "step": 7885 }, { "epoch": 0.9102445777572681, "grad_norm": 0.1815733164548874, "learning_rate": 4.8682486629852975e-06, "loss": 0.9293, "step": 7890 }, { "epoch": 0.9108214120904476, "grad_norm": 0.18898643553256989, "learning_rate": 4.8063803932336114e-06, "loss": 0.8889, "step": 7895 }, { "epoch": 0.9113982464236271, "grad_norm": 0.1844266951084137, "learning_rate": 4.74489808756502e-06, "loss": 0.9805, "step": 7900 }, { "epoch": 0.9119750807568067, "grad_norm": 0.18842408061027527, "learning_rate": 4.683801995260484e-06, "loss": 0.9662, "step": 7905 }, { "epoch": 0.9125519150899861, "grad_norm": 0.20098374783992767, "learning_rate": 4.623092364035153e-06, "loss": 0.9969, "step": 7910 }, { "epoch": 0.9131287494231657, "grad_norm": 0.19723452627658844, "learning_rate": 4.562769440037174e-06, "loss": 0.8958, "step": 7915 }, { "epoch": 0.9137055837563451, "grad_norm": 0.20587897300720215, "learning_rate": 4.502833467846857e-06, "loss": 1.036, "step": 7920 }, { "epoch": 0.9142824180895247, "grad_norm": 0.18344053626060486, "learning_rate": 4.443284690475558e-06, "loss": 0.9227, "step": 7925 }, { "epoch": 0.9148592524227042, "grad_norm": 0.19742338359355927, "learning_rate": 4.384123349364788e-06, "loss": 0.9817, "step": 7930 }, { "epoch": 0.9154360867558837, "grad_norm": 0.1915377378463745, "learning_rate": 4.32534968438516e-06, "loss": 0.9285, "step": 7935 }, { "epoch": 0.9160129210890632, "grad_norm": 0.20171992480754852, "learning_rate": 4.266963933835455e-06, "loss": 0.9628, "step": 7940 }, { "epoch": 0.9165897554222427, "grad_norm": 0.19588075578212738, "learning_rate": 4.208966334441633e-06, "loss": 0.944, "step": 7945 }, { "epoch": 0.9171665897554222, "grad_norm": 0.1999482959508896, "learning_rate": 4.151357121355947e-06, "loss": 0.9673, "step": 7950 }, { "epoch": 0.9177434240886018, "grad_norm": 0.1867521107196808, "learning_rate": 4.0941365281558454e-06, "loss": 0.9605, "step": 7955 }, { "epoch": 0.9183202584217812, "grad_norm": 0.19164207577705383, "learning_rate": 4.037304786843188e-06, "loss": 0.9841, "step": 7960 }, { "epoch": 0.9188970927549608, "grad_norm": 0.18731051683425903, "learning_rate": 3.980862127843199e-06, "loss": 0.9518, "step": 7965 }, { "epoch": 0.9194739270881402, "grad_norm": 0.18932907283306122, "learning_rate": 3.924808780003531e-06, "loss": 0.9197, "step": 7970 }, { "epoch": 0.9200507614213198, "grad_norm": 0.19089601933956146, "learning_rate": 3.86914497059343e-06, "loss": 0.916, "step": 7975 }, { "epoch": 0.9206275957544993, "grad_norm": 0.19187115132808685, "learning_rate": 3.813870925302698e-06, "loss": 0.9431, "step": 7980 }, { "epoch": 0.9212044300876788, "grad_norm": 0.20279189944267273, "learning_rate": 3.7589868682408434e-06, "loss": 0.9663, "step": 7985 }, { "epoch": 0.9217812644208583, "grad_norm": 0.18457676470279694, "learning_rate": 3.7044930219362063e-06, "loss": 0.9523, "step": 7990 }, { "epoch": 0.9223580987540378, "grad_norm": 0.18185338377952576, "learning_rate": 3.6503896073349587e-06, "loss": 0.8865, "step": 7995 }, { "epoch": 0.9229349330872173, "grad_norm": 0.19247382879257202, "learning_rate": 3.5966768438002507e-06, "loss": 0.9723, "step": 8000 }, { "epoch": 0.9235117674203969, "grad_norm": 0.1936521977186203, "learning_rate": 3.5433549491113884e-06, "loss": 0.9622, "step": 8005 }, { "epoch": 0.9240886017535763, "grad_norm": 0.19727076590061188, "learning_rate": 3.4904241394628557e-06, "loss": 0.921, "step": 8010 }, { "epoch": 0.9246654360867559, "grad_norm": 0.19335485994815826, "learning_rate": 3.4378846294634835e-06, "loss": 0.9722, "step": 8015 }, { "epoch": 0.9252422704199353, "grad_norm": 0.1931992471218109, "learning_rate": 3.3857366321355722e-06, "loss": 0.9835, "step": 8020 }, { "epoch": 0.9258191047531149, "grad_norm": 0.17889727652072906, "learning_rate": 3.3339803589140352e-06, "loss": 0.9528, "step": 8025 }, { "epoch": 0.9263959390862944, "grad_norm": 0.19378302991390228, "learning_rate": 3.2826160196455123e-06, "loss": 0.9556, "step": 8030 }, { "epoch": 0.9269727734194739, "grad_norm": 0.18681880831718445, "learning_rate": 3.23164382258756e-06, "loss": 0.9854, "step": 8035 }, { "epoch": 0.9275496077526535, "grad_norm": 0.19622889161109924, "learning_rate": 3.181063974407772e-06, "loss": 0.9782, "step": 8040 }, { "epoch": 0.928126442085833, "grad_norm": 0.17983347177505493, "learning_rate": 3.1308766801829926e-06, "loss": 0.968, "step": 8045 }, { "epoch": 0.9287032764190125, "grad_norm": 0.2013084590435028, "learning_rate": 3.081082143398395e-06, "loss": 0.9816, "step": 8050 }, { "epoch": 0.929280110752192, "grad_norm": 0.2396339625120163, "learning_rate": 3.0316805659467705e-06, "loss": 0.9845, "step": 8055 }, { "epoch": 0.9298569450853715, "grad_norm": 0.19670408964157104, "learning_rate": 2.9826721481276077e-06, "loss": 0.9738, "step": 8060 }, { "epoch": 0.930433779418551, "grad_norm": 0.19861535727977753, "learning_rate": 2.934057088646336e-06, "loss": 0.9276, "step": 8065 }, { "epoch": 0.9310106137517306, "grad_norm": 0.19099079072475433, "learning_rate": 2.8858355846134944e-06, "loss": 0.9658, "step": 8070 }, { "epoch": 0.93158744808491, "grad_norm": 0.18421880900859833, "learning_rate": 2.8380078315439653e-06, "loss": 0.945, "step": 8075 }, { "epoch": 0.9321642824180896, "grad_norm": 0.1922229677438736, "learning_rate": 2.790574023356163e-06, "loss": 0.9513, "step": 8080 }, { "epoch": 0.932741116751269, "grad_norm": 0.18163804709911346, "learning_rate": 2.7435343523712242e-06, "loss": 0.93, "step": 8085 }, { "epoch": 0.9333179510844486, "grad_norm": 0.2035042643547058, "learning_rate": 2.6968890093122754e-06, "loss": 0.955, "step": 8090 }, { "epoch": 0.933894785417628, "grad_norm": 0.19784539937973022, "learning_rate": 2.650638183303611e-06, "loss": 0.9324, "step": 8095 }, { "epoch": 0.9344716197508076, "grad_norm": 0.2163614183664322, "learning_rate": 2.6047820618699592e-06, "loss": 0.9646, "step": 8100 }, { "epoch": 0.9350484540839871, "grad_norm": 0.19160954654216766, "learning_rate": 2.5593208309357187e-06, "loss": 0.9506, "step": 8105 }, { "epoch": 0.9356252884171666, "grad_norm": 0.18938621878623962, "learning_rate": 2.514254674824168e-06, "loss": 0.9404, "step": 8110 }, { "epoch": 0.9362021227503461, "grad_norm": 0.20650902390480042, "learning_rate": 2.469583776256812e-06, "loss": 0.9374, "step": 8115 }, { "epoch": 0.9367789570835257, "grad_norm": 0.19179539382457733, "learning_rate": 2.4253083163525038e-06, "loss": 0.9592, "step": 8120 }, { "epoch": 0.9373557914167051, "grad_norm": 0.18857675790786743, "learning_rate": 2.3814284746268344e-06, "loss": 0.9976, "step": 8125 }, { "epoch": 0.9379326257498847, "grad_norm": 0.19117896258831024, "learning_rate": 2.3379444289913342e-06, "loss": 0.9419, "step": 8130 }, { "epoch": 0.9385094600830641, "grad_norm": 0.2242845743894577, "learning_rate": 2.2948563557527836e-06, "loss": 0.9697, "step": 8135 }, { "epoch": 0.9390862944162437, "grad_norm": 0.1854942888021469, "learning_rate": 2.2521644296124466e-06, "loss": 0.9324, "step": 8140 }, { "epoch": 0.9396631287494231, "grad_norm": 0.19978578388690948, "learning_rate": 2.209868823665473e-06, "loss": 0.9658, "step": 8145 }, { "epoch": 0.9402399630826027, "grad_norm": 0.19244587421417236, "learning_rate": 2.1679697094000638e-06, "loss": 0.9476, "step": 8150 }, { "epoch": 0.9408167974157822, "grad_norm": 0.18521623313426971, "learning_rate": 2.1264672566968736e-06, "loss": 0.9274, "step": 8155 }, { "epoch": 0.9413936317489617, "grad_norm": 0.18388035893440247, "learning_rate": 2.0853616338282644e-06, "loss": 0.9005, "step": 8160 }, { "epoch": 0.9419704660821412, "grad_norm": 0.2041049599647522, "learning_rate": 2.044653007457653e-06, "loss": 0.9389, "step": 8165 }, { "epoch": 0.9425473004153208, "grad_norm": 0.18562230467796326, "learning_rate": 2.0043415426388324e-06, "loss": 0.9862, "step": 8170 }, { "epoch": 0.9431241347485002, "grad_norm": 0.192937970161438, "learning_rate": 1.964427402815294e-06, "loss": 0.9155, "step": 8175 }, { "epoch": 0.9437009690816798, "grad_norm": 0.1922484189271927, "learning_rate": 1.924910749819586e-06, "loss": 0.9495, "step": 8180 }, { "epoch": 0.9442778034148592, "grad_norm": 0.170320063829422, "learning_rate": 1.8857917438725892e-06, "loss": 0.9415, "step": 8185 }, { "epoch": 0.9448546377480388, "grad_norm": 0.20970895886421204, "learning_rate": 1.8470705435829849e-06, "loss": 0.9352, "step": 8190 }, { "epoch": 0.9454314720812182, "grad_norm": 0.18411558866500854, "learning_rate": 1.8087473059464788e-06, "loss": 0.9427, "step": 8195 }, { "epoch": 0.9460083064143978, "grad_norm": 0.19260326027870178, "learning_rate": 1.770822186345289e-06, "loss": 0.9852, "step": 8200 }, { "epoch": 0.9465851407475773, "grad_norm": 0.18823884427547455, "learning_rate": 1.7332953385474027e-06, "loss": 0.9574, "step": 8205 }, { "epoch": 0.9471619750807568, "grad_norm": 0.20108766853809357, "learning_rate": 1.6961669147060765e-06, "loss": 0.9593, "step": 8210 }, { "epoch": 0.9477388094139363, "grad_norm": 0.18643486499786377, "learning_rate": 1.6594370653590706e-06, "loss": 0.9279, "step": 8215 }, { "epoch": 0.9483156437471159, "grad_norm": 0.19154737889766693, "learning_rate": 1.6231059394281934e-06, "loss": 0.9204, "step": 8220 }, { "epoch": 0.9488924780802953, "grad_norm": 0.20026449859142303, "learning_rate": 1.587173684218557e-06, "loss": 0.9065, "step": 8225 }, { "epoch": 0.9494693124134749, "grad_norm": 0.20944979786872864, "learning_rate": 1.55164044541809e-06, "loss": 0.9708, "step": 8230 }, { "epoch": 0.9500461467466543, "grad_norm": 0.21715082228183746, "learning_rate": 1.5165063670968926e-06, "loss": 0.9512, "step": 8235 }, { "epoch": 0.9506229810798339, "grad_norm": 0.19158127903938293, "learning_rate": 1.4817715917066488e-06, "loss": 0.9409, "step": 8240 }, { "epoch": 0.9511998154130133, "grad_norm": 0.18626423180103302, "learning_rate": 1.4474362600800706e-06, "loss": 0.9412, "step": 8245 }, { "epoch": 0.9517766497461929, "grad_norm": 0.1935131996870041, "learning_rate": 1.4135005114303435e-06, "loss": 0.9377, "step": 8250 }, { "epoch": 0.9523534840793724, "grad_norm": 0.19177401065826416, "learning_rate": 1.379964483350482e-06, "loss": 0.9414, "step": 8255 }, { "epoch": 0.9529303184125519, "grad_norm": 0.1947384476661682, "learning_rate": 1.3468283118128756e-06, "loss": 1.0007, "step": 8260 }, { "epoch": 0.9535071527457314, "grad_norm": 0.2043754905462265, "learning_rate": 1.314092131168665e-06, "loss": 0.9421, "step": 8265 }, { "epoch": 0.954083987078911, "grad_norm": 0.19373486936092377, "learning_rate": 1.2817560741472445e-06, "loss": 1.0349, "step": 8270 }, { "epoch": 0.9546608214120904, "grad_norm": 0.1884533017873764, "learning_rate": 1.2498202718556617e-06, "loss": 0.9743, "step": 8275 }, { "epoch": 0.95523765574527, "grad_norm": 0.20388375222682953, "learning_rate": 1.2182848537781622e-06, "loss": 0.9565, "step": 8280 }, { "epoch": 0.9558144900784494, "grad_norm": 0.18673211336135864, "learning_rate": 1.187149947775612e-06, "loss": 0.9801, "step": 8285 }, { "epoch": 0.956391324411629, "grad_norm": 0.19092413783073425, "learning_rate": 1.1564156800849879e-06, "loss": 0.9634, "step": 8290 }, { "epoch": 0.9569681587448085, "grad_norm": 0.19628967344760895, "learning_rate": 1.1260821753188987e-06, "loss": 0.9575, "step": 8295 }, { "epoch": 0.957544993077988, "grad_norm": 0.19361461699008942, "learning_rate": 1.0961495564650092e-06, "loss": 0.9055, "step": 8300 }, { "epoch": 0.9581218274111675, "grad_norm": 0.20304569602012634, "learning_rate": 1.0666179448856174e-06, "loss": 0.8844, "step": 8305 }, { "epoch": 0.958698661744347, "grad_norm": 0.19096963107585907, "learning_rate": 1.0374874603171326e-06, "loss": 0.9868, "step": 8310 }, { "epoch": 0.9592754960775265, "grad_norm": 0.1914781928062439, "learning_rate": 1.0087582208695768e-06, "loss": 0.8896, "step": 8315 }, { "epoch": 0.9598523304107061, "grad_norm": 0.19837866723537445, "learning_rate": 9.804303430261174e-07, "loss": 0.9526, "step": 8320 }, { "epoch": 0.9604291647438855, "grad_norm": 0.1940370500087738, "learning_rate": 9.525039416425907e-07, "loss": 0.947, "step": 8325 }, { "epoch": 0.9610059990770651, "grad_norm": 0.1854429841041565, "learning_rate": 9.249791299470567e-07, "loss": 0.9243, "step": 8330 }, { "epoch": 0.9615828334102445, "grad_norm": 0.20479361712932587, "learning_rate": 8.978560195393115e-07, "loss": 0.9391, "step": 8335 }, { "epoch": 0.9621596677434241, "grad_norm": 0.19264739751815796, "learning_rate": 8.711347203904541e-07, "loss": 0.9304, "step": 8340 }, { "epoch": 0.9627365020766036, "grad_norm": 0.18168406188488007, "learning_rate": 8.448153408424087e-07, "loss": 0.9627, "step": 8345 }, { "epoch": 0.9633133364097831, "grad_norm": 0.1960950791835785, "learning_rate": 8.188979876075475e-07, "loss": 0.9653, "step": 8350 }, { "epoch": 0.9638901707429626, "grad_norm": 0.22994817793369293, "learning_rate": 7.933827657682025e-07, "loss": 0.9642, "step": 8355 }, { "epoch": 0.9644670050761421, "grad_norm": 0.19266270101070404, "learning_rate": 7.682697787762317e-07, "loss": 0.9601, "step": 8360 }, { "epoch": 0.9650438394093216, "grad_norm": 0.20038749277591705, "learning_rate": 7.435591284526866e-07, "loss": 1.018, "step": 8365 }, { "epoch": 0.9656206737425012, "grad_norm": 0.18728072941303253, "learning_rate": 7.192509149872684e-07, "loss": 0.9477, "step": 8370 }, { "epoch": 0.9661975080756806, "grad_norm": 0.1958947330713272, "learning_rate": 6.953452369380497e-07, "loss": 0.9806, "step": 8375 }, { "epoch": 0.9667743424088602, "grad_norm": 0.1987629234790802, "learning_rate": 6.718421912309758e-07, "loss": 0.8928, "step": 8380 }, { "epoch": 0.9673511767420396, "grad_norm": 0.19647015631198883, "learning_rate": 6.487418731595418e-07, "loss": 0.9933, "step": 8385 }, { "epoch": 0.9679280110752192, "grad_norm": 0.19900114834308624, "learning_rate": 6.260443763843493e-07, "loss": 0.951, "step": 8390 }, { "epoch": 0.9685048454083988, "grad_norm": 0.19372044503688812, "learning_rate": 6.037497929327839e-07, "loss": 0.9665, "step": 8395 }, { "epoch": 0.9690816797415782, "grad_norm": 0.20044179260730743, "learning_rate": 5.818582131985939e-07, "loss": 0.9543, "step": 8400 }, { "epoch": 0.9696585140747578, "grad_norm": 0.19964653253555298, "learning_rate": 5.603697259415341e-07, "loss": 0.9529, "step": 8405 }, { "epoch": 0.9702353484079372, "grad_norm": 0.19442757964134216, "learning_rate": 5.392844182870449e-07, "loss": 0.987, "step": 8410 }, { "epoch": 0.9708121827411168, "grad_norm": 0.18775001168251038, "learning_rate": 5.186023757258407e-07, "loss": 0.9705, "step": 8415 }, { "epoch": 0.9713890170742963, "grad_norm": 0.19862832129001617, "learning_rate": 4.983236821135995e-07, "loss": 0.9409, "step": 8420 }, { "epoch": 0.9719658514074758, "grad_norm": 0.18501073122024536, "learning_rate": 4.784484196706073e-07, "loss": 0.9682, "step": 8425 }, { "epoch": 0.9725426857406553, "grad_norm": 0.19959832727909088, "learning_rate": 4.5897666898145896e-07, "loss": 0.9556, "step": 8430 }, { "epoch": 0.9731195200738348, "grad_norm": 0.18275192379951477, "learning_rate": 4.3990850899467975e-07, "loss": 0.9612, "step": 8435 }, { "epoch": 0.9736963544070143, "grad_norm": 0.18589888513088226, "learning_rate": 4.2124401702241524e-07, "loss": 0.947, "step": 8440 }, { "epoch": 0.9742731887401939, "grad_norm": 0.19903455674648285, "learning_rate": 4.029832687401758e-07, "loss": 0.9574, "step": 8445 }, { "epoch": 0.9748500230733733, "grad_norm": 0.1937977373600006, "learning_rate": 3.851263381864589e-07, "loss": 0.9699, "step": 8450 }, { "epoch": 0.9754268574065529, "grad_norm": 0.19150091707706451, "learning_rate": 3.67673297762483e-07, "loss": 0.9158, "step": 8455 }, { "epoch": 0.9760036917397323, "grad_norm": 0.1840011477470398, "learning_rate": 3.506242182318653e-07, "loss": 0.9732, "step": 8460 }, { "epoch": 0.9765805260729119, "grad_norm": 0.1933344304561615, "learning_rate": 3.339791687203997e-07, "loss": 0.9664, "step": 8465 }, { "epoch": 0.9771573604060914, "grad_norm": 0.18675784766674042, "learning_rate": 3.177382167156906e-07, "loss": 0.9303, "step": 8470 }, { "epoch": 0.9777341947392709, "grad_norm": 0.19082637131214142, "learning_rate": 3.019014280669641e-07, "loss": 0.9024, "step": 8475 }, { "epoch": 0.9783110290724504, "grad_norm": 0.2020220011472702, "learning_rate": 2.8646886698473484e-07, "loss": 0.9963, "step": 8480 }, { "epoch": 0.9788878634056299, "grad_norm": 0.20680001378059387, "learning_rate": 2.7144059604055085e-07, "loss": 0.9355, "step": 8485 }, { "epoch": 0.9794646977388094, "grad_norm": 0.1897319108247757, "learning_rate": 2.568166761668156e-07, "loss": 0.9195, "step": 8490 }, { "epoch": 0.980041532071989, "grad_norm": 0.19423364102840424, "learning_rate": 2.4259716665641083e-07, "loss": 0.9845, "step": 8495 }, { "epoch": 0.9806183664051684, "grad_norm": 0.19203795492649078, "learning_rate": 2.2878212516260766e-07, "loss": 0.9134, "step": 8500 }, { "epoch": 0.981195200738348, "grad_norm": 0.18443405628204346, "learning_rate": 2.1537160769870002e-07, "loss": 0.9501, "step": 8505 }, { "epoch": 0.9817720350715274, "grad_norm": 0.2106209546327591, "learning_rate": 2.0236566863784944e-07, "loss": 1.0056, "step": 8510 }, { "epoch": 0.982348869404707, "grad_norm": 0.19864287972450256, "learning_rate": 1.8976436071284076e-07, "loss": 0.9141, "step": 8515 }, { "epoch": 0.9829257037378865, "grad_norm": 0.19259801506996155, "learning_rate": 1.775677350159044e-07, "loss": 0.9578, "step": 8520 }, { "epoch": 0.983502538071066, "grad_norm": 0.1885799914598465, "learning_rate": 1.657758409984278e-07, "loss": 0.9788, "step": 8525 }, { "epoch": 0.9840793724042455, "grad_norm": 0.18557386100292206, "learning_rate": 1.5438872647086655e-07, "loss": 0.9251, "step": 8530 }, { "epoch": 0.984656206737425, "grad_norm": 0.1834561824798584, "learning_rate": 1.4340643760244464e-07, "loss": 0.9683, "step": 8535 }, { "epoch": 0.9852330410706045, "grad_norm": 0.2051221877336502, "learning_rate": 1.328290189210435e-07, "loss": 0.9747, "step": 8540 }, { "epoch": 0.9858098754037841, "grad_norm": 0.18745014071464539, "learning_rate": 1.2265651331296869e-07, "loss": 0.9445, "step": 8545 }, { "epoch": 0.9863867097369635, "grad_norm": 0.188162162899971, "learning_rate": 1.1288896202281685e-07, "loss": 0.9681, "step": 8550 }, { "epoch": 0.9869635440701431, "grad_norm": 0.1787605881690979, "learning_rate": 1.0352640465327578e-07, "loss": 0.9767, "step": 8555 }, { "epoch": 0.9875403784033225, "grad_norm": 0.19322550296783447, "learning_rate": 9.456887916499125e-08, "loss": 0.9264, "step": 8560 }, { "epoch": 0.9881172127365021, "grad_norm": 0.210786372423172, "learning_rate": 8.601642187640036e-08, "loss": 0.9254, "step": 8565 }, { "epoch": 0.9886940470696816, "grad_norm": 0.18473652005195618, "learning_rate": 7.786906746358735e-08, "loss": 0.9626, "step": 8570 }, { "epoch": 0.9892708814028611, "grad_norm": 0.21195201575756073, "learning_rate": 7.012684896011702e-08, "loss": 0.9211, "step": 8575 }, { "epoch": 0.9898477157360406, "grad_norm": 0.18607978522777557, "learning_rate": 6.278979775694582e-08, "loss": 0.9473, "step": 8580 }, { "epoch": 0.9904245500692201, "grad_norm": 0.194194957613945, "learning_rate": 5.585794360226659e-08, "loss": 0.9632, "step": 8585 }, { "epoch": 0.9910013844023996, "grad_norm": 0.199600949883461, "learning_rate": 4.9331314601408495e-08, "loss": 0.9848, "step": 8590 }, { "epoch": 0.9915782187355792, "grad_norm": 0.20218248665332794, "learning_rate": 4.320993721668165e-08, "loss": 0.9596, "step": 8595 }, { "epoch": 0.9921550530687586, "grad_norm": 0.19225312769412994, "learning_rate": 3.7493836267310514e-08, "loss": 0.9497, "step": 8600 }, { "epoch": 0.9927318874019382, "grad_norm": 0.18986690044403076, "learning_rate": 3.218303492932284e-08, "loss": 0.9513, "step": 8605 }, { "epoch": 0.9933087217351176, "grad_norm": 0.19648917019367218, "learning_rate": 2.7277554735449794e-08, "loss": 0.9769, "step": 8610 }, { "epoch": 0.9938855560682972, "grad_norm": 0.24255739152431488, "learning_rate": 2.2777415575037098e-08, "loss": 0.9355, "step": 8615 }, { "epoch": 0.9944623904014767, "grad_norm": 0.1974877566099167, "learning_rate": 1.8682635693978433e-08, "loss": 0.9339, "step": 8620 }, { "epoch": 0.9950392247346562, "grad_norm": 0.1958783119916916, "learning_rate": 1.499323169462663e-08, "loss": 0.9779, "step": 8625 }, { "epoch": 0.9956160590678357, "grad_norm": 0.18412433564662933, "learning_rate": 1.1709218535715938e-08, "loss": 0.9261, "step": 8630 }, { "epoch": 0.9961928934010152, "grad_norm": 0.18094801902770996, "learning_rate": 8.83060953235093e-09, "loss": 0.904, "step": 8635 }, { "epoch": 0.9967697277341947, "grad_norm": 0.19589665532112122, "learning_rate": 6.357416355884382e-09, "loss": 0.9594, "step": 8640 }, { "epoch": 0.9973465620673743, "grad_norm": 0.19667023420333862, "learning_rate": 4.289649033928367e-09, "loss": 1.0295, "step": 8645 }, { "epoch": 0.9979233964005537, "grad_norm": 0.1948215663433075, "learning_rate": 2.627315950265441e-09, "loss": 0.982, "step": 8650 }, { "epoch": 0.9985002307337333, "grad_norm": 0.18232625722885132, "learning_rate": 1.3704238448708496e-09, "loss": 0.959, "step": 8655 }, { "epoch": 0.9990770650669127, "grad_norm": 0.18606936931610107, "learning_rate": 5.189778138237067e-10, "loss": 0.9795, "step": 8660 }, { "epoch": 0.9996538994000923, "grad_norm": 0.20517615973949432, "learning_rate": 7.298130931809865e-11, "loss": 0.9515, "step": 8665 }, { "epoch": 1.0, "eval_loss": 0.9548913240432739, "eval_runtime": 3071.7071, "eval_samples_per_second": 4.997, "eval_steps_per_second": 0.313, "step": 8668 }, { "epoch": 1.0, "step": 8668, "total_flos": 1.3416878770665554e+19, "train_loss": 0.9644491172051397, "train_runtime": 101367.857, "train_samples_per_second": 1.368, "train_steps_per_second": 0.086 } ], "logging_steps": 5, "max_steps": 8668, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3416878770665554e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }