{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 8668,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00011536686663590217,
      "grad_norm": 0.39384475350379944,
      "learning_rate": 2.306805074971165e-07,
      "loss": 1.2335,
      "step": 1
    },
    {
      "epoch": 0.0005768343331795108,
      "grad_norm": 0.3301478326320648,
      "learning_rate": 1.1534025374855826e-06,
      "loss": 1.1506,
      "step": 5
    },
    {
      "epoch": 0.0011536686663590216,
      "grad_norm": 0.32749322056770325,
      "learning_rate": 2.3068050749711653e-06,
      "loss": 1.1258,
      "step": 10
    },
    {
      "epoch": 0.0017305029995385325,
      "grad_norm": 0.3380628526210785,
      "learning_rate": 3.4602076124567477e-06,
      "loss": 1.2071,
      "step": 15
    },
    {
      "epoch": 0.0023073373327180432,
      "grad_norm": 0.3165785074234009,
      "learning_rate": 4.6136101499423305e-06,
      "loss": 1.1473,
      "step": 20
    },
    {
      "epoch": 0.002884171665897554,
      "grad_norm": 0.3524048924446106,
      "learning_rate": 5.7670126874279126e-06,
      "loss": 1.1617,
      "step": 25
    },
    {
      "epoch": 0.003461005999077065,
      "grad_norm": 0.31146979331970215,
      "learning_rate": 6.920415224913495e-06,
      "loss": 1.1327,
      "step": 30
    },
    {
      "epoch": 0.0040378403322565756,
      "grad_norm": 0.2818208634853363,
      "learning_rate": 8.073817762399077e-06,
      "loss": 1.1434,
      "step": 35
    },
    {
      "epoch": 0.0046146746654360865,
      "grad_norm": 0.28524142503738403,
      "learning_rate": 9.227220299884661e-06,
      "loss": 1.0858,
      "step": 40
    },
    {
      "epoch": 0.005191508998615597,
      "grad_norm": 0.24573445320129395,
      "learning_rate": 1.0380622837370241e-05,
      "loss": 1.0725,
      "step": 45
    },
    {
      "epoch": 0.005768343331795108,
      "grad_norm": 0.263478547334671,
      "learning_rate": 1.1534025374855825e-05,
      "loss": 1.0934,
      "step": 50
    },
    {
      "epoch": 0.006345177664974619,
      "grad_norm": 0.2519683539867401,
      "learning_rate": 1.2687427912341407e-05,
      "loss": 1.0627,
      "step": 55
    },
    {
      "epoch": 0.00692201199815413,
      "grad_norm": 0.23009565472602844,
      "learning_rate": 1.384083044982699e-05,
      "loss": 1.0259,
      "step": 60
    },
    {
      "epoch": 0.007498846331333641,
      "grad_norm": 0.2220510095357895,
      "learning_rate": 1.4994232987312573e-05,
      "loss": 1.0459,
      "step": 65
    },
    {
      "epoch": 0.008075680664513151,
      "grad_norm": 0.20859792828559875,
      "learning_rate": 1.6147635524798155e-05,
      "loss": 1.0493,
      "step": 70
    },
    {
      "epoch": 0.008652514997692663,
      "grad_norm": 0.24422504007816315,
      "learning_rate": 1.7301038062283735e-05,
      "loss": 1.0426,
      "step": 75
    },
    {
      "epoch": 0.009229349330872173,
      "grad_norm": 0.26209887862205505,
      "learning_rate": 1.8454440599769322e-05,
      "loss": 1.1044,
      "step": 80
    },
    {
      "epoch": 0.009806183664051685,
      "grad_norm": 0.22573940455913544,
      "learning_rate": 1.9607843137254903e-05,
      "loss": 1.0154,
      "step": 85
    },
    {
      "epoch": 0.010383017997231195,
      "grad_norm": 0.23182636499404907,
      "learning_rate": 2.0761245674740483e-05,
      "loss": 0.9841,
      "step": 90
    },
    {
      "epoch": 0.010959852330410707,
      "grad_norm": 0.20166198909282684,
      "learning_rate": 2.191464821222607e-05,
      "loss": 0.9969,
      "step": 95
    },
    {
      "epoch": 0.011536686663590217,
      "grad_norm": 0.24127909541130066,
      "learning_rate": 2.306805074971165e-05,
      "loss": 1.0168,
      "step": 100
    },
    {
      "epoch": 0.012113520996769728,
      "grad_norm": 0.23480503261089325,
      "learning_rate": 2.422145328719723e-05,
      "loss": 1.0291,
      "step": 105
    },
    {
      "epoch": 0.012690355329949238,
      "grad_norm": 0.2245372086763382,
      "learning_rate": 2.5374855824682814e-05,
      "loss": 1.0141,
      "step": 110
    },
    {
      "epoch": 0.01326718966312875,
      "grad_norm": 0.22040125727653503,
      "learning_rate": 2.6528258362168395e-05,
      "loss": 1.0327,
      "step": 115
    },
    {
      "epoch": 0.01384402399630826,
      "grad_norm": 0.22710174322128296,
      "learning_rate": 2.768166089965398e-05,
      "loss": 1.055,
      "step": 120
    },
    {
      "epoch": 0.01442085832948777,
      "grad_norm": 0.21789094805717468,
      "learning_rate": 2.8835063437139565e-05,
      "loss": 1.0143,
      "step": 125
    },
    {
      "epoch": 0.014997692662667282,
      "grad_norm": 0.26034095883369446,
      "learning_rate": 2.9988465974625146e-05,
      "loss": 1.082,
      "step": 130
    },
    {
      "epoch": 0.015574526995846792,
      "grad_norm": 0.24871432781219482,
      "learning_rate": 3.1141868512110726e-05,
      "loss": 0.9683,
      "step": 135
    },
    {
      "epoch": 0.016151361329026302,
      "grad_norm": 0.24066007137298584,
      "learning_rate": 3.229527104959631e-05,
      "loss": 1.0045,
      "step": 140
    },
    {
      "epoch": 0.016728195662205816,
      "grad_norm": 0.24926799535751343,
      "learning_rate": 3.344867358708189e-05,
      "loss": 1.0534,
      "step": 145
    },
    {
      "epoch": 0.017305029995385326,
      "grad_norm": 0.2561403512954712,
      "learning_rate": 3.460207612456747e-05,
      "loss": 0.9992,
      "step": 150
    },
    {
      "epoch": 0.017881864328564836,
      "grad_norm": 0.25197339057922363,
      "learning_rate": 3.575547866205306e-05,
      "loss": 1.0213,
      "step": 155
    },
    {
      "epoch": 0.018458698661744346,
      "grad_norm": 0.257375031709671,
      "learning_rate": 3.6908881199538644e-05,
      "loss": 1.0,
      "step": 160
    },
    {
      "epoch": 0.01903553299492386,
      "grad_norm": 0.26233741641044617,
      "learning_rate": 3.806228373702422e-05,
      "loss": 0.9835,
      "step": 165
    },
    {
      "epoch": 0.01961236732810337,
      "grad_norm": 0.26200389862060547,
      "learning_rate": 3.9215686274509805e-05,
      "loss": 1.0475,
      "step": 170
    },
    {
      "epoch": 0.02018920166128288,
      "grad_norm": 0.3026478886604309,
      "learning_rate": 4.036908881199539e-05,
      "loss": 1.0337,
      "step": 175
    },
    {
      "epoch": 0.02076603599446239,
      "grad_norm": 0.24392971396446228,
      "learning_rate": 4.1522491349480966e-05,
      "loss": 1.0181,
      "step": 180
    },
    {
      "epoch": 0.0213428703276419,
      "grad_norm": 0.2525658905506134,
      "learning_rate": 4.2675893886966556e-05,
      "loss": 0.9815,
      "step": 185
    },
    {
      "epoch": 0.021919704660821413,
      "grad_norm": 0.2874011695384979,
      "learning_rate": 4.382929642445214e-05,
      "loss": 0.9717,
      "step": 190
    },
    {
      "epoch": 0.022496538994000923,
      "grad_norm": 0.3149228096008301,
      "learning_rate": 4.498269896193772e-05,
      "loss": 1.0196,
      "step": 195
    },
    {
      "epoch": 0.023073373327180433,
      "grad_norm": 0.24751496315002441,
      "learning_rate": 4.61361014994233e-05,
      "loss": 1.029,
      "step": 200
    },
    {
      "epoch": 0.023650207660359943,
      "grad_norm": 0.26091670989990234,
      "learning_rate": 4.7289504036908884e-05,
      "loss": 0.9799,
      "step": 205
    },
    {
      "epoch": 0.024227041993539457,
      "grad_norm": 0.24843664467334747,
      "learning_rate": 4.844290657439446e-05,
      "loss": 0.948,
      "step": 210
    },
    {
      "epoch": 0.024803876326718967,
      "grad_norm": 0.26564517617225647,
      "learning_rate": 4.9596309111880045e-05,
      "loss": 1.0037,
      "step": 215
    },
    {
      "epoch": 0.025380710659898477,
      "grad_norm": 0.25427091121673584,
      "learning_rate": 5.074971164936563e-05,
      "loss": 0.9891,
      "step": 220
    },
    {
      "epoch": 0.025957544993077987,
      "grad_norm": 0.24899506568908691,
      "learning_rate": 5.190311418685121e-05,
      "loss": 0.9797,
      "step": 225
    },
    {
      "epoch": 0.0265343793262575,
      "grad_norm": 0.2365058809518814,
      "learning_rate": 5.305651672433679e-05,
      "loss": 0.9779,
      "step": 230
    },
    {
      "epoch": 0.02711121365943701,
      "grad_norm": 0.25148388743400574,
      "learning_rate": 5.4209919261822386e-05,
      "loss": 0.9934,
      "step": 235
    },
    {
      "epoch": 0.02768804799261652,
      "grad_norm": 0.24851441383361816,
      "learning_rate": 5.536332179930796e-05,
      "loss": 0.9609,
      "step": 240
    },
    {
      "epoch": 0.02826488232579603,
      "grad_norm": 0.2596060633659363,
      "learning_rate": 5.651672433679355e-05,
      "loss": 0.955,
      "step": 245
    },
    {
      "epoch": 0.02884171665897554,
      "grad_norm": 0.26578041911125183,
      "learning_rate": 5.767012687427913e-05,
      "loss": 1.0006,
      "step": 250
    },
    {
      "epoch": 0.029418550992155054,
      "grad_norm": 0.25275397300720215,
      "learning_rate": 5.882352941176471e-05,
      "loss": 1.0443,
      "step": 255
    },
    {
      "epoch": 0.029995385325334564,
      "grad_norm": 0.246384397149086,
      "learning_rate": 5.997693194925029e-05,
      "loss": 1.0123,
      "step": 260
    },
    {
      "epoch": 0.030572219658514074,
      "grad_norm": 0.23048047721385956,
      "learning_rate": 6.113033448673587e-05,
      "loss": 0.9969,
      "step": 265
    },
    {
      "epoch": 0.031149053991693584,
      "grad_norm": 0.24622942507266998,
      "learning_rate": 6.228373702422145e-05,
      "loss": 1.0324,
      "step": 270
    },
    {
      "epoch": 0.031725888324873094,
      "grad_norm": 0.23391634225845337,
      "learning_rate": 6.343713956170704e-05,
      "loss": 0.9575,
      "step": 275
    },
    {
      "epoch": 0.032302722658052604,
      "grad_norm": 0.2237214893102646,
      "learning_rate": 6.459054209919262e-05,
      "loss": 1.0388,
      "step": 280
    },
    {
      "epoch": 0.03287955699123212,
      "grad_norm": 0.22574639320373535,
      "learning_rate": 6.57439446366782e-05,
      "loss": 1.0074,
      "step": 285
    },
    {
      "epoch": 0.03345639132441163,
      "grad_norm": 0.23499815165996552,
      "learning_rate": 6.689734717416379e-05,
      "loss": 0.9567,
      "step": 290
    },
    {
      "epoch": 0.03403322565759114,
      "grad_norm": 0.22160688042640686,
      "learning_rate": 6.805074971164937e-05,
      "loss": 1.0325,
      "step": 295
    },
    {
      "epoch": 0.03461005999077065,
      "grad_norm": 0.2253323346376419,
      "learning_rate": 6.920415224913494e-05,
      "loss": 0.9682,
      "step": 300
    },
    {
      "epoch": 0.03518689432395016,
      "grad_norm": 0.22069986164569855,
      "learning_rate": 7.035755478662054e-05,
      "loss": 1.0038,
      "step": 305
    },
    {
      "epoch": 0.03576372865712967,
      "grad_norm": 0.23517417907714844,
      "learning_rate": 7.151095732410612e-05,
      "loss": 0.9954,
      "step": 310
    },
    {
      "epoch": 0.03634056299030918,
      "grad_norm": 0.22023826837539673,
      "learning_rate": 7.26643598615917e-05,
      "loss": 1.0708,
      "step": 315
    },
    {
      "epoch": 0.03691739732348869,
      "grad_norm": 0.22501811385154724,
      "learning_rate": 7.381776239907729e-05,
      "loss": 1.0403,
      "step": 320
    },
    {
      "epoch": 0.0374942316566682,
      "grad_norm": 0.24813653528690338,
      "learning_rate": 7.497116493656286e-05,
      "loss": 1.0395,
      "step": 325
    },
    {
      "epoch": 0.03807106598984772,
      "grad_norm": 0.213524729013443,
      "learning_rate": 7.612456747404844e-05,
      "loss": 1.0479,
      "step": 330
    },
    {
      "epoch": 0.03864790032302723,
      "grad_norm": 0.2197512686252594,
      "learning_rate": 7.727797001153403e-05,
      "loss": 0.9709,
      "step": 335
    },
    {
      "epoch": 0.03922473465620674,
      "grad_norm": 0.21706676483154297,
      "learning_rate": 7.843137254901961e-05,
      "loss": 0.9824,
      "step": 340
    },
    {
      "epoch": 0.03980156898938625,
      "grad_norm": 0.2092558592557907,
      "learning_rate": 7.95847750865052e-05,
      "loss": 0.9749,
      "step": 345
    },
    {
      "epoch": 0.04037840332256576,
      "grad_norm": 0.20605534315109253,
      "learning_rate": 8.073817762399078e-05,
      "loss": 0.9728,
      "step": 350
    },
    {
      "epoch": 0.04095523765574527,
      "grad_norm": 0.20985351502895355,
      "learning_rate": 8.189158016147636e-05,
      "loss": 0.9399,
      "step": 355
    },
    {
      "epoch": 0.04153207198892478,
      "grad_norm": 0.20424993336200714,
      "learning_rate": 8.304498269896193e-05,
      "loss": 0.9659,
      "step": 360
    },
    {
      "epoch": 0.04210890632210429,
      "grad_norm": 0.2047097533941269,
      "learning_rate": 8.419838523644751e-05,
      "loss": 0.9825,
      "step": 365
    },
    {
      "epoch": 0.0426857406552838,
      "grad_norm": 0.2090773731470108,
      "learning_rate": 8.535178777393311e-05,
      "loss": 0.9915,
      "step": 370
    },
    {
      "epoch": 0.043262574988463316,
      "grad_norm": 0.20311830937862396,
      "learning_rate": 8.65051903114187e-05,
      "loss": 0.9481,
      "step": 375
    },
    {
      "epoch": 0.043839409321642826,
      "grad_norm": 0.20276297628879547,
      "learning_rate": 8.765859284890428e-05,
      "loss": 1.0057,
      "step": 380
    },
    {
      "epoch": 0.044416243654822336,
      "grad_norm": 0.20655770599842072,
      "learning_rate": 8.881199538638986e-05,
      "loss": 1.0034,
      "step": 385
    },
    {
      "epoch": 0.044993077988001846,
      "grad_norm": 0.21446913480758667,
      "learning_rate": 8.996539792387543e-05,
      "loss": 0.958,
      "step": 390
    },
    {
      "epoch": 0.045569912321181356,
      "grad_norm": 0.20291991531848907,
      "learning_rate": 9.111880046136102e-05,
      "loss": 0.9714,
      "step": 395
    },
    {
      "epoch": 0.046146746654360866,
      "grad_norm": 0.19942238926887512,
      "learning_rate": 9.22722029988466e-05,
      "loss": 0.9791,
      "step": 400
    },
    {
      "epoch": 0.046723580987540377,
      "grad_norm": 0.21126116812229156,
      "learning_rate": 9.342560553633218e-05,
      "loss": 0.9764,
      "step": 405
    },
    {
      "epoch": 0.04730041532071989,
      "grad_norm": 0.2006087452173233,
      "learning_rate": 9.457900807381777e-05,
      "loss": 0.9389,
      "step": 410
    },
    {
      "epoch": 0.0478772496538994,
      "grad_norm": 0.1997053176164627,
      "learning_rate": 9.573241061130335e-05,
      "loss": 0.9795,
      "step": 415
    },
    {
      "epoch": 0.048454083987078914,
      "grad_norm": 0.19919082522392273,
      "learning_rate": 9.688581314878892e-05,
      "loss": 0.9865,
      "step": 420
    },
    {
      "epoch": 0.049030918320258424,
      "grad_norm": 0.20420940220355988,
      "learning_rate": 9.80392156862745e-05,
      "loss": 1.0243,
      "step": 425
    },
    {
      "epoch": 0.049607752653437934,
      "grad_norm": 0.20005351305007935,
      "learning_rate": 9.919261822376009e-05,
      "loss": 0.9298,
      "step": 430
    },
    {
      "epoch": 0.050184586986617444,
      "grad_norm": 0.2057618349790573,
      "learning_rate": 0.00010034602076124569,
      "loss": 0.9673,
      "step": 435
    },
    {
      "epoch": 0.050761421319796954,
      "grad_norm": 0.2002270221710205,
      "learning_rate": 0.00010149942329873126,
      "loss": 1.0362,
      "step": 440
    },
    {
      "epoch": 0.051338255652976464,
      "grad_norm": 0.23044097423553467,
      "learning_rate": 0.00010265282583621685,
      "loss": 1.0132,
      "step": 445
    },
    {
      "epoch": 0.051915089986155974,
      "grad_norm": 0.19683632254600525,
      "learning_rate": 0.00010380622837370242,
      "loss": 0.9883,
      "step": 450
    },
    {
      "epoch": 0.052491924319335484,
      "grad_norm": 0.18685606122016907,
      "learning_rate": 0.00010495963091118801,
      "loss": 1.0097,
      "step": 455
    },
    {
      "epoch": 0.053068758652515,
      "grad_norm": 0.19922864437103271,
      "learning_rate": 0.00010611303344867358,
      "loss": 1.0129,
      "step": 460
    },
    {
      "epoch": 0.05364559298569451,
      "grad_norm": 0.22423453629016876,
      "learning_rate": 0.00010726643598615918,
      "loss": 0.9616,
      "step": 465
    },
    {
      "epoch": 0.05422242731887402,
      "grad_norm": 0.19105130434036255,
      "learning_rate": 0.00010841983852364477,
      "loss": 0.9873,
      "step": 470
    },
    {
      "epoch": 0.05479926165205353,
      "grad_norm": 0.191370889544487,
      "learning_rate": 0.00010957324106113034,
      "loss": 0.9869,
      "step": 475
    },
    {
      "epoch": 0.05537609598523304,
      "grad_norm": 0.1888877898454666,
      "learning_rate": 0.00011072664359861593,
      "loss": 0.9754,
      "step": 480
    },
    {
      "epoch": 0.05595293031841255,
      "grad_norm": 0.19908085465431213,
      "learning_rate": 0.0001118800461361015,
      "loss": 0.9978,
      "step": 485
    },
    {
      "epoch": 0.05652976465159206,
      "grad_norm": 0.1849226951599121,
      "learning_rate": 0.0001130334486735871,
      "loss": 1.0036,
      "step": 490
    },
    {
      "epoch": 0.05710659898477157,
      "grad_norm": 0.18880179524421692,
      "learning_rate": 0.00011418685121107266,
      "loss": 0.9969,
      "step": 495
    },
    {
      "epoch": 0.05768343331795108,
      "grad_norm": 0.18281018733978271,
      "learning_rate": 0.00011534025374855826,
      "loss": 1.0163,
      "step": 500
    },
    {
      "epoch": 0.0582602676511306,
      "grad_norm": 0.18743227422237396,
      "learning_rate": 0.00011649365628604383,
      "loss": 0.9774,
      "step": 505
    },
    {
      "epoch": 0.05883710198431011,
      "grad_norm": 0.18649840354919434,
      "learning_rate": 0.00011764705882352942,
      "loss": 0.9948,
      "step": 510
    },
    {
      "epoch": 0.05941393631748962,
      "grad_norm": 0.18914422392845154,
      "learning_rate": 0.00011880046136101499,
      "loss": 0.9789,
      "step": 515
    },
    {
      "epoch": 0.05999077065066913,
      "grad_norm": 0.20373612642288208,
      "learning_rate": 0.00011995386389850058,
      "loss": 0.9594,
      "step": 520
    },
    {
      "epoch": 0.06056760498384864,
      "grad_norm": 0.1853344440460205,
      "learning_rate": 0.00012110726643598615,
      "loss": 0.9422,
      "step": 525
    },
    {
      "epoch": 0.06114443931702815,
      "grad_norm": 0.17580455541610718,
      "learning_rate": 0.00012226066897347174,
      "loss": 0.962,
      "step": 530
    },
    {
      "epoch": 0.06172127365020766,
      "grad_norm": 0.18509171903133392,
      "learning_rate": 0.00012341407151095733,
      "loss": 0.9269,
      "step": 535
    },
    {
      "epoch": 0.06229810798338717,
      "grad_norm": 0.17391765117645264,
      "learning_rate": 0.0001245674740484429,
      "loss": 0.9379,
      "step": 540
    },
    {
      "epoch": 0.06287494231656668,
      "grad_norm": 0.185151144862175,
      "learning_rate": 0.0001257208765859285,
      "loss": 0.9803,
      "step": 545
    },
    {
      "epoch": 0.06345177664974619,
      "grad_norm": 0.18012270331382751,
      "learning_rate": 0.00012687427912341407,
      "loss": 0.9318,
      "step": 550
    },
    {
      "epoch": 0.0640286109829257,
      "grad_norm": 0.19646641612052917,
      "learning_rate": 0.00012802768166089967,
      "loss": 1.0218,
      "step": 555
    },
    {
      "epoch": 0.06460544531610521,
      "grad_norm": 0.1846383512020111,
      "learning_rate": 0.00012918108419838524,
      "loss": 1.025,
      "step": 560
    },
    {
      "epoch": 0.06518227964928472,
      "grad_norm": 0.18073053658008575,
      "learning_rate": 0.00013033448673587084,
      "loss": 0.9418,
      "step": 565
    },
    {
      "epoch": 0.06575911398246424,
      "grad_norm": 0.17771542072296143,
      "learning_rate": 0.0001314878892733564,
      "loss": 0.939,
      "step": 570
    },
    {
      "epoch": 0.06633594831564375,
      "grad_norm": 0.17932404577732086,
      "learning_rate": 0.000132641291810842,
      "loss": 1.0029,
      "step": 575
    },
    {
      "epoch": 0.06691278264882326,
      "grad_norm": 0.1846706122159958,
      "learning_rate": 0.00013379469434832757,
      "loss": 0.9732,
      "step": 580
    },
    {
      "epoch": 0.06748961698200277,
      "grad_norm": 0.18250420689582825,
      "learning_rate": 0.00013494809688581317,
      "loss": 0.9838,
      "step": 585
    },
    {
      "epoch": 0.06806645131518228,
      "grad_norm": 0.18181759119033813,
      "learning_rate": 0.00013610149942329874,
      "loss": 0.9732,
      "step": 590
    },
    {
      "epoch": 0.0686432856483618,
      "grad_norm": 0.19068962335586548,
      "learning_rate": 0.0001372549019607843,
      "loss": 0.9789,
      "step": 595
    },
    {
      "epoch": 0.0692201199815413,
      "grad_norm": 0.17766667902469635,
      "learning_rate": 0.00013840830449826988,
      "loss": 0.9702,
      "step": 600
    },
    {
      "epoch": 0.06979695431472081,
      "grad_norm": 0.18375654518604279,
      "learning_rate": 0.00013956170703575548,
      "loss": 0.9239,
      "step": 605
    },
    {
      "epoch": 0.07037378864790032,
      "grad_norm": 0.17888925969600677,
      "learning_rate": 0.00014071510957324108,
      "loss": 0.9834,
      "step": 610
    },
    {
      "epoch": 0.07095062298107983,
      "grad_norm": 0.1771126687526703,
      "learning_rate": 0.00014186851211072665,
      "loss": 0.9383,
      "step": 615
    },
    {
      "epoch": 0.07152745731425934,
      "grad_norm": 0.19248628616333008,
      "learning_rate": 0.00014302191464821224,
      "loss": 0.931,
      "step": 620
    },
    {
      "epoch": 0.07210429164743885,
      "grad_norm": 0.1845206469297409,
      "learning_rate": 0.0001441753171856978,
      "loss": 0.9919,
      "step": 625
    },
    {
      "epoch": 0.07268112598061836,
      "grad_norm": 0.17463363707065582,
      "learning_rate": 0.0001453287197231834,
      "loss": 0.9698,
      "step": 630
    },
    {
      "epoch": 0.07325796031379787,
      "grad_norm": 0.17882540822029114,
      "learning_rate": 0.00014648212226066898,
      "loss": 0.9985,
      "step": 635
    },
    {
      "epoch": 0.07383479464697738,
      "grad_norm": 0.1773298680782318,
      "learning_rate": 0.00014763552479815458,
      "loss": 0.9572,
      "step": 640
    },
    {
      "epoch": 0.0744116289801569,
      "grad_norm": 0.18714497983455658,
      "learning_rate": 0.00014878892733564015,
      "loss": 1.0213,
      "step": 645
    },
    {
      "epoch": 0.0749884633133364,
      "grad_norm": 0.16878995299339294,
      "learning_rate": 0.00014994232987312572,
      "loss": 0.9478,
      "step": 650
    },
    {
      "epoch": 0.07556529764651591,
      "grad_norm": 0.18111148476600647,
      "learning_rate": 0.0001510957324106113,
      "loss": 0.9727,
      "step": 655
    },
    {
      "epoch": 0.07614213197969544,
      "grad_norm": 0.18033739924430847,
      "learning_rate": 0.00015224913494809689,
      "loss": 0.9953,
      "step": 660
    },
    {
      "epoch": 0.07671896631287495,
      "grad_norm": 0.184474378824234,
      "learning_rate": 0.00015340253748558246,
      "loss": 1.0182,
      "step": 665
    },
    {
      "epoch": 0.07729580064605446,
      "grad_norm": 0.17728130519390106,
      "learning_rate": 0.00015455594002306805,
      "loss": 0.921,
      "step": 670
    },
    {
      "epoch": 0.07787263497923397,
      "grad_norm": 0.1744864135980606,
      "learning_rate": 0.00015570934256055365,
      "loss": 0.9696,
      "step": 675
    },
    {
      "epoch": 0.07844946931241348,
      "grad_norm": 0.16884462535381317,
      "learning_rate": 0.00015686274509803922,
      "loss": 0.9756,
      "step": 680
    },
    {
      "epoch": 0.07902630364559299,
      "grad_norm": 0.1904095709323883,
      "learning_rate": 0.00015801614763552482,
      "loss": 0.9662,
      "step": 685
    },
    {
      "epoch": 0.0796031379787725,
      "grad_norm": 0.1834830790758133,
      "learning_rate": 0.0001591695501730104,
      "loss": 0.9916,
      "step": 690
    },
    {
      "epoch": 0.08017997231195201,
      "grad_norm": 0.17388418316841125,
      "learning_rate": 0.00016032295271049598,
      "loss": 0.9661,
      "step": 695
    },
    {
      "epoch": 0.08075680664513152,
      "grad_norm": 0.17867860198020935,
      "learning_rate": 0.00016147635524798155,
      "loss": 0.9702,
      "step": 700
    },
    {
      "epoch": 0.08133364097831103,
      "grad_norm": 0.184253990650177,
      "learning_rate": 0.00016262975778546715,
      "loss": 0.9628,
      "step": 705
    },
    {
      "epoch": 0.08191047531149054,
      "grad_norm": 0.18207858502864838,
      "learning_rate": 0.00016378316032295272,
      "loss": 0.9882,
      "step": 710
    },
    {
      "epoch": 0.08248730964467005,
      "grad_norm": 0.1673344224691391,
      "learning_rate": 0.0001649365628604383,
      "loss": 1.0004,
      "step": 715
    },
    {
      "epoch": 0.08306414397784956,
      "grad_norm": 0.17834331095218658,
      "learning_rate": 0.00016608996539792386,
      "loss": 0.9624,
      "step": 720
    },
    {
      "epoch": 0.08364097831102907,
      "grad_norm": 0.17010116577148438,
      "learning_rate": 0.00016724336793540946,
      "loss": 0.9419,
      "step": 725
    },
    {
      "epoch": 0.08421781264420858,
      "grad_norm": 0.1887020319700241,
      "learning_rate": 0.00016839677047289503,
      "loss": 0.9735,
      "step": 730
    },
    {
      "epoch": 0.08479464697738809,
      "grad_norm": 0.17254365980625153,
      "learning_rate": 0.00016955017301038063,
      "loss": 0.9959,
      "step": 735
    },
    {
      "epoch": 0.0853714813105676,
      "grad_norm": 0.18531525135040283,
      "learning_rate": 0.00017070357554786622,
      "loss": 1.0063,
      "step": 740
    },
    {
      "epoch": 0.08594831564374712,
      "grad_norm": 0.1854889988899231,
      "learning_rate": 0.0001718569780853518,
      "loss": 1.0276,
      "step": 745
    },
    {
      "epoch": 0.08652514997692663,
      "grad_norm": 0.17261534929275513,
      "learning_rate": 0.0001730103806228374,
      "loss": 0.9459,
      "step": 750
    },
    {
      "epoch": 0.08710198431010614,
      "grad_norm": 0.17594070732593536,
      "learning_rate": 0.00017416378316032296,
      "loss": 0.981,
      "step": 755
    },
    {
      "epoch": 0.08767881864328565,
      "grad_norm": 0.17268770933151245,
      "learning_rate": 0.00017531718569780856,
      "loss": 0.9591,
      "step": 760
    },
    {
      "epoch": 0.08825565297646516,
      "grad_norm": 0.1795402467250824,
      "learning_rate": 0.00017647058823529413,
      "loss": 1.0009,
      "step": 765
    },
    {
      "epoch": 0.08883248730964467,
      "grad_norm": 0.17739154398441315,
      "learning_rate": 0.00017762399077277973,
      "loss": 1.0325,
      "step": 770
    },
    {
      "epoch": 0.08940932164282418,
      "grad_norm": 0.1737346351146698,
      "learning_rate": 0.0001787773933102653,
      "loss": 0.966,
      "step": 775
    },
    {
      "epoch": 0.08998615597600369,
      "grad_norm": 0.178639754652977,
      "learning_rate": 0.00017993079584775087,
      "loss": 0.9921,
      "step": 780
    },
    {
      "epoch": 0.0905629903091832,
      "grad_norm": 0.18672531843185425,
      "learning_rate": 0.00018108419838523644,
      "loss": 0.9398,
      "step": 785
    },
    {
      "epoch": 0.09113982464236271,
      "grad_norm": 0.17832833528518677,
      "learning_rate": 0.00018223760092272203,
      "loss": 1.0191,
      "step": 790
    },
    {
      "epoch": 0.09171665897554222,
      "grad_norm": 0.17171098291873932,
      "learning_rate": 0.0001833910034602076,
      "loss": 0.9889,
      "step": 795
    },
    {
      "epoch": 0.09229349330872173,
      "grad_norm": 0.17071138322353363,
      "learning_rate": 0.0001845444059976932,
      "loss": 0.9414,
      "step": 800
    },
    {
      "epoch": 0.09287032764190124,
      "grad_norm": 0.17644046247005463,
      "learning_rate": 0.0001856978085351788,
      "loss": 1.0259,
      "step": 805
    },
    {
      "epoch": 0.09344716197508075,
      "grad_norm": 0.17984060943126678,
      "learning_rate": 0.00018685121107266437,
      "loss": 0.9826,
      "step": 810
    },
    {
      "epoch": 0.09402399630826026,
      "grad_norm": 0.1776990294456482,
      "learning_rate": 0.00018800461361014997,
      "loss": 0.9663,
      "step": 815
    },
    {
      "epoch": 0.09460083064143977,
      "grad_norm": 0.17558909952640533,
      "learning_rate": 0.00018915801614763554,
      "loss": 1.0345,
      "step": 820
    },
    {
      "epoch": 0.09517766497461928,
      "grad_norm": 0.18706142902374268,
      "learning_rate": 0.00019031141868512113,
      "loss": 1.0199,
      "step": 825
    },
    {
      "epoch": 0.0957544993077988,
      "grad_norm": 0.1777406483888626,
      "learning_rate": 0.0001914648212226067,
      "loss": 1.0523,
      "step": 830
    },
    {
      "epoch": 0.09633133364097832,
      "grad_norm": 0.16959840059280396,
      "learning_rate": 0.00019261822376009227,
      "loss": 0.9452,
      "step": 835
    },
    {
      "epoch": 0.09690816797415783,
      "grad_norm": 0.17502658069133759,
      "learning_rate": 0.00019377162629757784,
      "loss": 1.0417,
      "step": 840
    },
    {
      "epoch": 0.09748500230733734,
      "grad_norm": 0.17616289854049683,
      "learning_rate": 0.00019492502883506344,
      "loss": 0.9871,
      "step": 845
    },
    {
      "epoch": 0.09806183664051685,
      "grad_norm": 0.1845337152481079,
      "learning_rate": 0.000196078431372549,
      "loss": 0.9917,
      "step": 850
    },
    {
      "epoch": 0.09863867097369636,
      "grad_norm": 0.1851508468389511,
      "learning_rate": 0.0001972318339100346,
      "loss": 1.0393,
      "step": 855
    },
    {
      "epoch": 0.09921550530687587,
      "grad_norm": 0.1803300529718399,
      "learning_rate": 0.00019838523644752018,
      "loss": 0.975,
      "step": 860
    },
    {
      "epoch": 0.09979233964005538,
      "grad_norm": 0.17024391889572144,
      "learning_rate": 0.00019953863898500578,
      "loss": 0.9605,
      "step": 865
    },
    {
      "epoch": 0.10036917397323489,
      "grad_norm": 0.17882846295833588,
      "learning_rate": 0.0001999999270186907,
      "loss": 0.9835,
      "step": 870
    },
    {
      "epoch": 0.1009460083064144,
      "grad_norm": 0.1729104071855545,
      "learning_rate": 0.0001999994810221862,
      "loss": 0.9661,
      "step": 875
    },
    {
      "epoch": 0.10152284263959391,
      "grad_norm": 0.1663312315940857,
      "learning_rate": 0.00019999862957615513,
      "loss": 1.0276,
      "step": 880
    },
    {
      "epoch": 0.10209967697277342,
      "grad_norm": 0.1741991490125656,
      "learning_rate": 0.00019999737268404973,
      "loss": 0.9306,
      "step": 885
    },
    {
      "epoch": 0.10267651130595293,
      "grad_norm": 0.19253897666931152,
      "learning_rate": 0.00019999571035096608,
      "loss": 1.0522,
      "step": 890
    },
    {
      "epoch": 0.10325334563913244,
      "grad_norm": 0.19019544124603271,
      "learning_rate": 0.00019999364258364413,
      "loss": 0.991,
      "step": 895
    },
    {
      "epoch": 0.10383017997231195,
      "grad_norm": 0.17976698279380798,
      "learning_rate": 0.00019999116939046764,
      "loss": 0.9986,
      "step": 900
    },
    {
      "epoch": 0.10440701430549146,
      "grad_norm": 0.18436984717845917,
      "learning_rate": 0.0001999882907814643,
      "loss": 0.9901,
      "step": 905
    },
    {
      "epoch": 0.10498384863867097,
      "grad_norm": 0.17687635123729706,
      "learning_rate": 0.0001999850067683054,
      "loss": 1.0231,
      "step": 910
    },
    {
      "epoch": 0.10556068297185048,
      "grad_norm": 0.18162792921066284,
      "learning_rate": 0.00019998131736430604,
      "loss": 0.9746,
      "step": 915
    },
    {
      "epoch": 0.10613751730503,
      "grad_norm": 0.18052905797958374,
      "learning_rate": 0.00019997722258442499,
      "loss": 0.9929,
      "step": 920
    },
    {
      "epoch": 0.10671435163820951,
      "grad_norm": 0.1827809065580368,
      "learning_rate": 0.00019997272244526456,
      "loss": 1.0031,
      "step": 925
    },
    {
      "epoch": 0.10729118597138902,
      "grad_norm": 0.1823299676179886,
      "learning_rate": 0.00019996781696507069,
      "loss": 0.969,
      "step": 930
    },
    {
      "epoch": 0.10786802030456853,
      "grad_norm": 0.18722161650657654,
      "learning_rate": 0.00019996250616373268,
      "loss": 0.9922,
      "step": 935
    },
    {
      "epoch": 0.10844485463774804,
      "grad_norm": 0.1760849803686142,
      "learning_rate": 0.0001999567900627833,
      "loss": 1.0221,
      "step": 940
    },
    {
      "epoch": 0.10902168897092755,
      "grad_norm": 0.17676706612110138,
      "learning_rate": 0.0001999506686853986,
      "loss": 0.9472,
      "step": 945
    },
    {
      "epoch": 0.10959852330410706,
      "grad_norm": 0.1758042424917221,
      "learning_rate": 0.00019994414205639775,
      "loss": 0.9059,
      "step": 950
    },
    {
      "epoch": 0.11017535763728657,
      "grad_norm": 0.18398715555667877,
      "learning_rate": 0.00019993721020224308,
      "loss": 0.9649,
      "step": 955
    },
    {
      "epoch": 0.11075219197046608,
      "grad_norm": 0.17959755659103394,
      "learning_rate": 0.0001999298731510399,
      "loss": 0.9904,
      "step": 960
    },
    {
      "epoch": 0.11132902630364559,
      "grad_norm": 0.17280980944633484,
      "learning_rate": 0.00019992213093253643,
      "loss": 1.0314,
      "step": 965
    },
    {
      "epoch": 0.1119058606368251,
      "grad_norm": 0.17635640501976013,
      "learning_rate": 0.0001999139835781236,
      "loss": 1.0148,
      "step": 970
    },
    {
      "epoch": 0.11248269497000461,
      "grad_norm": 0.1721516102552414,
      "learning_rate": 0.00019990543112083503,
      "loss": 0.9573,
      "step": 975
    },
    {
      "epoch": 0.11305952930318412,
      "grad_norm": 0.18502525985240936,
      "learning_rate": 0.00019989647359534672,
      "loss": 1.0328,
      "step": 980
    },
    {
      "epoch": 0.11363636363636363,
      "grad_norm": 0.18054784834384918,
      "learning_rate": 0.0001998871110379772,
      "loss": 0.9956,
      "step": 985
    },
    {
      "epoch": 0.11421319796954314,
      "grad_norm": 0.1870676875114441,
      "learning_rate": 0.00019987734348668706,
      "loss": 0.9665,
      "step": 990
    },
    {
      "epoch": 0.11479003230272265,
      "grad_norm": 0.18523703515529633,
      "learning_rate": 0.00019986717098107896,
      "loss": 0.9926,
      "step": 995
    },
    {
      "epoch": 0.11536686663590216,
      "grad_norm": 0.2257257103919983,
      "learning_rate": 0.00019985659356239758,
      "loss": 0.9635,
      "step": 1000
    },
    {
      "epoch": 0.11594370096908169,
      "grad_norm": 0.1749376505613327,
      "learning_rate": 0.00019984561127352914,
      "loss": 0.9773,
      "step": 1005
    },
    {
      "epoch": 0.1165205353022612,
      "grad_norm": 0.18685142695903778,
      "learning_rate": 0.00019983422415900158,
      "loss": 0.967,
      "step": 1010
    },
    {
      "epoch": 0.1170973696354407,
      "grad_norm": 0.1762312352657318,
      "learning_rate": 0.00019982243226498411,
      "loss": 0.9666,
      "step": 1015
    },
    {
      "epoch": 0.11767420396862022,
      "grad_norm": 0.18300583958625793,
      "learning_rate": 0.00019981023563928716,
      "loss": 0.9654,
      "step": 1020
    },
    {
      "epoch": 0.11825103830179973,
      "grad_norm": 0.17595453560352325,
      "learning_rate": 0.00019979763433136216,
      "loss": 0.9668,
      "step": 1025
    },
    {
      "epoch": 0.11882787263497924,
      "grad_norm": 0.17857778072357178,
      "learning_rate": 0.00019978462839230133,
      "loss": 0.9814,
      "step": 1030
    },
    {
      "epoch": 0.11940470696815875,
      "grad_norm": 0.194159135222435,
      "learning_rate": 0.0001997712178748374,
      "loss": 0.9548,
      "step": 1035
    },
    {
      "epoch": 0.11998154130133826,
      "grad_norm": 0.1872921586036682,
      "learning_rate": 0.0001997574028333436,
      "loss": 0.9537,
      "step": 1040
    },
    {
      "epoch": 0.12055837563451777,
      "grad_norm": 0.18175874650478363,
      "learning_rate": 0.0001997431833238332,
      "loss": 0.9708,
      "step": 1045
    },
    {
      "epoch": 0.12113520996769728,
      "grad_norm": 0.17377467453479767,
      "learning_rate": 0.00019972855940395947,
      "loss": 1.0154,
      "step": 1050
    },
    {
      "epoch": 0.12171204430087679,
      "grad_norm": 0.1744399070739746,
      "learning_rate": 0.00019971353113301527,
      "loss": 0.983,
      "step": 1055
    },
    {
      "epoch": 0.1222888786340563,
      "grad_norm": 0.19033999741077423,
      "learning_rate": 0.00019969809857193306,
      "loss": 0.9915,
      "step": 1060
    },
    {
      "epoch": 0.12286571296723581,
      "grad_norm": 0.18971246480941772,
      "learning_rate": 0.0001996822617832843,
      "loss": 0.974,
      "step": 1065
    },
    {
      "epoch": 0.12344254730041532,
      "grad_norm": 0.18075977265834808,
      "learning_rate": 0.0001996660208312796,
      "loss": 0.9888,
      "step": 1070
    },
    {
      "epoch": 0.12401938163359483,
      "grad_norm": 0.18304185569286346,
      "learning_rate": 0.00019964937578176816,
      "loss": 1.0237,
      "step": 1075
    },
    {
      "epoch": 0.12459621596677434,
      "grad_norm": 0.18264040350914001,
      "learning_rate": 0.00019963232670223752,
      "loss": 1.023,
      "step": 1080
    },
    {
      "epoch": 0.12517305029995385,
      "grad_norm": 0.17688079178333282,
      "learning_rate": 0.00019961487366181355,
      "loss": 0.9399,
      "step": 1085
    },
    {
      "epoch": 0.12574988463313336,
      "grad_norm": 0.18298649787902832,
      "learning_rate": 0.00019959701673125983,
      "loss": 1.0495,
      "step": 1090
    },
    {
      "epoch": 0.12632671896631287,
      "grad_norm": 0.1869022250175476,
      "learning_rate": 0.00019957875598297759,
      "loss": 0.9749,
      "step": 1095
    },
    {
      "epoch": 0.12690355329949238,
      "grad_norm": 0.17531757056713104,
      "learning_rate": 0.00019956009149100533,
      "loss": 0.9841,
      "step": 1100
    },
    {
      "epoch": 0.1274803876326719,
      "grad_norm": 0.18799515068531036,
      "learning_rate": 0.00019954102333101856,
      "loss": 0.985,
      "step": 1105
    },
    {
      "epoch": 0.1280572219658514,
      "grad_norm": 0.19451211392879486,
      "learning_rate": 0.0001995215515803294,
      "loss": 1.0184,
      "step": 1110
    },
    {
      "epoch": 0.1286340562990309,
      "grad_norm": 0.17992301285266876,
      "learning_rate": 0.00019950167631788642,
      "loss": 1.0029,
      "step": 1115
    },
    {
      "epoch": 0.12921089063221042,
      "grad_norm": 0.18217399716377258,
      "learning_rate": 0.00019948139762427416,
      "loss": 0.9786,
      "step": 1120
    },
    {
      "epoch": 0.12978772496538993,
      "grad_norm": 0.1724836379289627,
      "learning_rate": 0.000199460715581713,
      "loss": 0.9503,
      "step": 1125
    },
    {
      "epoch": 0.13036455929856944,
      "grad_norm": 0.18973779678344727,
      "learning_rate": 0.0001994396302740585,
      "loss": 0.9589,
      "step": 1130
    },
    {
      "epoch": 0.13094139363174895,
      "grad_norm": 0.1789364516735077,
      "learning_rate": 0.00019941814178680144,
      "loss": 1.0123,
      "step": 1135
    },
    {
      "epoch": 0.13151822796492849,
      "grad_norm": 0.179600328207016,
      "learning_rate": 0.00019939625020706724,
      "loss": 0.9644,
      "step": 1140
    },
    {
      "epoch": 0.132095062298108,
      "grad_norm": 0.20578639209270477,
      "learning_rate": 0.00019937395562361564,
      "loss": 0.9155,
      "step": 1145
    },
    {
      "epoch": 0.1326718966312875,
      "grad_norm": 0.17455343902111053,
      "learning_rate": 0.00019935125812684047,
      "loss": 1.0081,
      "step": 1150
    },
    {
      "epoch": 0.13324873096446702,
      "grad_norm": 0.18840928375720978,
      "learning_rate": 0.00019932815780876904,
      "loss": 0.9383,
      "step": 1155
    },
    {
      "epoch": 0.13382556529764653,
      "grad_norm": 0.18001633882522583,
      "learning_rate": 0.00019930465476306197,
      "loss": 0.997,
      "step": 1160
    },
    {
      "epoch": 0.13440239963082604,
      "grad_norm": 0.18877308070659637,
      "learning_rate": 0.00019928074908501272,
      "loss": 1.0056,
      "step": 1165
    },
    {
      "epoch": 0.13497923396400555,
      "grad_norm": 0.19247262179851532,
      "learning_rate": 0.00019925644087154734,
      "loss": 0.9396,
      "step": 1170
    },
    {
      "epoch": 0.13555606829718506,
      "grad_norm": 0.1856631338596344,
      "learning_rate": 0.00019923173022122378,
      "loss": 0.9777,
      "step": 1175
    },
    {
      "epoch": 0.13613290263036457,
      "grad_norm": 0.18794025480747223,
      "learning_rate": 0.00019920661723423183,
      "loss": 0.9611,
      "step": 1180
    },
    {
      "epoch": 0.13670973696354408,
      "grad_norm": 0.18122021853923798,
      "learning_rate": 0.00019918110201239247,
      "loss": 0.9364,
      "step": 1185
    },
    {
      "epoch": 0.1372865712967236,
      "grad_norm": 0.18005718290805817,
      "learning_rate": 0.00019915518465915758,
      "loss": 0.9338,
      "step": 1190
    },
    {
      "epoch": 0.1378634056299031,
      "grad_norm": 0.1888824999332428,
      "learning_rate": 0.00019912886527960954,
      "loss": 0.9059,
      "step": 1195
    },
    {
      "epoch": 0.1384402399630826,
      "grad_norm": 0.17332522571086884,
      "learning_rate": 0.0001991021439804607,
      "loss": 0.9822,
      "step": 1200
    },
    {
      "epoch": 0.13901707429626212,
      "grad_norm": 0.18236401677131653,
      "learning_rate": 0.00019907502087005297,
      "loss": 1.0221,
      "step": 1205
    },
    {
      "epoch": 0.13959390862944163,
      "grad_norm": 0.18748964369297028,
      "learning_rate": 0.00019904749605835742,
      "loss": 1.0282,
      "step": 1210
    },
    {
      "epoch": 0.14017074296262114,
      "grad_norm": 0.18207697570323944,
      "learning_rate": 0.00019901956965697387,
      "loss": 1.0046,
      "step": 1215
    },
    {
      "epoch": 0.14074757729580065,
      "grad_norm": 0.18568968772888184,
      "learning_rate": 0.00019899124177913041,
      "loss": 1.0182,
      "step": 1220
    },
    {
      "epoch": 0.14132441162898016,
      "grad_norm": 0.17385929822921753,
      "learning_rate": 0.00019896251253968288,
      "loss": 0.956,
      "step": 1225
    },
    {
      "epoch": 0.14190124596215967,
      "grad_norm": 0.18582086265087128,
      "learning_rate": 0.0001989333820551144,
      "loss": 1.0011,
      "step": 1230
    },
    {
      "epoch": 0.14247808029533918,
      "grad_norm": 0.18691149353981018,
      "learning_rate": 0.00019890385044353501,
      "loss": 1.0031,
      "step": 1235
    },
    {
      "epoch": 0.1430549146285187,
      "grad_norm": 0.17932343482971191,
      "learning_rate": 0.00019887391782468113,
      "loss": 1.0163,
      "step": 1240
    },
    {
      "epoch": 0.1436317489616982,
      "grad_norm": 0.1789650321006775,
      "learning_rate": 0.000198843584319915,
      "loss": 0.9428,
      "step": 1245
    },
    {
      "epoch": 0.1442085832948777,
      "grad_norm": 0.17853863537311554,
      "learning_rate": 0.0001988128500522244,
      "loss": 0.9619,
      "step": 1250
    },
    {
      "epoch": 0.14478541762805722,
      "grad_norm": 0.18588118255138397,
      "learning_rate": 0.00019878171514622187,
      "loss": 0.9928,
      "step": 1255
    },
    {
      "epoch": 0.14536225196123673,
      "grad_norm": 0.1837441772222519,
      "learning_rate": 0.00019875017972814435,
      "loss": 0.9711,
      "step": 1260
    },
    {
      "epoch": 0.14593908629441624,
      "grad_norm": 0.1958654373884201,
      "learning_rate": 0.00019871824392585276,
      "loss": 0.9413,
      "step": 1265
    },
    {
      "epoch": 0.14651592062759575,
      "grad_norm": 0.19134190678596497,
      "learning_rate": 0.00019868590786883134,
      "loss": 0.9717,
      "step": 1270
    },
    {
      "epoch": 0.14709275496077526,
      "grad_norm": 0.18663935363292694,
      "learning_rate": 0.00019865317168818713,
      "loss": 0.9806,
      "step": 1275
    },
    {
      "epoch": 0.14766958929395477,
      "grad_norm": 0.18832144141197205,
      "learning_rate": 0.0001986200355166495,
      "loss": 0.9256,
      "step": 1280
    },
    {
      "epoch": 0.14824642362713428,
      "grad_norm": 0.17965355515480042,
      "learning_rate": 0.0001985864994885697,
      "loss": 0.9852,
      "step": 1285
    },
    {
      "epoch": 0.1488232579603138,
      "grad_norm": 0.18879751861095428,
      "learning_rate": 0.00019855256373991993,
      "loss": 0.9489,
      "step": 1290
    },
    {
      "epoch": 0.1494000922934933,
      "grad_norm": 0.18717263638973236,
      "learning_rate": 0.00019851822840829338,
      "loss": 0.9698,
      "step": 1295
    },
    {
      "epoch": 0.1499769266266728,
      "grad_norm": 0.19117680191993713,
      "learning_rate": 0.0001984834936329031,
      "loss": 0.9967,
      "step": 1300
    },
    {
      "epoch": 0.15055376095985232,
      "grad_norm": 0.18893282115459442,
      "learning_rate": 0.00019844835955458193,
      "loss": 0.9391,
      "step": 1305
    },
    {
      "epoch": 0.15113059529303183,
      "grad_norm": 0.1852642297744751,
      "learning_rate": 0.00019841282631578145,
      "loss": 0.9871,
      "step": 1310
    },
    {
      "epoch": 0.15170742962621137,
      "grad_norm": 0.1851365864276886,
      "learning_rate": 0.00019837689406057183,
      "loss": 0.9459,
      "step": 1315
    },
    {
      "epoch": 0.15228426395939088,
      "grad_norm": 0.216008722782135,
      "learning_rate": 0.00019834056293464093,
      "loss": 0.9901,
      "step": 1320
    },
    {
      "epoch": 0.15286109829257039,
      "grad_norm": 0.17874599993228912,
      "learning_rate": 0.00019830383308529393,
      "loss": 0.984,
      "step": 1325
    },
    {
      "epoch": 0.1534379326257499,
      "grad_norm": 0.18888545036315918,
      "learning_rate": 0.00019826670466145262,
      "loss": 0.9617,
      "step": 1330
    },
    {
      "epoch": 0.1540147669589294,
      "grad_norm": 0.1813315898180008,
      "learning_rate": 0.00019822917781365474,
      "loss": 0.9944,
      "step": 1335
    },
    {
      "epoch": 0.15459160129210892,
      "grad_norm": 0.18800750374794006,
      "learning_rate": 0.00019819125269405352,
      "loss": 0.9283,
      "step": 1340
    },
    {
      "epoch": 0.15516843562528843,
      "grad_norm": 0.19481781125068665,
      "learning_rate": 0.00019815292945641705,
      "loss": 0.9559,
      "step": 1345
    },
    {
      "epoch": 0.15574526995846794,
      "grad_norm": 0.17894810438156128,
      "learning_rate": 0.0001981142082561274,
      "loss": 0.9628,
      "step": 1350
    },
    {
      "epoch": 0.15632210429164745,
      "grad_norm": 0.1818206012248993,
      "learning_rate": 0.0001980750892501804,
      "loss": 1.0073,
      "step": 1355
    },
    {
      "epoch": 0.15689893862482696,
      "grad_norm": 0.19612440466880798,
      "learning_rate": 0.0001980355725971847,
      "loss": 0.9837,
      "step": 1360
    },
    {
      "epoch": 0.15747577295800647,
      "grad_norm": 0.1835489571094513,
      "learning_rate": 0.0001979956584573612,
      "loss": 1.0062,
      "step": 1365
    },
    {
      "epoch": 0.15805260729118598,
      "grad_norm": 0.18093307316303253,
      "learning_rate": 0.00019795534699254238,
      "loss": 0.9496,
      "step": 1370
    },
    {
      "epoch": 0.15862944162436549,
      "grad_norm": 0.1910361796617508,
      "learning_rate": 0.00019791463836617176,
      "loss": 1.0064,
      "step": 1375
    },
    {
      "epoch": 0.159206275957545,
      "grad_norm": 0.17584437131881714,
      "learning_rate": 0.00019787353274330313,
      "loss": 0.9604,
      "step": 1380
    },
    {
      "epoch": 0.1597831102907245,
      "grad_norm": 0.193894624710083,
      "learning_rate": 0.00019783203029059997,
      "loss": 0.9816,
      "step": 1385
    },
    {
      "epoch": 0.16035994462390402,
      "grad_norm": 0.18043072521686554,
      "learning_rate": 0.00019779013117633454,
      "loss": 0.9106,
      "step": 1390
    },
    {
      "epoch": 0.16093677895708353,
      "grad_norm": 0.18467725813388824,
      "learning_rate": 0.00019774783557038755,
      "loss": 0.9019,
      "step": 1395
    },
    {
      "epoch": 0.16151361329026304,
      "grad_norm": 0.17407982051372528,
      "learning_rate": 0.00019770514364424725,
      "loss": 0.9465,
      "step": 1400
    },
    {
      "epoch": 0.16209044762344255,
      "grad_norm": 0.18963223695755005,
      "learning_rate": 0.00019766205557100868,
      "loss": 0.9775,
      "step": 1405
    },
    {
      "epoch": 0.16266728195662206,
      "grad_norm": 0.18725010752677917,
      "learning_rate": 0.0001976185715253732,
      "loss": 0.9709,
      "step": 1410
    },
    {
      "epoch": 0.16324411628980157,
      "grad_norm": 0.17535746097564697,
      "learning_rate": 0.0001975746916836475,
      "loss": 0.9495,
      "step": 1415
    },
    {
      "epoch": 0.16382095062298108,
      "grad_norm": 0.19157184660434723,
      "learning_rate": 0.0001975304162237432,
      "loss": 0.9545,
      "step": 1420
    },
    {
      "epoch": 0.1643977849561606,
      "grad_norm": 0.20112945139408112,
      "learning_rate": 0.00019748574532517586,
      "loss": 0.9671,
      "step": 1425
    },
    {
      "epoch": 0.1649746192893401,
      "grad_norm": 0.19094939529895782,
      "learning_rate": 0.0001974406791690643,
      "loss": 0.9768,
      "step": 1430
    },
    {
      "epoch": 0.1655514536225196,
      "grad_norm": 0.175222247838974,
      "learning_rate": 0.00019739521793813006,
      "loss": 0.9699,
      "step": 1435
    },
    {
      "epoch": 0.16612828795569912,
      "grad_norm": 0.18948869407176971,
      "learning_rate": 0.00019734936181669638,
      "loss": 1.0102,
      "step": 1440
    },
    {
      "epoch": 0.16670512228887863,
      "grad_norm": 0.18475863337516785,
      "learning_rate": 0.00019730311099068771,
      "loss": 0.922,
      "step": 1445
    },
    {
      "epoch": 0.16728195662205814,
      "grad_norm": 0.18690991401672363,
      "learning_rate": 0.00019725646564762878,
      "loss": 0.9693,
      "step": 1450
    },
    {
      "epoch": 0.16785879095523765,
      "grad_norm": 0.1863308697938919,
      "learning_rate": 0.00019720942597664385,
      "loss": 0.9639,
      "step": 1455
    },
    {
      "epoch": 0.16843562528841716,
      "grad_norm": 0.18741768598556519,
      "learning_rate": 0.00019716199216845604,
      "loss": 1.0382,
      "step": 1460
    },
    {
      "epoch": 0.16901245962159667,
      "grad_norm": 0.18736031651496887,
      "learning_rate": 0.00019711416441538652,
      "loss": 1.0025,
      "step": 1465
    },
    {
      "epoch": 0.16958929395477618,
      "grad_norm": 0.18398988246917725,
      "learning_rate": 0.00019706594291135366,
      "loss": 0.9566,
      "step": 1470
    },
    {
      "epoch": 0.1701661282879557,
      "grad_norm": 0.18206271529197693,
      "learning_rate": 0.0001970173278518724,
      "loss": 0.9727,
      "step": 1475
    },
    {
      "epoch": 0.1707429626211352,
      "grad_norm": 0.24337929487228394,
      "learning_rate": 0.00019696831943405324,
      "loss": 1.0105,
      "step": 1480
    },
    {
      "epoch": 0.1713197969543147,
      "grad_norm": 0.20907312631607056,
      "learning_rate": 0.0001969189178566016,
      "loss": 0.9619,
      "step": 1485
    },
    {
      "epoch": 0.17189663128749424,
      "grad_norm": 0.1832038313150406,
      "learning_rate": 0.00019686912331981702,
      "loss": 0.9998,
      "step": 1490
    },
    {
      "epoch": 0.17247346562067375,
      "grad_norm": 0.1847505420446396,
      "learning_rate": 0.00019681893602559224,
      "loss": 0.9444,
      "step": 1495
    },
    {
      "epoch": 0.17305029995385326,
      "grad_norm": 0.19578874111175537,
      "learning_rate": 0.00019676835617741249,
      "loss": 0.966,
      "step": 1500
    },
    {
      "epoch": 0.17362713428703277,
      "grad_norm": 0.20430216193199158,
      "learning_rate": 0.0001967173839803545,
      "loss": 0.9983,
      "step": 1505
    },
    {
      "epoch": 0.17420396862021229,
      "grad_norm": 0.19105270504951477,
      "learning_rate": 0.00019666601964108598,
      "loss": 0.9622,
      "step": 1510
    },
    {
      "epoch": 0.1747808029533918,
      "grad_norm": 0.19650229811668396,
      "learning_rate": 0.00019661426336786445,
      "loss": 0.924,
      "step": 1515
    },
    {
      "epoch": 0.1753576372865713,
      "grad_norm": 0.18799568712711334,
      "learning_rate": 0.00019656211537053654,
      "loss": 0.9319,
      "step": 1520
    },
    {
      "epoch": 0.17593447161975082,
      "grad_norm": 0.19247035682201385,
      "learning_rate": 0.00019650957586053716,
      "loss": 0.9913,
      "step": 1525
    },
    {
      "epoch": 0.17651130595293033,
      "grad_norm": 0.18789616227149963,
      "learning_rate": 0.00019645664505088864,
      "loss": 0.8992,
      "step": 1530
    },
    {
      "epoch": 0.17708814028610984,
      "grad_norm": 0.18446215987205505,
      "learning_rate": 0.00019640332315619977,
      "loss": 0.987,
      "step": 1535
    },
    {
      "epoch": 0.17766497461928935,
      "grad_norm": 0.1788845956325531,
      "learning_rate": 0.00019634961039266506,
      "loss": 0.9455,
      "step": 1540
    },
    {
      "epoch": 0.17824180895246886,
      "grad_norm": 0.17681817710399628,
      "learning_rate": 0.0001962955069780638,
      "loss": 1.0042,
      "step": 1545
    },
    {
      "epoch": 0.17881864328564837,
      "grad_norm": 0.18479640781879425,
      "learning_rate": 0.00019624101313175918,
      "loss": 0.9973,
      "step": 1550
    },
    {
      "epoch": 0.17939547761882788,
      "grad_norm": 0.17797234654426575,
      "learning_rate": 0.00019618612907469732,
      "loss": 0.959,
      "step": 1555
    },
    {
      "epoch": 0.17997231195200739,
      "grad_norm": 0.1861361563205719,
      "learning_rate": 0.00019613085502940658,
      "loss": 0.9529,
      "step": 1560
    },
    {
      "epoch": 0.1805491462851869,
      "grad_norm": 0.19123396277427673,
      "learning_rate": 0.00019607519121999647,
      "loss": 0.9506,
      "step": 1565
    },
    {
      "epoch": 0.1811259806183664,
      "grad_norm": 0.19712139666080475,
      "learning_rate": 0.00019601913787215683,
      "loss": 1.0023,
      "step": 1570
    },
    {
      "epoch": 0.18170281495154592,
      "grad_norm": 0.18968220055103302,
      "learning_rate": 0.0001959626952131568,
      "loss": 0.9354,
      "step": 1575
    },
    {
      "epoch": 0.18227964928472543,
      "grad_norm": 0.1877613365650177,
      "learning_rate": 0.00019590586347184417,
      "loss": 0.9825,
      "step": 1580
    },
    {
      "epoch": 0.18285648361790494,
      "grad_norm": 0.1915743201971054,
      "learning_rate": 0.00019584864287864408,
      "loss": 0.9456,
      "step": 1585
    },
    {
      "epoch": 0.18343331795108445,
      "grad_norm": 0.19143231213092804,
      "learning_rate": 0.0001957910336655584,
      "loss": 0.9993,
      "step": 1590
    },
    {
      "epoch": 0.18401015228426396,
      "grad_norm": 0.1909196525812149,
      "learning_rate": 0.00019573303606616459,
      "loss": 0.9774,
      "step": 1595
    },
    {
      "epoch": 0.18458698661744347,
      "grad_norm": 0.19479255378246307,
      "learning_rate": 0.00019567465031561487,
      "loss": 0.9727,
      "step": 1600
    },
    {
      "epoch": 0.18516382095062298,
      "grad_norm": 0.18205581605434418,
      "learning_rate": 0.0001956158766506352,
      "loss": 0.9786,
      "step": 1605
    },
    {
      "epoch": 0.18574065528380249,
      "grad_norm": 0.19251278042793274,
      "learning_rate": 0.00019555671530952445,
      "loss": 0.9815,
      "step": 1610
    },
    {
      "epoch": 0.186317489616982,
      "grad_norm": 0.1939883977174759,
      "learning_rate": 0.00019549716653215318,
      "loss": 0.9449,
      "step": 1615
    },
    {
      "epoch": 0.1868943239501615,
      "grad_norm": 0.187269926071167,
      "learning_rate": 0.00019543723055996282,
      "loss": 0.9425,
      "step": 1620
    },
    {
      "epoch": 0.18747115828334102,
      "grad_norm": 0.18750888109207153,
      "learning_rate": 0.00019537690763596487,
      "loss": 0.961,
      "step": 1625
    },
    {
      "epoch": 0.18804799261652053,
      "grad_norm": 0.20002637803554535,
      "learning_rate": 0.00019531619800473952,
      "loss": 0.9399,
      "step": 1630
    },
    {
      "epoch": 0.18862482694970004,
      "grad_norm": 0.19453556835651398,
      "learning_rate": 0.00019525510191243498,
      "loss": 0.9138,
      "step": 1635
    },
    {
      "epoch": 0.18920166128287955,
      "grad_norm": 0.18952696025371552,
      "learning_rate": 0.0001951936196067664,
      "loss": 1.0256,
      "step": 1640
    },
    {
      "epoch": 0.18977849561605906,
      "grad_norm": 0.22311453521251678,
      "learning_rate": 0.00019513175133701474,
      "loss": 0.9247,
      "step": 1645
    },
    {
      "epoch": 0.19035532994923857,
      "grad_norm": 0.1846632957458496,
      "learning_rate": 0.00019506949735402588,
      "loss": 0.9555,
      "step": 1650
    },
    {
      "epoch": 0.19093216428241808,
      "grad_norm": 0.1898278146982193,
      "learning_rate": 0.00019500685791020968,
      "loss": 0.9954,
      "step": 1655
    },
    {
      "epoch": 0.1915089986155976,
      "grad_norm": 0.18607108294963837,
      "learning_rate": 0.00019494383325953875,
      "loss": 0.9065,
      "step": 1660
    },
    {
      "epoch": 0.19208583294877712,
      "grad_norm": 0.19422227144241333,
      "learning_rate": 0.00019488042365754758,
      "loss": 0.9198,
      "step": 1665
    },
    {
      "epoch": 0.19266266728195663,
      "grad_norm": 0.1838252693414688,
      "learning_rate": 0.0001948166293613314,
      "loss": 0.9501,
      "step": 1670
    },
    {
      "epoch": 0.19323950161513614,
      "grad_norm": 0.1975485235452652,
      "learning_rate": 0.00019475245062954523,
      "loss": 0.9422,
      "step": 1675
    },
    {
      "epoch": 0.19381633594831565,
      "grad_norm": 0.19868746399879456,
      "learning_rate": 0.00019468788772240286,
      "loss": 0.9407,
      "step": 1680
    },
    {
      "epoch": 0.19439317028149516,
      "grad_norm": 0.19394518435001373,
      "learning_rate": 0.00019462294090167554,
      "loss": 0.9775,
      "step": 1685
    },
    {
      "epoch": 0.19497000461467467,
      "grad_norm": 0.19594334065914154,
      "learning_rate": 0.0001945576104306913,
      "loss": 0.9393,
      "step": 1690
    },
    {
      "epoch": 0.19554683894785418,
      "grad_norm": 0.18195070326328278,
      "learning_rate": 0.00019449189657433358,
      "loss": 0.9847,
      "step": 1695
    },
    {
      "epoch": 0.1961236732810337,
      "grad_norm": 0.19553768634796143,
      "learning_rate": 0.00019442579959904024,
      "loss": 0.946,
      "step": 1700
    },
    {
      "epoch": 0.1967005076142132,
      "grad_norm": 0.18642808496952057,
      "learning_rate": 0.0001943593197728026,
      "loss": 0.938,
      "step": 1705
    },
    {
      "epoch": 0.19727734194739271,
      "grad_norm": 0.18931232392787933,
      "learning_rate": 0.00019429245736516415,
      "loss": 0.9308,
      "step": 1710
    },
    {
      "epoch": 0.19785417628057222,
      "grad_norm": 0.1960645318031311,
      "learning_rate": 0.00019422521264721962,
      "loss": 0.9677,
      "step": 1715
    },
    {
      "epoch": 0.19843101061375173,
      "grad_norm": 0.18322202563285828,
      "learning_rate": 0.00019415758589161385,
      "loss": 0.9631,
      "step": 1720
    },
    {
      "epoch": 0.19900784494693124,
      "grad_norm": 0.19517329335212708,
      "learning_rate": 0.0001940895773725406,
      "loss": 0.9644,
      "step": 1725
    },
    {
      "epoch": 0.19958467928011075,
      "grad_norm": 0.19266700744628906,
      "learning_rate": 0.00019402118736574155,
      "loss": 0.9747,
      "step": 1730
    },
    {
      "epoch": 0.20016151361329027,
      "grad_norm": 0.18284273147583008,
      "learning_rate": 0.00019395241614850504,
      "loss": 0.922,
      "step": 1735
    },
    {
      "epoch": 0.20073834794646978,
      "grad_norm": 0.19635812938213348,
      "learning_rate": 0.00019388326399966515,
      "loss": 0.9793,
      "step": 1740
    },
    {
      "epoch": 0.20131518227964929,
      "grad_norm": 0.19478964805603027,
      "learning_rate": 0.00019381373119960033,
      "loss": 0.9948,
      "step": 1745
    },
    {
      "epoch": 0.2018920166128288,
      "grad_norm": 0.2036595195531845,
      "learning_rate": 0.00019374381803023252,
      "loss": 1.0269,
      "step": 1750
    },
    {
      "epoch": 0.2024688509460083,
      "grad_norm": 0.20682990550994873,
      "learning_rate": 0.00019367352477502576,
      "loss": 0.9985,
      "step": 1755
    },
    {
      "epoch": 0.20304568527918782,
      "grad_norm": 0.178926020860672,
      "learning_rate": 0.0001936028517189852,
      "loss": 0.9763,
      "step": 1760
    },
    {
      "epoch": 0.20362251961236733,
      "grad_norm": 0.19505412876605988,
      "learning_rate": 0.00019353179914865596,
      "loss": 0.9484,
      "step": 1765
    },
    {
      "epoch": 0.20419935394554684,
      "grad_norm": 0.20620250701904297,
      "learning_rate": 0.00019346036735212177,
      "loss": 0.9963,
      "step": 1770
    },
    {
      "epoch": 0.20477618827872635,
      "grad_norm": 0.19574880599975586,
      "learning_rate": 0.00019338855661900405,
      "loss": 0.9976,
      "step": 1775
    },
    {
      "epoch": 0.20535302261190586,
      "grad_norm": 0.19267630577087402,
      "learning_rate": 0.00019331636724046058,
      "loss": 0.9666,
      "step": 1780
    },
    {
      "epoch": 0.20592985694508537,
      "grad_norm": 0.18831545114517212,
      "learning_rate": 0.00019324379950918437,
      "loss": 1.0032,
      "step": 1785
    },
    {
      "epoch": 0.20650669127826488,
      "grad_norm": 0.19826582074165344,
      "learning_rate": 0.00019317085371940246,
      "loss": 0.9847,
      "step": 1790
    },
    {
      "epoch": 0.20708352561144439,
      "grad_norm": 0.19098900258541107,
      "learning_rate": 0.00019309753016687477,
      "loss": 0.9777,
      "step": 1795
    },
    {
      "epoch": 0.2076603599446239,
      "grad_norm": 0.19980227947235107,
      "learning_rate": 0.00019302382914889284,
      "loss": 0.9907,
      "step": 1800
    },
    {
      "epoch": 0.2082371942778034,
      "grad_norm": 0.18145912885665894,
      "learning_rate": 0.00019294975096427862,
      "loss": 0.9465,
      "step": 1805
    },
    {
      "epoch": 0.20881402861098292,
      "grad_norm": 0.18919304013252258,
      "learning_rate": 0.00019287529591338333,
      "loss": 0.9272,
      "step": 1810
    },
    {
      "epoch": 0.20939086294416243,
      "grad_norm": 0.18271850049495697,
      "learning_rate": 0.0001928004642980862,
      "loss": 0.9525,
      "step": 1815
    },
    {
      "epoch": 0.20996769727734194,
      "grad_norm": 0.21809083223342896,
      "learning_rate": 0.00019272525642179323,
      "loss": 0.972,
      "step": 1820
    },
    {
      "epoch": 0.21054453161052145,
      "grad_norm": 0.19276146590709686,
      "learning_rate": 0.00019264967258943595,
      "loss": 0.9335,
      "step": 1825
    },
    {
      "epoch": 0.21112136594370096,
      "grad_norm": 0.18455006182193756,
      "learning_rate": 0.0001925737131074703,
      "loss": 0.9558,
      "step": 1830
    },
    {
      "epoch": 0.2116982002768805,
      "grad_norm": 0.2075817883014679,
      "learning_rate": 0.00019249737828387522,
      "loss": 1.0067,
      "step": 1835
    },
    {
      "epoch": 0.21227503461006,
      "grad_norm": 0.18990999460220337,
      "learning_rate": 0.00019242066842815146,
      "loss": 0.9964,
      "step": 1840
    },
    {
      "epoch": 0.21285186894323951,
      "grad_norm": 0.19079752266407013,
      "learning_rate": 0.00019234358385132038,
      "loss": 0.9827,
      "step": 1845
    },
    {
      "epoch": 0.21342870327641902,
      "grad_norm": 0.187656432390213,
      "learning_rate": 0.00019226612486592271,
      "loss": 0.9619,
      "step": 1850
    },
    {
      "epoch": 0.21400553760959853,
      "grad_norm": 0.19485455751419067,
      "learning_rate": 0.00019218829178601713,
      "loss": 0.9874,
      "step": 1855
    },
    {
      "epoch": 0.21458237194277804,
      "grad_norm": 0.19166654348373413,
      "learning_rate": 0.00019211008492717914,
      "loss": 1.0142,
      "step": 1860
    },
    {
      "epoch": 0.21515920627595755,
      "grad_norm": 0.20272095501422882,
      "learning_rate": 0.0001920315046064997,
      "loss": 0.9981,
      "step": 1865
    },
    {
      "epoch": 0.21573604060913706,
      "grad_norm": 0.18274882435798645,
      "learning_rate": 0.00019195255114258408,
      "loss": 0.9761,
      "step": 1870
    },
    {
      "epoch": 0.21631287494231657,
      "grad_norm": 0.18468588590621948,
      "learning_rate": 0.00019187322485555031,
      "loss": 0.9657,
      "step": 1875
    },
    {
      "epoch": 0.21688970927549608,
      "grad_norm": 0.19172626733779907,
      "learning_rate": 0.00019179352606702813,
      "loss": 0.9432,
      "step": 1880
    },
    {
      "epoch": 0.2174665436086756,
      "grad_norm": 0.1883264034986496,
      "learning_rate": 0.00019171345510015758,
      "loss": 0.9939,
      "step": 1885
    },
    {
      "epoch": 0.2180433779418551,
      "grad_norm": 0.1864314079284668,
      "learning_rate": 0.0001916330122795877,
      "loss": 1.0028,
      "step": 1890
    },
    {
      "epoch": 0.21862021227503461,
      "grad_norm": 0.19553621113300323,
      "learning_rate": 0.00019155219793147522,
      "loss": 0.9866,
      "step": 1895
    },
    {
      "epoch": 0.21919704660821412,
      "grad_norm": 0.1937098354101181,
      "learning_rate": 0.00019147101238348326,
      "loss": 0.9515,
      "step": 1900
    },
    {
      "epoch": 0.21977388094139363,
      "grad_norm": 0.1826750636100769,
      "learning_rate": 0.00019138945596477994,
      "loss": 0.9832,
      "step": 1905
    },
    {
      "epoch": 0.22035071527457314,
      "grad_norm": 0.19543537497520447,
      "learning_rate": 0.00019130752900603702,
      "loss": 0.9677,
      "step": 1910
    },
    {
      "epoch": 0.22092754960775265,
      "grad_norm": 0.18481284379959106,
      "learning_rate": 0.00019122523183942879,
      "loss": 0.9081,
      "step": 1915
    },
    {
      "epoch": 0.22150438394093216,
      "grad_norm": 0.17375807464122772,
      "learning_rate": 0.00019114256479863038,
      "loss": 0.9674,
      "step": 1920
    },
    {
      "epoch": 0.22208121827411167,
      "grad_norm": 0.19649936258792877,
      "learning_rate": 0.00019105952821881668,
      "loss": 0.9693,
      "step": 1925
    },
    {
      "epoch": 0.22265805260729118,
      "grad_norm": 0.18931066989898682,
      "learning_rate": 0.00019097612243666086,
      "loss": 0.9665,
      "step": 1930
    },
    {
      "epoch": 0.2232348869404707,
      "grad_norm": 0.18258830904960632,
      "learning_rate": 0.00019089234779033306,
      "loss": 0.9844,
      "step": 1935
    },
    {
      "epoch": 0.2238117212736502,
      "grad_norm": 0.2044518142938614,
      "learning_rate": 0.00019080820461949886,
      "loss": 0.9293,
      "step": 1940
    },
    {
      "epoch": 0.22438855560682971,
      "grad_norm": 0.1841280460357666,
      "learning_rate": 0.00019072369326531824,
      "loss": 0.9823,
      "step": 1945
    },
    {
      "epoch": 0.22496538994000922,
      "grad_norm": 0.1927040070295334,
      "learning_rate": 0.00019063881407044373,
      "loss": 0.993,
      "step": 1950
    },
    {
      "epoch": 0.22554222427318874,
      "grad_norm": 0.19881246984004974,
      "learning_rate": 0.00019055356737901952,
      "loss": 0.9679,
      "step": 1955
    },
    {
      "epoch": 0.22611905860636825,
      "grad_norm": 0.18685629963874817,
      "learning_rate": 0.00019046795353667965,
      "loss": 0.961,
      "step": 1960
    },
    {
      "epoch": 0.22669589293954776,
      "grad_norm": 0.1891525834798813,
      "learning_rate": 0.00019038197289054684,
      "loss": 0.9496,
      "step": 1965
    },
    {
      "epoch": 0.22727272727272727,
      "grad_norm": 0.1977647989988327,
      "learning_rate": 0.00019029562578923106,
      "loss": 0.9804,
      "step": 1970
    },
    {
      "epoch": 0.22784956160590678,
      "grad_norm": 0.19999738037586212,
      "learning_rate": 0.000190208912582828,
      "loss": 1.0139,
      "step": 1975
    },
    {
      "epoch": 0.22842639593908629,
      "grad_norm": 0.18472820520401,
      "learning_rate": 0.0001901218336229178,
      "loss": 0.989,
      "step": 1980
    },
    {
      "epoch": 0.2290032302722658,
      "grad_norm": 0.18772737681865692,
      "learning_rate": 0.0001900343892625635,
      "loss": 0.9869,
      "step": 1985
    },
    {
      "epoch": 0.2295800646054453,
      "grad_norm": 0.21390481293201447,
      "learning_rate": 0.00018994657985630972,
      "loss": 0.9556,
      "step": 1990
    },
    {
      "epoch": 0.23015689893862482,
      "grad_norm": 0.18885917961597443,
      "learning_rate": 0.00018985840576018107,
      "loss": 0.951,
      "step": 1995
    },
    {
      "epoch": 0.23073373327180433,
      "grad_norm": 0.18941619992256165,
      "learning_rate": 0.00018976986733168093,
      "loss": 0.9348,
      "step": 2000
    },
    {
      "epoch": 0.23131056760498384,
      "grad_norm": 0.18609663844108582,
      "learning_rate": 0.00018968096492978976,
      "loss": 0.9704,
      "step": 2005
    },
    {
      "epoch": 0.23188740193816337,
      "grad_norm": 0.19215025007724762,
      "learning_rate": 0.0001895916989149638,
      "loss": 0.9188,
      "step": 2010
    },
    {
      "epoch": 0.23246423627134288,
      "grad_norm": 0.18575677275657654,
      "learning_rate": 0.00018950206964913355,
      "loss": 0.9793,
      "step": 2015
    },
    {
      "epoch": 0.2330410706045224,
      "grad_norm": 0.198299378156662,
      "learning_rate": 0.00018941207749570237,
      "loss": 0.9936,
      "step": 2020
    },
    {
      "epoch": 0.2336179049377019,
      "grad_norm": 0.1827745884656906,
      "learning_rate": 0.0001893217228195449,
      "loss": 0.9306,
      "step": 2025
    },
    {
      "epoch": 0.2341947392708814,
      "grad_norm": 0.19425687193870544,
      "learning_rate": 0.00018923100598700561,
      "loss": 1.0467,
      "step": 2030
    },
    {
      "epoch": 0.23477157360406092,
      "grad_norm": 0.18473173677921295,
      "learning_rate": 0.00018913992736589746,
      "loss": 1.0026,
      "step": 2035
    },
    {
      "epoch": 0.23534840793724043,
      "grad_norm": 0.1885528862476349,
      "learning_rate": 0.0001890484873255001,
      "loss": 0.9516,
      "step": 2040
    },
    {
      "epoch": 0.23592524227041994,
      "grad_norm": 0.19046476483345032,
      "learning_rate": 0.00018895668623655873,
      "loss": 0.9957,
      "step": 2045
    },
    {
      "epoch": 0.23650207660359945,
      "grad_norm": 0.1952226310968399,
      "learning_rate": 0.0001888645244712824,
      "loss": 0.98,
      "step": 2050
    },
    {
      "epoch": 0.23707891093677896,
      "grad_norm": 0.1928633153438568,
      "learning_rate": 0.00018877200240334236,
      "loss": 1.0137,
      "step": 2055
    },
    {
      "epoch": 0.23765574526995847,
      "grad_norm": 0.1882990151643753,
      "learning_rate": 0.00018867912040787096,
      "loss": 1.0047,
      "step": 2060
    },
    {
      "epoch": 0.23823257960313798,
      "grad_norm": 0.20092810690402985,
      "learning_rate": 0.00018858587886145975,
      "loss": 0.9555,
      "step": 2065
    },
    {
      "epoch": 0.2388094139363175,
      "grad_norm": 0.19786730408668518,
      "learning_rate": 0.00018849227814215805,
      "loss": 0.9894,
      "step": 2070
    },
    {
      "epoch": 0.239386248269497,
      "grad_norm": 0.19983135163784027,
      "learning_rate": 0.00018839831862947152,
      "loss": 0.9391,
      "step": 2075
    },
    {
      "epoch": 0.23996308260267651,
      "grad_norm": 0.19174017012119293,
      "learning_rate": 0.00018830400070436057,
      "loss": 0.9526,
      "step": 2080
    },
    {
      "epoch": 0.24053991693585602,
      "grad_norm": 0.1841077357530594,
      "learning_rate": 0.00018820932474923873,
      "loss": 0.9744,
      "step": 2085
    },
    {
      "epoch": 0.24111675126903553,
      "grad_norm": 0.19150149822235107,
      "learning_rate": 0.00018811429114797123,
      "loss": 0.9676,
      "step": 2090
    },
    {
      "epoch": 0.24169358560221504,
      "grad_norm": 0.20261262357234955,
      "learning_rate": 0.00018801890028587333,
      "loss": 0.9706,
      "step": 2095
    },
    {
      "epoch": 0.24227041993539455,
      "grad_norm": 0.199992835521698,
      "learning_rate": 0.0001879231525497089,
      "loss": 1.0026,
      "step": 2100
    },
    {
      "epoch": 0.24284725426857406,
      "grad_norm": 0.20790398120880127,
      "learning_rate": 0.0001878270483276886,
      "loss": 1.0086,
      "step": 2105
    },
    {
      "epoch": 0.24342408860175357,
      "grad_norm": 0.1923636943101883,
      "learning_rate": 0.00018773058800946858,
      "loss": 0.932,
      "step": 2110
    },
    {
      "epoch": 0.24400092293493308,
      "grad_norm": 0.18227846920490265,
      "learning_rate": 0.00018763377198614887,
      "loss": 0.9612,
      "step": 2115
    },
    {
      "epoch": 0.2445777572681126,
      "grad_norm": 0.18197417259216309,
      "learning_rate": 0.00018753660065027152,
      "loss": 1.0405,
      "step": 2120
    },
    {
      "epoch": 0.2451545916012921,
      "grad_norm": 0.1864314079284668,
      "learning_rate": 0.00018743907439581933,
      "loss": 0.9918,
      "step": 2125
    },
    {
      "epoch": 0.24573142593447161,
      "grad_norm": 0.18296851217746735,
      "learning_rate": 0.0001873411936182141,
      "loss": 0.9839,
      "step": 2130
    },
    {
      "epoch": 0.24630826026765112,
      "grad_norm": 0.20536720752716064,
      "learning_rate": 0.000187242958714315,
      "loss": 0.9955,
      "step": 2135
    },
    {
      "epoch": 0.24688509460083063,
      "grad_norm": 0.18991383910179138,
      "learning_rate": 0.00018714437008241709,
      "loss": 0.9564,
      "step": 2140
    },
    {
      "epoch": 0.24746192893401014,
      "grad_norm": 0.21389083564281464,
      "learning_rate": 0.00018704542812224956,
      "loss": 1.0121,
      "step": 2145
    },
    {
      "epoch": 0.24803876326718965,
      "grad_norm": 0.19424885511398315,
      "learning_rate": 0.00018694613323497422,
      "loss": 0.9919,
      "step": 2150
    },
    {
      "epoch": 0.24861559760036916,
      "grad_norm": 0.20211443305015564,
      "learning_rate": 0.0001868464858231838,
      "loss": 1.0288,
      "step": 2155
    },
    {
      "epoch": 0.24919243193354867,
      "grad_norm": 0.1808236688375473,
      "learning_rate": 0.0001867464862909004,
      "loss": 0.8799,
      "step": 2160
    },
    {
      "epoch": 0.24976926626672818,
      "grad_norm": 0.19431866705417633,
      "learning_rate": 0.00018664613504357366,
      "loss": 0.942,
      "step": 2165
    },
    {
      "epoch": 0.2503461005999077,
      "grad_norm": 0.18865206837654114,
      "learning_rate": 0.0001865454324880794,
      "loss": 0.9667,
      "step": 2170
    },
    {
      "epoch": 0.2509229349330872,
      "grad_norm": 0.1844114363193512,
      "learning_rate": 0.00018644437903271778,
      "loss": 0.9783,
      "step": 2175
    },
    {
      "epoch": 0.2514997692662667,
      "grad_norm": 0.19228576123714447,
      "learning_rate": 0.00018634297508721167,
      "loss": 0.9547,
      "step": 2180
    },
    {
      "epoch": 0.2520766035994462,
      "grad_norm": 0.19523252546787262,
      "learning_rate": 0.00018624122106270506,
      "loss": 0.944,
      "step": 2185
    },
    {
      "epoch": 0.25265343793262574,
      "grad_norm": 0.18889620900154114,
      "learning_rate": 0.00018613911737176125,
      "loss": 0.9372,
      "step": 2190
    },
    {
      "epoch": 0.25323027226580525,
      "grad_norm": 0.1964087188243866,
      "learning_rate": 0.0001860366644283613,
      "loss": 0.9885,
      "step": 2195
    },
    {
      "epoch": 0.25380710659898476,
      "grad_norm": 0.19151298701763153,
      "learning_rate": 0.00018593386264790243,
      "loss": 1.023,
      "step": 2200
    },
    {
      "epoch": 0.25438394093216427,
      "grad_norm": 0.181709885597229,
      "learning_rate": 0.00018583071244719607,
      "loss": 0.8938,
      "step": 2205
    },
    {
      "epoch": 0.2549607752653438,
      "grad_norm": 0.18949133157730103,
      "learning_rate": 0.0001857272142444664,
      "loss": 0.9712,
      "step": 2210
    },
    {
      "epoch": 0.2555376095985233,
      "grad_norm": 0.19438017904758453,
      "learning_rate": 0.0001856233684593486,
      "loss": 0.9659,
      "step": 2215
    },
    {
      "epoch": 0.2561144439317028,
      "grad_norm": 0.19530200958251953,
      "learning_rate": 0.00018551917551288706,
      "loss": 1.0126,
      "step": 2220
    },
    {
      "epoch": 0.2566912782648823,
      "grad_norm": 0.19227471947669983,
      "learning_rate": 0.0001854146358275338,
      "loss": 0.9807,
      "step": 2225
    },
    {
      "epoch": 0.2572681125980618,
      "grad_norm": 0.19348041713237762,
      "learning_rate": 0.00018530974982714667,
      "loss": 0.9755,
      "step": 2230
    },
    {
      "epoch": 0.2578449469312413,
      "grad_norm": 0.2049867808818817,
      "learning_rate": 0.0001852045179369877,
      "loss": 0.9948,
      "step": 2235
    },
    {
      "epoch": 0.25842178126442084,
      "grad_norm": 0.19374214112758636,
      "learning_rate": 0.0001850989405837212,
      "loss": 1.028,
      "step": 2240
    },
    {
      "epoch": 0.25899861559760035,
      "grad_norm": 0.18960264325141907,
      "learning_rate": 0.0001849930181954124,
      "loss": 1.0199,
      "step": 2245
    },
    {
      "epoch": 0.25957544993077986,
      "grad_norm": 0.19603078067302704,
      "learning_rate": 0.00018488675120152532,
      "loss": 0.9788,
      "step": 2250
    },
    {
      "epoch": 0.26015228426395937,
      "grad_norm": 0.19418926537036896,
      "learning_rate": 0.00018478014003292116,
      "loss": 0.9406,
      "step": 2255
    },
    {
      "epoch": 0.2607291185971389,
      "grad_norm": 0.183867946267128,
      "learning_rate": 0.0001846731851218567,
      "loss": 0.9217,
      "step": 2260
    },
    {
      "epoch": 0.2613059529303184,
      "grad_norm": 0.19260871410369873,
      "learning_rate": 0.00018456588690198236,
      "loss": 0.9618,
      "step": 2265
    },
    {
      "epoch": 0.2618827872634979,
      "grad_norm": 0.1969606876373291,
      "learning_rate": 0.0001844582458083405,
      "loss": 0.9594,
      "step": 2270
    },
    {
      "epoch": 0.26245962159667746,
      "grad_norm": 0.1877615749835968,
      "learning_rate": 0.0001843502622773637,
      "loss": 0.952,
      "step": 2275
    },
    {
      "epoch": 0.26303645592985697,
      "grad_norm": 0.18486037850379944,
      "learning_rate": 0.00018424193674687297,
      "loss": 0.9699,
      "step": 2280
    },
    {
      "epoch": 0.2636132902630365,
      "grad_norm": 0.19585862755775452,
      "learning_rate": 0.00018413326965607593,
      "loss": 0.9894,
      "step": 2285
    },
    {
      "epoch": 0.264190124596216,
      "grad_norm": 0.17970632016658783,
      "learning_rate": 0.00018402426144556504,
      "loss": 0.9834,
      "step": 2290
    },
    {
      "epoch": 0.2647669589293955,
      "grad_norm": 0.19453051686286926,
      "learning_rate": 0.0001839149125573159,
      "loss": 0.9737,
      "step": 2295
    },
    {
      "epoch": 0.265343793262575,
      "grad_norm": 0.20039866864681244,
      "learning_rate": 0.00018380522343468532,
      "loss": 1.0147,
      "step": 2300
    },
    {
      "epoch": 0.2659206275957545,
      "grad_norm": 0.1953950971364975,
      "learning_rate": 0.00018369519452240973,
      "loss": 0.9651,
      "step": 2305
    },
    {
      "epoch": 0.26649746192893403,
      "grad_norm": 0.1906932145357132,
      "learning_rate": 0.00018358482626660303,
      "loss": 0.9364,
      "step": 2310
    },
    {
      "epoch": 0.26707429626211354,
      "grad_norm": 0.1904212385416031,
      "learning_rate": 0.0001834741191147552,
      "loss": 0.9498,
      "step": 2315
    },
    {
      "epoch": 0.26765113059529305,
      "grad_norm": 0.18951915204524994,
      "learning_rate": 0.00018336307351573018,
      "loss": 1.0005,
      "step": 2320
    },
    {
      "epoch": 0.26822796492847256,
      "grad_norm": 0.19409124553203583,
      "learning_rate": 0.00018325168991976408,
      "loss": 1.0019,
      "step": 2325
    },
    {
      "epoch": 0.26880479926165207,
      "grad_norm": 0.20801284909248352,
      "learning_rate": 0.00018313996877846361,
      "loss": 0.9383,
      "step": 2330
    },
    {
      "epoch": 0.2693816335948316,
      "grad_norm": 0.19471175968647003,
      "learning_rate": 0.00018302791054480394,
      "loss": 1.0274,
      "step": 2335
    },
    {
      "epoch": 0.2699584679280111,
      "grad_norm": 0.20756390690803528,
      "learning_rate": 0.00018291551567312694,
      "loss": 0.9846,
      "step": 2340
    },
    {
      "epoch": 0.2705353022611906,
      "grad_norm": 0.1832803636789322,
      "learning_rate": 0.00018280278461913952,
      "loss": 0.9467,
      "step": 2345
    },
    {
      "epoch": 0.2711121365943701,
      "grad_norm": 0.19528664648532867,
      "learning_rate": 0.00018268971783991152,
      "loss": 0.9564,
      "step": 2350
    },
    {
      "epoch": 0.2716889709275496,
      "grad_norm": 0.18837648630142212,
      "learning_rate": 0.00018257631579387412,
      "loss": 0.9511,
      "step": 2355
    },
    {
      "epoch": 0.27226580526072913,
      "grad_norm": 0.20164744555950165,
      "learning_rate": 0.0001824625789408177,
      "loss": 0.9314,
      "step": 2360
    },
    {
      "epoch": 0.27284263959390864,
      "grad_norm": 0.1876562237739563,
      "learning_rate": 0.00018234850774189018,
      "loss": 0.9518,
      "step": 2365
    },
    {
      "epoch": 0.27341947392708815,
      "grad_norm": 0.18695729970932007,
      "learning_rate": 0.00018223410265959516,
      "loss": 0.9382,
      "step": 2370
    },
    {
      "epoch": 0.27399630826026766,
      "grad_norm": 0.19776229560375214,
      "learning_rate": 0.00018211936415778984,
      "loss": 0.9793,
      "step": 2375
    },
    {
      "epoch": 0.2745731425934472,
      "grad_norm": 0.2102830559015274,
      "learning_rate": 0.0001820042927016834,
      "loss": 0.9862,
      "step": 2380
    },
    {
      "epoch": 0.2751499769266267,
      "grad_norm": 0.19167035818099976,
      "learning_rate": 0.0001818888887578349,
      "loss": 0.9348,
      "step": 2385
    },
    {
      "epoch": 0.2757268112598062,
      "grad_norm": 0.20402806997299194,
      "learning_rate": 0.00018177315279415153,
      "loss": 0.9807,
      "step": 2390
    },
    {
      "epoch": 0.2763036455929857,
      "grad_norm": 0.19756613671779633,
      "learning_rate": 0.00018165708527988664,
      "loss": 1.0354,
      "step": 2395
    },
    {
      "epoch": 0.2768804799261652,
      "grad_norm": 0.19909250736236572,
      "learning_rate": 0.00018154068668563782,
      "loss": 1.0038,
      "step": 2400
    },
    {
      "epoch": 0.2774573142593447,
      "grad_norm": 0.19040563702583313,
      "learning_rate": 0.00018142395748334513,
      "loss": 0.9219,
      "step": 2405
    },
    {
      "epoch": 0.27803414859252423,
      "grad_norm": 0.18826067447662354,
      "learning_rate": 0.000181306898146289,
      "loss": 0.9073,
      "step": 2410
    },
    {
      "epoch": 0.27861098292570374,
      "grad_norm": 0.2101050615310669,
      "learning_rate": 0.00018118950914908843,
      "loss": 0.9463,
      "step": 2415
    },
    {
      "epoch": 0.27918781725888325,
      "grad_norm": 0.19028016924858093,
      "learning_rate": 0.00018107179096769901,
      "loss": 0.9478,
      "step": 2420
    },
    {
      "epoch": 0.27976465159206276,
      "grad_norm": 0.21243633329868317,
      "learning_rate": 0.00018095374407941104,
      "loss": 0.9734,
      "step": 2425
    },
    {
      "epoch": 0.2803414859252423,
      "grad_norm": 0.19807711243629456,
      "learning_rate": 0.0001808353689628475,
      "loss": 0.9057,
      "step": 2430
    },
    {
      "epoch": 0.2809183202584218,
      "grad_norm": 0.19900068640708923,
      "learning_rate": 0.0001807166660979623,
      "loss": 0.9656,
      "step": 2435
    },
    {
      "epoch": 0.2814951545916013,
      "grad_norm": 0.18442806601524353,
      "learning_rate": 0.00018059763596603814,
      "loss": 1.0024,
      "step": 2440
    },
    {
      "epoch": 0.2820719889247808,
      "grad_norm": 0.18683601915836334,
      "learning_rate": 0.0001804782790496846,
      "loss": 0.9286,
      "step": 2445
    },
    {
      "epoch": 0.2826488232579603,
      "grad_norm": 0.1832648068666458,
      "learning_rate": 0.00018035859583283626,
      "loss": 1.0318,
      "step": 2450
    },
    {
      "epoch": 0.2832256575911398,
      "grad_norm": 0.1928461343050003,
      "learning_rate": 0.00018023858680075061,
      "loss": 0.9739,
      "step": 2455
    },
    {
      "epoch": 0.28380249192431933,
      "grad_norm": 0.19916784763336182,
      "learning_rate": 0.00018011825244000632,
      "loss": 0.9622,
      "step": 2460
    },
    {
      "epoch": 0.28437932625749884,
      "grad_norm": 0.1981884241104126,
      "learning_rate": 0.00017999759323850098,
      "loss": 0.9899,
      "step": 2465
    },
    {
      "epoch": 0.28495616059067835,
      "grad_norm": 0.19374872744083405,
      "learning_rate": 0.0001798766096854493,
      "loss": 1.0103,
      "step": 2470
    },
    {
      "epoch": 0.28553299492385786,
      "grad_norm": 0.18874534964561462,
      "learning_rate": 0.00017975530227138105,
      "loss": 0.9592,
      "step": 2475
    },
    {
      "epoch": 0.2861098292570374,
      "grad_norm": 0.1857873797416687,
      "learning_rate": 0.00017963367148813913,
      "loss": 0.9453,
      "step": 2480
    },
    {
      "epoch": 0.2866866635902169,
      "grad_norm": 0.18749921023845673,
      "learning_rate": 0.0001795117178288775,
      "loss": 0.9826,
      "step": 2485
    },
    {
      "epoch": 0.2872634979233964,
      "grad_norm": 0.19447381794452667,
      "learning_rate": 0.00017938944178805933,
      "loss": 0.9433,
      "step": 2490
    },
    {
      "epoch": 0.2878403322565759,
      "grad_norm": 0.18583492934703827,
      "learning_rate": 0.00017926684386145478,
      "loss": 0.8993,
      "step": 2495
    },
    {
      "epoch": 0.2884171665897554,
      "grad_norm": 0.21102139353752136,
      "learning_rate": 0.00017914392454613913,
      "loss": 1.0086,
      "step": 2500
    },
    {
      "epoch": 0.2889940009229349,
      "grad_norm": 0.19170348346233368,
      "learning_rate": 0.00017902068434049077,
      "loss": 0.9852,
      "step": 2505
    },
    {
      "epoch": 0.28957083525611443,
      "grad_norm": 0.1980980634689331,
      "learning_rate": 0.00017889712374418912,
      "loss": 0.9699,
      "step": 2510
    },
    {
      "epoch": 0.29014766958929394,
      "grad_norm": 0.20153307914733887,
      "learning_rate": 0.00017877324325821264,
      "loss": 0.951,
      "step": 2515
    },
    {
      "epoch": 0.29072450392247345,
      "grad_norm": 0.1984160840511322,
      "learning_rate": 0.00017864904338483676,
      "loss": 0.9704,
      "step": 2520
    },
    {
      "epoch": 0.29130133825565296,
      "grad_norm": 0.20981407165527344,
      "learning_rate": 0.00017852452462763192,
      "loss": 0.9606,
      "step": 2525
    },
    {
      "epoch": 0.2918781725888325,
      "grad_norm": 0.1887744665145874,
      "learning_rate": 0.00017839968749146142,
      "loss": 0.9673,
      "step": 2530
    },
    {
      "epoch": 0.292455006922012,
      "grad_norm": 0.2027631551027298,
      "learning_rate": 0.0001782745324824795,
      "loss": 0.9601,
      "step": 2535
    },
    {
      "epoch": 0.2930318412551915,
      "grad_norm": 0.20790278911590576,
      "learning_rate": 0.00017814906010812912,
      "loss": 0.9639,
      "step": 2540
    },
    {
      "epoch": 0.293608675588371,
      "grad_norm": 0.1964947134256363,
      "learning_rate": 0.00017802327087714016,
      "loss": 0.9848,
      "step": 2545
    },
    {
      "epoch": 0.2941855099215505,
      "grad_norm": 0.19823837280273438,
      "learning_rate": 0.00017789716529952704,
      "loss": 1.0059,
      "step": 2550
    },
    {
      "epoch": 0.29476234425473,
      "grad_norm": 0.20603398978710175,
      "learning_rate": 0.00017777074388658693,
      "loss": 0.9444,
      "step": 2555
    },
    {
      "epoch": 0.29533917858790953,
      "grad_norm": 0.20617981255054474,
      "learning_rate": 0.00017764400715089744,
      "loss": 0.9477,
      "step": 2560
    },
    {
      "epoch": 0.29591601292108904,
      "grad_norm": 0.1940777450799942,
      "learning_rate": 0.0001775169556063148,
      "loss": 0.9033,
      "step": 2565
    },
    {
      "epoch": 0.29649284725426855,
      "grad_norm": 0.2084151804447174,
      "learning_rate": 0.00017738958976797157,
      "loss": 0.9458,
      "step": 2570
    },
    {
      "epoch": 0.29706968158744806,
      "grad_norm": 0.21188698709011078,
      "learning_rate": 0.00017726191015227452,
      "loss": 1.0301,
      "step": 2575
    },
    {
      "epoch": 0.2976465159206276,
      "grad_norm": 0.2021496593952179,
      "learning_rate": 0.00017713391727690284,
      "loss": 0.972,
      "step": 2580
    },
    {
      "epoch": 0.2982233502538071,
      "grad_norm": 0.19584889709949493,
      "learning_rate": 0.0001770056116608057,
      "loss": 0.9382,
      "step": 2585
    },
    {
      "epoch": 0.2988001845869866,
      "grad_norm": 0.1912315934896469,
      "learning_rate": 0.0001768769938242003,
      "loss": 0.9805,
      "step": 2590
    },
    {
      "epoch": 0.2993770189201661,
      "grad_norm": 0.2060055434703827,
      "learning_rate": 0.0001767480642885698,
      "loss": 1.0436,
      "step": 2595
    },
    {
      "epoch": 0.2999538532533456,
      "grad_norm": 0.19867336750030518,
      "learning_rate": 0.00017661882357666105,
      "loss": 0.9456,
      "step": 2600
    },
    {
      "epoch": 0.3005306875865251,
      "grad_norm": 0.19426298141479492,
      "learning_rate": 0.00017648927221248264,
      "loss": 1.0145,
      "step": 2605
    },
    {
      "epoch": 0.30110752191970463,
      "grad_norm": 0.2108680009841919,
      "learning_rate": 0.00017635941072130268,
      "loss": 0.9791,
      "step": 2610
    },
    {
      "epoch": 0.30168435625288414,
      "grad_norm": 0.1892360895872116,
      "learning_rate": 0.00017622923962964672,
      "loss": 1.0363,
      "step": 2615
    },
    {
      "epoch": 0.30226119058606365,
      "grad_norm": 0.1932888627052307,
      "learning_rate": 0.0001760987594652956,
      "loss": 0.9866,
      "step": 2620
    },
    {
      "epoch": 0.3028380249192432,
      "grad_norm": 0.19689255952835083,
      "learning_rate": 0.00017596797075728322,
      "loss": 0.9844,
      "step": 2625
    },
    {
      "epoch": 0.30341485925242273,
      "grad_norm": 0.18802085518836975,
      "learning_rate": 0.00017583687403589454,
      "loss": 0.9317,
      "step": 2630
    },
    {
      "epoch": 0.30399169358560224,
      "grad_norm": 0.19586369395256042,
      "learning_rate": 0.0001757054698326634,
      "loss": 0.9983,
      "step": 2635
    },
    {
      "epoch": 0.30456852791878175,
      "grad_norm": 0.19697046279907227,
      "learning_rate": 0.00017557375868037026,
      "loss": 0.9707,
      "step": 2640
    },
    {
      "epoch": 0.30514536225196126,
      "grad_norm": 0.19013574719429016,
      "learning_rate": 0.0001754417411130401,
      "loss": 0.9764,
      "step": 2645
    },
    {
      "epoch": 0.30572219658514077,
      "grad_norm": 0.2056090384721756,
      "learning_rate": 0.0001753094176659403,
      "loss": 0.961,
      "step": 2650
    },
    {
      "epoch": 0.3062990309183203,
      "grad_norm": 0.18955689668655396,
      "learning_rate": 0.0001751767888755785,
      "loss": 0.9444,
      "step": 2655
    },
    {
      "epoch": 0.3068758652514998,
      "grad_norm": 0.20427672564983368,
      "learning_rate": 0.00017504385527970028,
      "loss": 1.0496,
      "step": 2660
    },
    {
      "epoch": 0.3074526995846793,
      "grad_norm": 0.20869238674640656,
      "learning_rate": 0.00017491061741728702,
      "loss": 0.9981,
      "step": 2665
    },
    {
      "epoch": 0.3080295339178588,
      "grad_norm": 0.2011020928621292,
      "learning_rate": 0.00017477707582855384,
      "loss": 0.9737,
      "step": 2670
    },
    {
      "epoch": 0.3086063682510383,
      "grad_norm": 0.20109429955482483,
      "learning_rate": 0.00017464323105494727,
      "loss": 0.9614,
      "step": 2675
    },
    {
      "epoch": 0.30918320258421783,
      "grad_norm": 0.1839284896850586,
      "learning_rate": 0.00017450908363914316,
      "loss": 1.0152,
      "step": 2680
    },
    {
      "epoch": 0.30976003691739734,
      "grad_norm": 0.24291379749774933,
      "learning_rate": 0.00017437463412504437,
      "loss": 0.9756,
      "step": 2685
    },
    {
      "epoch": 0.31033687125057685,
      "grad_norm": 0.20243053138256073,
      "learning_rate": 0.00017423988305777864,
      "loss": 0.9743,
      "step": 2690
    },
    {
      "epoch": 0.31091370558375636,
      "grad_norm": 0.20824618637561798,
      "learning_rate": 0.0001741048309836964,
      "loss": 0.9752,
      "step": 2695
    },
    {
      "epoch": 0.31149053991693587,
      "grad_norm": 0.19603955745697021,
      "learning_rate": 0.00017396947845036844,
      "loss": 0.9745,
      "step": 2700
    },
    {
      "epoch": 0.3120673742501154,
      "grad_norm": 0.2077784240245819,
      "learning_rate": 0.00017383382600658388,
      "loss": 0.9846,
      "step": 2705
    },
    {
      "epoch": 0.3126442085832949,
      "grad_norm": 0.21430762112140656,
      "learning_rate": 0.0001736978742023477,
      "loss": 1.0096,
      "step": 2710
    },
    {
      "epoch": 0.3132210429164744,
      "grad_norm": 0.18606549501419067,
      "learning_rate": 0.00017356162358887875,
      "loss": 0.9688,
      "step": 2715
    },
    {
      "epoch": 0.3137978772496539,
      "grad_norm": 0.1990072876214981,
      "learning_rate": 0.00017342507471860733,
      "loss": 0.9299,
      "step": 2720
    },
    {
      "epoch": 0.3143747115828334,
      "grad_norm": 0.191040500998497,
      "learning_rate": 0.0001732882281451731,
      "loss": 0.9946,
      "step": 2725
    },
    {
      "epoch": 0.31495154591601293,
      "grad_norm": 0.21565918624401093,
      "learning_rate": 0.0001731510844234227,
      "loss": 0.9862,
      "step": 2730
    },
    {
      "epoch": 0.31552838024919244,
      "grad_norm": 0.19910766184329987,
      "learning_rate": 0.0001730136441094076,
      "loss": 0.9582,
      "step": 2735
    },
    {
      "epoch": 0.31610521458237195,
      "grad_norm": 0.19215498864650726,
      "learning_rate": 0.00017287590776038177,
      "loss": 0.9837,
      "step": 2740
    },
    {
      "epoch": 0.31668204891555146,
      "grad_norm": 0.19923163950443268,
      "learning_rate": 0.0001727378759347995,
      "loss": 0.9536,
      "step": 2745
    },
    {
      "epoch": 0.31725888324873097,
      "grad_norm": 0.20635759830474854,
      "learning_rate": 0.0001725995491923131,
      "loss": 0.9745,
      "step": 2750
    },
    {
      "epoch": 0.3178357175819105,
      "grad_norm": 0.1964532434940338,
      "learning_rate": 0.00017246092809377058,
      "loss": 1.0143,
      "step": 2755
    },
    {
      "epoch": 0.31841255191509,
      "grad_norm": 0.21438036859035492,
      "learning_rate": 0.0001723220132012134,
      "loss": 0.9954,
      "step": 2760
    },
    {
      "epoch": 0.3189893862482695,
      "grad_norm": 0.2038785070180893,
      "learning_rate": 0.00017218280507787435,
      "loss": 1.0069,
      "step": 2765
    },
    {
      "epoch": 0.319566220581449,
      "grad_norm": 0.19554126262664795,
      "learning_rate": 0.00017204330428817496,
      "loss": 0.933,
      "step": 2770
    },
    {
      "epoch": 0.3201430549146285,
      "grad_norm": 0.20744913816452026,
      "learning_rate": 0.00017190351139772348,
      "loss": 0.9584,
      "step": 2775
    },
    {
      "epoch": 0.32071988924780803,
      "grad_norm": 0.20495720207691193,
      "learning_rate": 0.00017176342697331246,
      "loss": 0.9706,
      "step": 2780
    },
    {
      "epoch": 0.32129672358098754,
      "grad_norm": 0.20170055329799652,
      "learning_rate": 0.00017162305158291655,
      "loss": 0.9542,
      "step": 2785
    },
    {
      "epoch": 0.32187355791416705,
      "grad_norm": 0.18749262392520905,
      "learning_rate": 0.00017148238579568995,
      "loss": 0.8963,
      "step": 2790
    },
    {
      "epoch": 0.32245039224734656,
      "grad_norm": 0.2015407681465149,
      "learning_rate": 0.00017134143018196447,
      "loss": 0.9572,
      "step": 2795
    },
    {
      "epoch": 0.3230272265805261,
      "grad_norm": 0.197077214717865,
      "learning_rate": 0.00017120018531324689,
      "loss": 1.0026,
      "step": 2800
    },
    {
      "epoch": 0.3236040609137056,
      "grad_norm": 0.2059078812599182,
      "learning_rate": 0.00017105865176221684,
      "loss": 0.9523,
      "step": 2805
    },
    {
      "epoch": 0.3241808952468851,
      "grad_norm": 0.19224663078784943,
      "learning_rate": 0.00017091683010272447,
      "loss": 0.9887,
      "step": 2810
    },
    {
      "epoch": 0.3247577295800646,
      "grad_norm": 0.20333868265151978,
      "learning_rate": 0.00017077472090978798,
      "loss": 0.9438,
      "step": 2815
    },
    {
      "epoch": 0.3253345639132441,
      "grad_norm": 0.2038094699382782,
      "learning_rate": 0.00017063232475959133,
      "loss": 0.9884,
      "step": 2820
    },
    {
      "epoch": 0.3259113982464236,
      "grad_norm": 0.19815954566001892,
      "learning_rate": 0.00017048964222948217,
      "loss": 0.9506,
      "step": 2825
    },
    {
      "epoch": 0.32648823257960313,
      "grad_norm": 0.19894501566886902,
      "learning_rate": 0.00017034667389796904,
      "loss": 0.9787,
      "step": 2830
    },
    {
      "epoch": 0.32706506691278264,
      "grad_norm": 0.19535242021083832,
      "learning_rate": 0.00017020342034471944,
      "loss": 0.9805,
      "step": 2835
    },
    {
      "epoch": 0.32764190124596215,
      "grad_norm": 0.2212100327014923,
      "learning_rate": 0.00017005988215055718,
      "loss": 0.9826,
      "step": 2840
    },
    {
      "epoch": 0.32821873557914166,
      "grad_norm": 0.20346590876579285,
      "learning_rate": 0.00016991605989746025,
      "loss": 0.9496,
      "step": 2845
    },
    {
      "epoch": 0.3287955699123212,
      "grad_norm": 0.2145700603723526,
      "learning_rate": 0.00016977195416855828,
      "loss": 0.977,
      "step": 2850
    },
    {
      "epoch": 0.3293724042455007,
      "grad_norm": 0.18952029943466187,
      "learning_rate": 0.00016962756554813037,
      "loss": 0.8824,
      "step": 2855
    },
    {
      "epoch": 0.3299492385786802,
      "grad_norm": 0.2040802389383316,
      "learning_rate": 0.0001694828946216025,
      "loss": 0.9683,
      "step": 2860
    },
    {
      "epoch": 0.3305260729118597,
      "grad_norm": 0.19531820714473724,
      "learning_rate": 0.00016933794197554524,
      "loss": 1.005,
      "step": 2865
    },
    {
      "epoch": 0.3311029072450392,
      "grad_norm": 0.20256836712360382,
      "learning_rate": 0.00016919270819767152,
      "loss": 0.9877,
      "step": 2870
    },
    {
      "epoch": 0.3316797415782187,
      "grad_norm": 0.20027059316635132,
      "learning_rate": 0.00016904719387683407,
      "loss": 0.9758,
      "step": 2875
    },
    {
      "epoch": 0.33225657591139823,
      "grad_norm": 0.1954347789287567,
      "learning_rate": 0.00016890139960302304,
      "loss": 0.9962,
      "step": 2880
    },
    {
      "epoch": 0.33283341024457774,
      "grad_norm": 0.20312942564487457,
      "learning_rate": 0.00016875532596736373,
      "loss": 0.986,
      "step": 2885
    },
    {
      "epoch": 0.33341024457775725,
      "grad_norm": 0.20359157025814056,
      "learning_rate": 0.00016860897356211403,
      "loss": 0.9411,
      "step": 2890
    },
    {
      "epoch": 0.33398707891093676,
      "grad_norm": 0.19114482402801514,
      "learning_rate": 0.00016846234298066218,
      "loss": 0.9789,
      "step": 2895
    },
    {
      "epoch": 0.3345639132441163,
      "grad_norm": 0.19829359650611877,
      "learning_rate": 0.0001683154348175243,
      "loss": 0.9444,
      "step": 2900
    },
    {
      "epoch": 0.3351407475772958,
      "grad_norm": 0.18730616569519043,
      "learning_rate": 0.00016816824966834183,
      "loss": 0.9306,
      "step": 2905
    },
    {
      "epoch": 0.3357175819104753,
      "grad_norm": 0.2252754271030426,
      "learning_rate": 0.00016802078812987948,
      "loss": 0.9732,
      "step": 2910
    },
    {
      "epoch": 0.3362944162436548,
      "grad_norm": 0.20403608679771423,
      "learning_rate": 0.0001678730508000224,
      "loss": 0.9538,
      "step": 2915
    },
    {
      "epoch": 0.3368712505768343,
      "grad_norm": 0.23965264856815338,
      "learning_rate": 0.00016772503827777396,
      "loss": 0.9988,
      "step": 2920
    },
    {
      "epoch": 0.3374480849100138,
      "grad_norm": 0.20559944212436676,
      "learning_rate": 0.00016757675116325343,
      "loss": 0.9697,
      "step": 2925
    },
    {
      "epoch": 0.33802491924319333,
      "grad_norm": 0.19775480031967163,
      "learning_rate": 0.0001674281900576933,
      "loss": 0.9915,
      "step": 2930
    },
    {
      "epoch": 0.33860175357637284,
      "grad_norm": 0.20038728415966034,
      "learning_rate": 0.00016727935556343698,
      "loss": 0.9976,
      "step": 2935
    },
    {
      "epoch": 0.33917858790955235,
      "grad_norm": 0.20284847915172577,
      "learning_rate": 0.0001671302482839364,
      "loss": 1.0131,
      "step": 2940
    },
    {
      "epoch": 0.33975542224273186,
      "grad_norm": 0.18095138669013977,
      "learning_rate": 0.00016698086882374939,
      "loss": 0.9535,
      "step": 2945
    },
    {
      "epoch": 0.3403322565759114,
      "grad_norm": 0.18949155509471893,
      "learning_rate": 0.00016683121778853746,
      "loss": 0.9928,
      "step": 2950
    },
    {
      "epoch": 0.3409090909090909,
      "grad_norm": 0.18761120736598969,
      "learning_rate": 0.00016668129578506315,
      "loss": 0.968,
      "step": 2955
    },
    {
      "epoch": 0.3414859252422704,
      "grad_norm": 0.19716186821460724,
      "learning_rate": 0.00016653110342118764,
      "loss": 1.0469,
      "step": 2960
    },
    {
      "epoch": 0.3420627595754499,
      "grad_norm": 0.19194428622722626,
      "learning_rate": 0.0001663806413058684,
      "loss": 0.9824,
      "step": 2965
    },
    {
      "epoch": 0.3426395939086294,
      "grad_norm": 0.19304610788822174,
      "learning_rate": 0.00016622991004915645,
      "loss": 0.9739,
      "step": 2970
    },
    {
      "epoch": 0.343216428241809,
      "grad_norm": 0.21132609248161316,
      "learning_rate": 0.00016607891026219418,
      "loss": 1.0423,
      "step": 2975
    },
    {
      "epoch": 0.3437932625749885,
      "grad_norm": 0.21153788268566132,
      "learning_rate": 0.00016592764255721264,
      "loss": 0.9585,
      "step": 2980
    },
    {
      "epoch": 0.344370096908168,
      "grad_norm": 0.19157880544662476,
      "learning_rate": 0.00016577610754752925,
      "loss": 0.93,
      "step": 2985
    },
    {
      "epoch": 0.3449469312413475,
      "grad_norm": 0.2042781114578247,
      "learning_rate": 0.00016562430584754516,
      "loss": 0.9758,
      "step": 2990
    },
    {
      "epoch": 0.345523765574527,
      "grad_norm": 0.1973501592874527,
      "learning_rate": 0.00016547223807274287,
      "loss": 0.9822,
      "step": 2995
    },
    {
      "epoch": 0.34610059990770653,
      "grad_norm": 0.20457808673381805,
      "learning_rate": 0.00016531990483968357,
      "loss": 0.9275,
      "step": 3000
    },
    {
      "epoch": 0.34667743424088604,
      "grad_norm": 0.19708305597305298,
      "learning_rate": 0.00016516730676600493,
      "loss": 0.9457,
      "step": 3005
    },
    {
      "epoch": 0.34725426857406555,
      "grad_norm": 0.1883085072040558,
      "learning_rate": 0.00016501444447041824,
      "loss": 0.9429,
      "step": 3010
    },
    {
      "epoch": 0.34783110290724506,
      "grad_norm": 0.20550213754177094,
      "learning_rate": 0.00016486131857270628,
      "loss": 0.9335,
      "step": 3015
    },
    {
      "epoch": 0.34840793724042457,
      "grad_norm": 0.20632968842983246,
      "learning_rate": 0.00016470792969372039,
      "loss": 0.9335,
      "step": 3020
    },
    {
      "epoch": 0.3489847715736041,
      "grad_norm": 0.20688195526599884,
      "learning_rate": 0.00016455427845537835,
      "loss": 0.9474,
      "step": 3025
    },
    {
      "epoch": 0.3495616059067836,
      "grad_norm": 0.19943097233772278,
      "learning_rate": 0.0001644003654806616,
      "loss": 1.0254,
      "step": 3030
    },
    {
      "epoch": 0.3501384402399631,
      "grad_norm": 0.2087412327528,
      "learning_rate": 0.00016424619139361282,
      "loss": 1.0255,
      "step": 3035
    },
    {
      "epoch": 0.3507152745731426,
      "grad_norm": 0.1902855485677719,
      "learning_rate": 0.00016409175681933328,
      "loss": 0.9629,
      "step": 3040
    },
    {
      "epoch": 0.3512921089063221,
      "grad_norm": 0.19096340239048004,
      "learning_rate": 0.00016393706238398056,
      "loss": 0.9426,
      "step": 3045
    },
    {
      "epoch": 0.35186894323950163,
      "grad_norm": 0.20610526204109192,
      "learning_rate": 0.00016378210871476577,
      "loss": 0.8949,
      "step": 3050
    },
    {
      "epoch": 0.35244577757268114,
      "grad_norm": 0.20180396735668182,
      "learning_rate": 0.00016362689643995105,
      "loss": 0.9682,
      "step": 3055
    },
    {
      "epoch": 0.35302261190586065,
      "grad_norm": 0.2666982412338257,
      "learning_rate": 0.00016347142618884712,
      "loss": 0.999,
      "step": 3060
    },
    {
      "epoch": 0.35359944623904016,
      "grad_norm": 0.20752312242984772,
      "learning_rate": 0.00016331569859181062,
      "loss": 0.962,
      "step": 3065
    },
    {
      "epoch": 0.35417628057221967,
      "grad_norm": 0.21684733033180237,
      "learning_rate": 0.00016315971428024168,
      "loss": 0.9512,
      "step": 3070
    },
    {
      "epoch": 0.3547531149053992,
      "grad_norm": 0.20133435726165771,
      "learning_rate": 0.0001630034738865812,
      "loss": 0.9224,
      "step": 3075
    },
    {
      "epoch": 0.3553299492385787,
      "grad_norm": 0.1901036500930786,
      "learning_rate": 0.00016284697804430843,
      "loss": 0.928,
      "step": 3080
    },
    {
      "epoch": 0.3559067835717582,
      "grad_norm": 0.19342263042926788,
      "learning_rate": 0.00016269022738793832,
      "loss": 0.9801,
      "step": 3085
    },
    {
      "epoch": 0.3564836179049377,
      "grad_norm": 0.18987055122852325,
      "learning_rate": 0.00016253322255301887,
      "loss": 0.9403,
      "step": 3090
    },
    {
      "epoch": 0.3570604522381172,
      "grad_norm": 0.21176302433013916,
      "learning_rate": 0.0001623759641761289,
      "loss": 0.9854,
      "step": 3095
    },
    {
      "epoch": 0.35763728657129673,
      "grad_norm": 0.198608860373497,
      "learning_rate": 0.00016221845289487492,
      "loss": 0.9506,
      "step": 3100
    },
    {
      "epoch": 0.35821412090447624,
      "grad_norm": 0.1978042870759964,
      "learning_rate": 0.00016206068934788905,
      "loss": 0.9745,
      "step": 3105
    },
    {
      "epoch": 0.35879095523765575,
      "grad_norm": 0.20335493981838226,
      "learning_rate": 0.0001619026741748262,
      "loss": 0.9575,
      "step": 3110
    },
    {
      "epoch": 0.35936778957083526,
      "grad_norm": 0.2059904783964157,
      "learning_rate": 0.00016174440801636138,
      "loss": 0.9237,
      "step": 3115
    },
    {
      "epoch": 0.35994462390401477,
      "grad_norm": 0.20635630190372467,
      "learning_rate": 0.0001615858915141874,
      "loss": 0.9222,
      "step": 3120
    },
    {
      "epoch": 0.3605214582371943,
      "grad_norm": 0.2041216492652893,
      "learning_rate": 0.00016142712531101196,
      "loss": 0.9432,
      "step": 3125
    },
    {
      "epoch": 0.3610982925703738,
      "grad_norm": 0.2060927152633667,
      "learning_rate": 0.0001612681100505552,
      "loss": 0.9993,
      "step": 3130
    },
    {
      "epoch": 0.3616751269035533,
      "grad_norm": 0.21824884414672852,
      "learning_rate": 0.00016110884637754713,
      "loss": 0.9735,
      "step": 3135
    },
    {
      "epoch": 0.3622519612367328,
      "grad_norm": 0.20615214109420776,
      "learning_rate": 0.00016094933493772487,
      "loss": 1.022,
      "step": 3140
    },
    {
      "epoch": 0.3628287955699123,
      "grad_norm": 0.19122281670570374,
      "learning_rate": 0.00016078957637783017,
      "loss": 0.9451,
      "step": 3145
    },
    {
      "epoch": 0.36340562990309183,
      "grad_norm": 0.20327936112880707,
      "learning_rate": 0.00016062957134560675,
      "loss": 0.939,
      "step": 3150
    },
    {
      "epoch": 0.36398246423627134,
      "grad_norm": 0.18740370869636536,
      "learning_rate": 0.0001604693204897975,
      "loss": 0.9801,
      "step": 3155
    },
    {
      "epoch": 0.36455929856945085,
      "grad_norm": 0.21493248641490936,
      "learning_rate": 0.00016030882446014234,
      "loss": 1.0288,
      "step": 3160
    },
    {
      "epoch": 0.36513613290263036,
      "grad_norm": 0.21232721209526062,
      "learning_rate": 0.00016014808390737485,
      "loss": 0.9975,
      "step": 3165
    },
    {
      "epoch": 0.36571296723580987,
      "grad_norm": 0.1946122944355011,
      "learning_rate": 0.00015998709948322027,
      "loss": 0.9658,
      "step": 3170
    },
    {
      "epoch": 0.3662898015689894,
      "grad_norm": 0.18985234200954437,
      "learning_rate": 0.00015982587184039263,
      "loss": 0.9608,
      "step": 3175
    },
    {
      "epoch": 0.3668666359021689,
      "grad_norm": 0.21031589806079865,
      "learning_rate": 0.00015966440163259202,
      "loss": 0.9316,
      "step": 3180
    },
    {
      "epoch": 0.3674434702353484,
      "grad_norm": 0.19520461559295654,
      "learning_rate": 0.00015950268951450198,
      "loss": 0.9502,
      "step": 3185
    },
    {
      "epoch": 0.3680203045685279,
      "grad_norm": 0.20601095259189606,
      "learning_rate": 0.00015934073614178696,
      "loss": 0.9511,
      "step": 3190
    },
    {
      "epoch": 0.3685971389017074,
      "grad_norm": 0.19720254838466644,
      "learning_rate": 0.00015917854217108954,
      "loss": 0.9694,
      "step": 3195
    },
    {
      "epoch": 0.36917397323488693,
      "grad_norm": 0.19519665837287903,
      "learning_rate": 0.00015901610826002787,
      "loss": 0.9564,
      "step": 3200
    },
    {
      "epoch": 0.36975080756806644,
      "grad_norm": 0.21351169049739838,
      "learning_rate": 0.0001588534350671928,
      "loss": 0.9541,
      "step": 3205
    },
    {
      "epoch": 0.37032764190124595,
      "grad_norm": 0.214279443025589,
      "learning_rate": 0.00015869052325214554,
      "loss": 0.9811,
      "step": 3210
    },
    {
      "epoch": 0.37090447623442546,
      "grad_norm": 0.2172657698392868,
      "learning_rate": 0.00015852737347541465,
      "loss": 0.9756,
      "step": 3215
    },
    {
      "epoch": 0.37148131056760497,
      "grad_norm": 0.1879701018333435,
      "learning_rate": 0.00015836398639849355,
      "loss": 0.9628,
      "step": 3220
    },
    {
      "epoch": 0.3720581449007845,
      "grad_norm": 0.2050919383764267,
      "learning_rate": 0.00015820036268383785,
      "loss": 0.9715,
      "step": 3225
    },
    {
      "epoch": 0.372634979233964,
      "grad_norm": 0.18730495870113373,
      "learning_rate": 0.00015803650299486252,
      "loss": 0.9706,
      "step": 3230
    },
    {
      "epoch": 0.3732118135671435,
      "grad_norm": 0.19974547624588013,
      "learning_rate": 0.00015787240799593937,
      "loss": 0.9911,
      "step": 3235
    },
    {
      "epoch": 0.373788647900323,
      "grad_norm": 0.1979796588420868,
      "learning_rate": 0.00015770807835239424,
      "loss": 0.9154,
      "step": 3240
    },
    {
      "epoch": 0.3743654822335025,
      "grad_norm": 0.2047133445739746,
      "learning_rate": 0.00015754351473050435,
      "loss": 0.9593,
      "step": 3245
    },
    {
      "epoch": 0.37494231656668203,
      "grad_norm": 0.19649125635623932,
      "learning_rate": 0.0001573787177974956,
      "loss": 0.9831,
      "step": 3250
    },
    {
      "epoch": 0.37551915089986154,
      "grad_norm": 0.2003108412027359,
      "learning_rate": 0.00015721368822153986,
      "loss": 0.876,
      "step": 3255
    },
    {
      "epoch": 0.37609598523304105,
      "grad_norm": 0.21534393727779388,
      "learning_rate": 0.0001570484266717522,
      "loss": 1.0213,
      "step": 3260
    },
    {
      "epoch": 0.37667281956622056,
      "grad_norm": 0.19399525225162506,
      "learning_rate": 0.00015688293381818823,
      "loss": 0.9159,
      "step": 3265
    },
    {
      "epoch": 0.3772496538994001,
      "grad_norm": 0.19937078654766083,
      "learning_rate": 0.0001567172103318415,
      "loss": 0.9598,
      "step": 3270
    },
    {
      "epoch": 0.3778264882325796,
      "grad_norm": 0.1996837705373764,
      "learning_rate": 0.00015655125688464062,
      "loss": 0.9606,
      "step": 3275
    },
    {
      "epoch": 0.3784033225657591,
      "grad_norm": 0.2030685842037201,
      "learning_rate": 0.00015638507414944642,
      "loss": 0.9623,
      "step": 3280
    },
    {
      "epoch": 0.3789801568989386,
      "grad_norm": 0.20788177847862244,
      "learning_rate": 0.0001562186628000496,
      "loss": 1.0118,
      "step": 3285
    },
    {
      "epoch": 0.3795569912321181,
      "grad_norm": 0.19508282840251923,
      "learning_rate": 0.00015605202351116765,
      "loss": 0.9457,
      "step": 3290
    },
    {
      "epoch": 0.3801338255652976,
      "grad_norm": 0.19498229026794434,
      "learning_rate": 0.00015588515695844234,
      "loss": 0.9243,
      "step": 3295
    },
    {
      "epoch": 0.38071065989847713,
      "grad_norm": 0.2012222409248352,
      "learning_rate": 0.00015571806381843676,
      "loss": 0.9631,
      "step": 3300
    },
    {
      "epoch": 0.38128749423165664,
      "grad_norm": 0.1890476495027542,
      "learning_rate": 0.00015555074476863282,
      "loss": 0.931,
      "step": 3305
    },
    {
      "epoch": 0.38186432856483615,
      "grad_norm": 0.19449810683727264,
      "learning_rate": 0.00015538320048742835,
      "loss": 0.945,
      "step": 3310
    },
    {
      "epoch": 0.38244116289801566,
      "grad_norm": 0.19659358263015747,
      "learning_rate": 0.00015521543165413428,
      "loss": 0.9052,
      "step": 3315
    },
    {
      "epoch": 0.3830179972311952,
      "grad_norm": 0.1930277943611145,
      "learning_rate": 0.00015504743894897218,
      "loss": 0.9379,
      "step": 3320
    },
    {
      "epoch": 0.38359483156437474,
      "grad_norm": 0.1968124955892563,
      "learning_rate": 0.00015487922305307118,
      "loss": 0.9839,
      "step": 3325
    },
    {
      "epoch": 0.38417166589755425,
      "grad_norm": 0.20579907298088074,
      "learning_rate": 0.0001547107846484653,
      "loss": 0.9653,
      "step": 3330
    },
    {
      "epoch": 0.38474850023073376,
      "grad_norm": 0.193470761179924,
      "learning_rate": 0.00015454212441809095,
      "loss": 1.0168,
      "step": 3335
    },
    {
      "epoch": 0.38532533456391327,
      "grad_norm": 0.20029513537883759,
      "learning_rate": 0.00015437324304578363,
      "loss": 1.0071,
      "step": 3340
    },
    {
      "epoch": 0.3859021688970928,
      "grad_norm": 0.19662067294120789,
      "learning_rate": 0.00015420414121627575,
      "loss": 0.9741,
      "step": 3345
    },
    {
      "epoch": 0.3864790032302723,
      "grad_norm": 0.21813516318798065,
      "learning_rate": 0.00015403481961519334,
      "loss": 1.012,
      "step": 3350
    },
    {
      "epoch": 0.3870558375634518,
      "grad_norm": 0.19463011622428894,
      "learning_rate": 0.00015386527892905365,
      "loss": 0.9786,
      "step": 3355
    },
    {
      "epoch": 0.3876326718966313,
      "grad_norm": 0.19774432480335236,
      "learning_rate": 0.0001536955198452621,
      "loss": 0.9776,
      "step": 3360
    },
    {
      "epoch": 0.3882095062298108,
      "grad_norm": 0.2053624838590622,
      "learning_rate": 0.0001535255430521097,
      "loss": 0.9645,
      "step": 3365
    },
    {
      "epoch": 0.38878634056299033,
      "grad_norm": 0.20018287003040314,
      "learning_rate": 0.00015335534923877013,
      "loss": 0.9489,
      "step": 3370
    },
    {
      "epoch": 0.38936317489616984,
      "grad_norm": 0.1886557936668396,
      "learning_rate": 0.000153184939095297,
      "loss": 0.9604,
      "step": 3375
    },
    {
      "epoch": 0.38994000922934935,
      "grad_norm": 0.19467659294605255,
      "learning_rate": 0.00015301431331262095,
      "loss": 0.9485,
      "step": 3380
    },
    {
      "epoch": 0.39051684356252886,
      "grad_norm": 0.20318065583705902,
      "learning_rate": 0.00015284347258254704,
      "loss": 0.9876,
      "step": 3385
    },
    {
      "epoch": 0.39109367789570837,
      "grad_norm": 0.1873011291027069,
      "learning_rate": 0.0001526724175977518,
      "loss": 0.9538,
      "step": 3390
    },
    {
      "epoch": 0.3916705122288879,
      "grad_norm": 0.19449511170387268,
      "learning_rate": 0.0001525011490517805,
      "loss": 0.9369,
      "step": 3395
    },
    {
      "epoch": 0.3922473465620674,
      "grad_norm": 0.19777143001556396,
      "learning_rate": 0.00015232966763904416,
      "loss": 0.9926,
      "step": 3400
    },
    {
      "epoch": 0.3928241808952469,
      "grad_norm": 0.20093326270580292,
      "learning_rate": 0.00015215797405481704,
      "loss": 0.9737,
      "step": 3405
    },
    {
      "epoch": 0.3934010152284264,
      "grad_norm": 0.20396889746189117,
      "learning_rate": 0.00015198606899523352,
      "loss": 0.9654,
      "step": 3410
    },
    {
      "epoch": 0.3939778495616059,
      "grad_norm": 0.20018213987350464,
      "learning_rate": 0.00015181395315728554,
      "loss": 0.9835,
      "step": 3415
    },
    {
      "epoch": 0.39455468389478543,
      "grad_norm": 0.18661239743232727,
      "learning_rate": 0.00015164162723881947,
      "loss": 0.9946,
      "step": 3420
    },
    {
      "epoch": 0.39513151822796494,
      "grad_norm": 0.20598599314689636,
      "learning_rate": 0.00015146909193853363,
      "loss": 0.9495,
      "step": 3425
    },
    {
      "epoch": 0.39570835256114445,
      "grad_norm": 0.2064884752035141,
      "learning_rate": 0.0001512963479559752,
      "loss": 0.9524,
      "step": 3430
    },
    {
      "epoch": 0.39628518689432396,
      "grad_norm": 0.19914241135120392,
      "learning_rate": 0.00015112339599153746,
      "loss": 0.9643,
      "step": 3435
    },
    {
      "epoch": 0.39686202122750347,
      "grad_norm": 0.20408232510089874,
      "learning_rate": 0.00015095023674645698,
      "loss": 0.9757,
      "step": 3440
    },
    {
      "epoch": 0.397438855560683,
      "grad_norm": 0.1998940259218216,
      "learning_rate": 0.00015077687092281074,
      "loss": 0.9318,
      "step": 3445
    },
    {
      "epoch": 0.3980156898938625,
      "grad_norm": 0.20163275301456451,
      "learning_rate": 0.00015060329922351326,
      "loss": 0.9465,
      "step": 3450
    },
    {
      "epoch": 0.398592524227042,
      "grad_norm": 0.20353001356124878,
      "learning_rate": 0.0001504295223523139,
      "loss": 0.9631,
      "step": 3455
    },
    {
      "epoch": 0.3991693585602215,
      "grad_norm": 0.20348602533340454,
      "learning_rate": 0.00015025554101379379,
      "loss": 0.9685,
      "step": 3460
    },
    {
      "epoch": 0.399746192893401,
      "grad_norm": 0.19552728533744812,
      "learning_rate": 0.0001500813559133631,
      "loss": 0.9729,
      "step": 3465
    },
    {
      "epoch": 0.40032302722658053,
      "grad_norm": 0.19121238589286804,
      "learning_rate": 0.00014990696775725812,
      "loss": 0.9514,
      "step": 3470
    },
    {
      "epoch": 0.40089986155976004,
      "grad_norm": 0.18907782435417175,
      "learning_rate": 0.0001497323772525385,
      "loss": 0.9234,
      "step": 3475
    },
    {
      "epoch": 0.40147669589293955,
      "grad_norm": 0.19860269129276276,
      "learning_rate": 0.00014955758510708434,
      "loss": 0.9192,
      "step": 3480
    },
    {
      "epoch": 0.40205353022611906,
      "grad_norm": 0.19196364283561707,
      "learning_rate": 0.00014938259202959317,
      "loss": 0.9216,
      "step": 3485
    },
    {
      "epoch": 0.40263036455929857,
      "grad_norm": 0.20269255340099335,
      "learning_rate": 0.00014920739872957732,
      "loss": 0.9733,
      "step": 3490
    },
    {
      "epoch": 0.4032071988924781,
      "grad_norm": 0.20808875560760498,
      "learning_rate": 0.00014903200591736087,
      "loss": 0.9984,
      "step": 3495
    },
    {
      "epoch": 0.4037840332256576,
      "grad_norm": 0.20577386021614075,
      "learning_rate": 0.00014885641430407686,
      "loss": 0.9771,
      "step": 3500
    },
    {
      "epoch": 0.4043608675588371,
      "grad_norm": 0.20498362183570862,
      "learning_rate": 0.0001486806246016643,
      "loss": 0.9883,
      "step": 3505
    },
    {
      "epoch": 0.4049377018920166,
      "grad_norm": 0.1883656233549118,
      "learning_rate": 0.00014850463752286543,
      "loss": 0.9662,
      "step": 3510
    },
    {
      "epoch": 0.4055145362251961,
      "grad_norm": 0.18973584473133087,
      "learning_rate": 0.00014832845378122276,
      "loss": 0.9403,
      "step": 3515
    },
    {
      "epoch": 0.40609137055837563,
      "grad_norm": 0.20748434960842133,
      "learning_rate": 0.00014815207409107608,
      "loss": 0.945,
      "step": 3520
    },
    {
      "epoch": 0.40666820489155514,
      "grad_norm": 0.19455088675022125,
      "learning_rate": 0.00014797549916755975,
      "loss": 0.9646,
      "step": 3525
    },
    {
      "epoch": 0.40724503922473465,
      "grad_norm": 0.20486250519752502,
      "learning_rate": 0.0001477987297265997,
      "loss": 0.9901,
      "step": 3530
    },
    {
      "epoch": 0.40782187355791416,
      "grad_norm": 0.20417264103889465,
      "learning_rate": 0.0001476217664849105,
      "loss": 0.9385,
      "step": 3535
    },
    {
      "epoch": 0.40839870789109367,
      "grad_norm": 0.19334882497787476,
      "learning_rate": 0.00014744461015999248,
      "loss": 1.0049,
      "step": 3540
    },
    {
      "epoch": 0.4089755422242732,
      "grad_norm": 0.18688935041427612,
      "learning_rate": 0.00014726726147012889,
      "loss": 0.9574,
      "step": 3545
    },
    {
      "epoch": 0.4095523765574527,
      "grad_norm": 0.1947825849056244,
      "learning_rate": 0.00014708972113438285,
      "loss": 0.957,
      "step": 3550
    },
    {
      "epoch": 0.4101292108906322,
      "grad_norm": 0.20580242574214935,
      "learning_rate": 0.00014691198987259454,
      "loss": 0.9648,
      "step": 3555
    },
    {
      "epoch": 0.4107060452238117,
      "grad_norm": 0.1960141509771347,
      "learning_rate": 0.00014673406840537824,
      "loss": 0.9558,
      "step": 3560
    },
    {
      "epoch": 0.4112828795569912,
      "grad_norm": 0.188209667801857,
      "learning_rate": 0.00014655595745411955,
      "loss": 0.9761,
      "step": 3565
    },
    {
      "epoch": 0.41185971389017073,
      "grad_norm": 0.20472858846187592,
      "learning_rate": 0.00014637765774097206,
      "loss": 0.9597,
      "step": 3570
    },
    {
      "epoch": 0.41243654822335024,
      "grad_norm": 0.20903244614601135,
      "learning_rate": 0.000146199169988855,
      "loss": 0.9749,
      "step": 3575
    },
    {
      "epoch": 0.41301338255652975,
      "grad_norm": 0.19724762439727783,
      "learning_rate": 0.00014602049492144984,
      "loss": 0.9621,
      "step": 3580
    },
    {
      "epoch": 0.41359021688970926,
      "grad_norm": 0.190316304564476,
      "learning_rate": 0.00014584163326319754,
      "loss": 0.9804,
      "step": 3585
    },
    {
      "epoch": 0.41416705122288877,
      "grad_norm": 0.18492008745670319,
      "learning_rate": 0.00014566258573929557,
      "loss": 0.9791,
      "step": 3590
    },
    {
      "epoch": 0.4147438855560683,
      "grad_norm": 0.19851504266262054,
      "learning_rate": 0.0001454833530756951,
      "loss": 0.9497,
      "step": 3595
    },
    {
      "epoch": 0.4153207198892478,
      "grad_norm": 0.20571914315223694,
      "learning_rate": 0.0001453039359990979,
      "loss": 1.0079,
      "step": 3600
    },
    {
      "epoch": 0.4158975542224273,
      "grad_norm": 0.22042030096054077,
      "learning_rate": 0.00014512433523695332,
      "loss": 0.9887,
      "step": 3605
    },
    {
      "epoch": 0.4164743885556068,
      "grad_norm": 0.1839991807937622,
      "learning_rate": 0.0001449445515174557,
      "loss": 0.9404,
      "step": 3610
    },
    {
      "epoch": 0.4170512228887863,
      "grad_norm": 0.19041991233825684,
      "learning_rate": 0.000144764585569541,
      "loss": 0.9374,
      "step": 3615
    },
    {
      "epoch": 0.41762805722196583,
      "grad_norm": 0.19904352724552155,
      "learning_rate": 0.00014458443812288415,
      "loss": 0.9219,
      "step": 3620
    },
    {
      "epoch": 0.41820489155514534,
      "grad_norm": 0.19090399146080017,
      "learning_rate": 0.00014440410990789582,
      "loss": 0.9379,
      "step": 3625
    },
    {
      "epoch": 0.41878172588832485,
      "grad_norm": 0.18457092344760895,
      "learning_rate": 0.00014422360165571976,
      "loss": 0.9966,
      "step": 3630
    },
    {
      "epoch": 0.41935856022150436,
      "grad_norm": 0.20009112358093262,
      "learning_rate": 0.0001440429140982296,
      "loss": 0.9714,
      "step": 3635
    },
    {
      "epoch": 0.41993539455468387,
      "grad_norm": 0.19650514423847198,
      "learning_rate": 0.000143862047968026,
      "loss": 0.9502,
      "step": 3640
    },
    {
      "epoch": 0.4205122288878634,
      "grad_norm": 0.20459064841270447,
      "learning_rate": 0.00014368100399843366,
      "loss": 0.9208,
      "step": 3645
    },
    {
      "epoch": 0.4210890632210429,
      "grad_norm": 0.20869080722332,
      "learning_rate": 0.00014349978292349825,
      "loss": 0.9478,
      "step": 3650
    },
    {
      "epoch": 0.4216658975542224,
      "grad_norm": 0.21228952705860138,
      "learning_rate": 0.0001433183854779836,
      "loss": 0.957,
      "step": 3655
    },
    {
      "epoch": 0.4222427318874019,
      "grad_norm": 0.19315268099308014,
      "learning_rate": 0.00014313681239736865,
      "loss": 0.973,
      "step": 3660
    },
    {
      "epoch": 0.4228195662205814,
      "grad_norm": 0.19466190040111542,
      "learning_rate": 0.00014295506441784435,
      "loss": 1.0031,
      "step": 3665
    },
    {
      "epoch": 0.423396400553761,
      "grad_norm": 0.1872572898864746,
      "learning_rate": 0.00014277314227631086,
      "loss": 0.9398,
      "step": 3670
    },
    {
      "epoch": 0.4239732348869405,
      "grad_norm": 0.18912255764007568,
      "learning_rate": 0.00014259104671037452,
      "loss": 0.9261,
      "step": 3675
    },
    {
      "epoch": 0.42455006922012,
      "grad_norm": 0.18942441046237946,
      "learning_rate": 0.00014240877845834472,
      "loss": 0.9511,
      "step": 3680
    },
    {
      "epoch": 0.4251269035532995,
      "grad_norm": 0.2267604023218155,
      "learning_rate": 0.00014222633825923108,
      "loss": 0.9919,
      "step": 3685
    },
    {
      "epoch": 0.42570373788647903,
      "grad_norm": 0.19375835359096527,
      "learning_rate": 0.00014204372685274039,
      "loss": 0.9862,
      "step": 3690
    },
    {
      "epoch": 0.42628057221965854,
      "grad_norm": 0.19178146123886108,
      "learning_rate": 0.00014186094497927352,
      "loss": 0.9777,
      "step": 3695
    },
    {
      "epoch": 0.42685740655283805,
      "grad_norm": 0.198550745844841,
      "learning_rate": 0.00014167799337992258,
      "loss": 0.936,
      "step": 3700
    },
    {
      "epoch": 0.42743424088601756,
      "grad_norm": 0.2064967155456543,
      "learning_rate": 0.00014149487279646781,
      "loss": 0.9877,
      "step": 3705
    },
    {
      "epoch": 0.42801107521919707,
      "grad_norm": 0.20294541120529175,
      "learning_rate": 0.00014131158397137462,
      "loss": 0.9306,
      "step": 3710
    },
    {
      "epoch": 0.4285879095523766,
      "grad_norm": 0.21081644296646118,
      "learning_rate": 0.00014112812764779053,
      "loss": 0.9482,
      "step": 3715
    },
    {
      "epoch": 0.4291647438855561,
      "grad_norm": 0.20666174590587616,
      "learning_rate": 0.00014094450456954218,
      "loss": 0.9848,
      "step": 3720
    },
    {
      "epoch": 0.4297415782187356,
      "grad_norm": 0.19854900240898132,
      "learning_rate": 0.00014076071548113238,
      "loss": 0.9593,
      "step": 3725
    },
    {
      "epoch": 0.4303184125519151,
      "grad_norm": 0.2024005800485611,
      "learning_rate": 0.0001405767611277369,
      "loss": 0.9753,
      "step": 3730
    },
    {
      "epoch": 0.4308952468850946,
      "grad_norm": 0.19519449770450592,
      "learning_rate": 0.00014039264225520175,
      "loss": 0.9844,
      "step": 3735
    },
    {
      "epoch": 0.43147208121827413,
      "grad_norm": 0.19514302909374237,
      "learning_rate": 0.0001402083596100399,
      "loss": 0.991,
      "step": 3740
    },
    {
      "epoch": 0.43204891555145364,
      "grad_norm": 0.1931222528219223,
      "learning_rate": 0.00014002391393942826,
      "loss": 0.9693,
      "step": 3745
    },
    {
      "epoch": 0.43262574988463315,
      "grad_norm": 0.203064426779747,
      "learning_rate": 0.00013983930599120487,
      "loss": 0.9822,
      "step": 3750
    },
    {
      "epoch": 0.43320258421781266,
      "grad_norm": 0.19102463126182556,
      "learning_rate": 0.0001396545365138657,
      "loss": 0.9643,
      "step": 3755
    },
    {
      "epoch": 0.43377941855099217,
      "grad_norm": 0.18826338648796082,
      "learning_rate": 0.00013946960625656153,
      "loss": 0.9706,
      "step": 3760
    },
    {
      "epoch": 0.4343562528841717,
      "grad_norm": 0.20435942709445953,
      "learning_rate": 0.00013928451596909516,
      "loss": 0.9388,
      "step": 3765
    },
    {
      "epoch": 0.4349330872173512,
      "grad_norm": 0.18556763231754303,
      "learning_rate": 0.00013909926640191813,
      "loss": 0.9218,
      "step": 3770
    },
    {
      "epoch": 0.4355099215505307,
      "grad_norm": 0.1957630217075348,
      "learning_rate": 0.0001389138583061279,
      "loss": 1.0146,
      "step": 3775
    },
    {
      "epoch": 0.4360867558837102,
      "grad_norm": 0.19505439698696136,
      "learning_rate": 0.00013872829243346453,
      "loss": 1.0006,
      "step": 3780
    },
    {
      "epoch": 0.4366635902168897,
      "grad_norm": 0.20385412871837616,
      "learning_rate": 0.00013854256953630797,
      "loss": 0.997,
      "step": 3785
    },
    {
      "epoch": 0.43724042455006923,
      "grad_norm": 0.1895400434732437,
      "learning_rate": 0.00013835669036767466,
      "loss": 1.0025,
      "step": 3790
    },
    {
      "epoch": 0.43781725888324874,
      "grad_norm": 0.19983290135860443,
      "learning_rate": 0.00013817065568121477,
      "loss": 0.9643,
      "step": 3795
    },
    {
      "epoch": 0.43839409321642825,
      "grad_norm": 0.20115922391414642,
      "learning_rate": 0.00013798446623120893,
      "loss": 0.9225,
      "step": 3800
    },
    {
      "epoch": 0.43897092754960776,
      "grad_norm": 0.19573155045509338,
      "learning_rate": 0.00013779812277256537,
      "loss": 0.9408,
      "step": 3805
    },
    {
      "epoch": 0.43954776188278727,
      "grad_norm": 0.21077951788902283,
      "learning_rate": 0.0001376116260608166,
      "loss": 0.9573,
      "step": 3810
    },
    {
      "epoch": 0.4401245962159668,
      "grad_norm": 0.2025730162858963,
      "learning_rate": 0.0001374249768521166,
      "loss": 0.9674,
      "step": 3815
    },
    {
      "epoch": 0.4407014305491463,
      "grad_norm": 0.1951024830341339,
      "learning_rate": 0.0001372381759032377,
      "loss": 0.9782,
      "step": 3820
    },
    {
      "epoch": 0.4412782648823258,
      "grad_norm": 0.1892070323228836,
      "learning_rate": 0.00013705122397156727,
      "loss": 0.9297,
      "step": 3825
    },
    {
      "epoch": 0.4418550992155053,
      "grad_norm": 0.2148219794034958,
      "learning_rate": 0.00013686412181510504,
      "loss": 0.9735,
      "step": 3830
    },
    {
      "epoch": 0.4424319335486848,
      "grad_norm": 0.2071019560098648,
      "learning_rate": 0.0001366768701924598,
      "loss": 0.9767,
      "step": 3835
    },
    {
      "epoch": 0.44300876788186433,
      "grad_norm": 0.19552481174468994,
      "learning_rate": 0.0001364894698628462,
      "loss": 0.9675,
      "step": 3840
    },
    {
      "epoch": 0.44358560221504384,
      "grad_norm": 0.2387782335281372,
      "learning_rate": 0.00013630192158608202,
      "loss": 1.0083,
      "step": 3845
    },
    {
      "epoch": 0.44416243654822335,
      "grad_norm": 0.19381476938724518,
      "learning_rate": 0.00013611422612258477,
      "loss": 0.9669,
      "step": 3850
    },
    {
      "epoch": 0.44473927088140286,
      "grad_norm": 0.19321493804454803,
      "learning_rate": 0.00013592638423336875,
      "loss": 0.9885,
      "step": 3855
    },
    {
      "epoch": 0.44531610521458237,
      "grad_norm": 0.20854459702968597,
      "learning_rate": 0.00013573839668004202,
      "loss": 1.0008,
      "step": 3860
    },
    {
      "epoch": 0.4458929395477619,
      "grad_norm": 0.2074405997991562,
      "learning_rate": 0.00013555026422480313,
      "loss": 0.9222,
      "step": 3865
    },
    {
      "epoch": 0.4464697738809414,
      "grad_norm": 0.194743350148201,
      "learning_rate": 0.00013536198763043823,
      "loss": 0.9856,
      "step": 3870
    },
    {
      "epoch": 0.4470466082141209,
      "grad_norm": 0.20063389837741852,
      "learning_rate": 0.00013517356766031777,
      "loss": 1.0056,
      "step": 3875
    },
    {
      "epoch": 0.4476234425473004,
      "grad_norm": 0.1934989094734192,
      "learning_rate": 0.00013498500507839363,
      "loss": 0.9928,
      "step": 3880
    },
    {
      "epoch": 0.4482002768804799,
      "grad_norm": 0.20212046802043915,
      "learning_rate": 0.00013479630064919593,
      "loss": 0.8963,
      "step": 3885
    },
    {
      "epoch": 0.44877711121365943,
      "grad_norm": 0.21239322423934937,
      "learning_rate": 0.00013460745513782976,
      "loss": 0.9812,
      "step": 3890
    },
    {
      "epoch": 0.44935394554683894,
      "grad_norm": 0.2325180619955063,
      "learning_rate": 0.0001344184693099724,
      "loss": 0.9476,
      "step": 3895
    },
    {
      "epoch": 0.44993077988001845,
      "grad_norm": 0.19209115207195282,
      "learning_rate": 0.00013422934393186994,
      "loss": 0.9289,
      "step": 3900
    },
    {
      "epoch": 0.45050761421319796,
      "grad_norm": 0.19307377934455872,
      "learning_rate": 0.0001340400797703343,
      "loss": 0.983,
      "step": 3905
    },
    {
      "epoch": 0.45108444854637747,
      "grad_norm": 0.19258156418800354,
      "learning_rate": 0.00013385067759274014,
      "loss": 0.986,
      "step": 3910
    },
    {
      "epoch": 0.451661282879557,
      "grad_norm": 0.1946646273136139,
      "learning_rate": 0.00013366113816702164,
      "loss": 0.9962,
      "step": 3915
    },
    {
      "epoch": 0.4522381172127365,
      "grad_norm": 0.20190544426441193,
      "learning_rate": 0.0001334714622616695,
      "loss": 0.9591,
      "step": 3920
    },
    {
      "epoch": 0.452814951545916,
      "grad_norm": 0.1982085108757019,
      "learning_rate": 0.0001332816506457278,
      "loss": 0.9545,
      "step": 3925
    },
    {
      "epoch": 0.4533917858790955,
      "grad_norm": 0.18541787564754486,
      "learning_rate": 0.0001330917040887908,
      "loss": 0.9652,
      "step": 3930
    },
    {
      "epoch": 0.453968620212275,
      "grad_norm": 0.19790363311767578,
      "learning_rate": 0.00013290162336099996,
      "loss": 0.923,
      "step": 3935
    },
    {
      "epoch": 0.45454545454545453,
      "grad_norm": 0.21102024614810944,
      "learning_rate": 0.00013271140923304064,
      "loss": 0.9701,
      "step": 3940
    },
    {
      "epoch": 0.45512228887863404,
      "grad_norm": 0.1868615597486496,
      "learning_rate": 0.00013252106247613914,
      "loss": 0.9216,
      "step": 3945
    },
    {
      "epoch": 0.45569912321181355,
      "grad_norm": 0.19408555328845978,
      "learning_rate": 0.00013233058386205948,
      "loss": 0.9397,
      "step": 3950
    },
    {
      "epoch": 0.45627595754499306,
      "grad_norm": 0.19673417508602142,
      "learning_rate": 0.00013213997416310034,
      "loss": 0.9116,
      "step": 3955
    },
    {
      "epoch": 0.45685279187817257,
      "grad_norm": 0.2141309231519699,
      "learning_rate": 0.00013194923415209183,
      "loss": 0.9748,
      "step": 3960
    },
    {
      "epoch": 0.4574296262113521,
      "grad_norm": 0.19085197150707245,
      "learning_rate": 0.00013175836460239243,
      "loss": 1.0119,
      "step": 3965
    },
    {
      "epoch": 0.4580064605445316,
      "grad_norm": 0.19188842177391052,
      "learning_rate": 0.00013156736628788584,
      "loss": 0.9487,
      "step": 3970
    },
    {
      "epoch": 0.4585832948777111,
      "grad_norm": 0.19430477917194366,
      "learning_rate": 0.00013137623998297785,
      "loss": 0.9753,
      "step": 3975
    },
    {
      "epoch": 0.4591601292108906,
      "grad_norm": 0.1962517946958542,
      "learning_rate": 0.00013118498646259323,
      "loss": 0.9655,
      "step": 3980
    },
    {
      "epoch": 0.4597369635440701,
      "grad_norm": 0.20422694087028503,
      "learning_rate": 0.0001309936065021724,
      "loss": 1.0045,
      "step": 3985
    },
    {
      "epoch": 0.46031379787724963,
      "grad_norm": 0.19590309262275696,
      "learning_rate": 0.0001308021008776686,
      "loss": 0.9858,
      "step": 3990
    },
    {
      "epoch": 0.46089063221042914,
      "grad_norm": 0.19435778260231018,
      "learning_rate": 0.00013061047036554444,
      "loss": 0.9605,
      "step": 3995
    },
    {
      "epoch": 0.46146746654360865,
      "grad_norm": 0.2044801414012909,
      "learning_rate": 0.00013041871574276905,
      "loss": 0.9802,
      "step": 4000
    },
    {
      "epoch": 0.46204430087678816,
      "grad_norm": 0.21514268219470978,
      "learning_rate": 0.00013022683778681458,
      "loss": 0.9468,
      "step": 4005
    },
    {
      "epoch": 0.46262113520996767,
      "grad_norm": 0.19823044538497925,
      "learning_rate": 0.00013003483727565344,
      "loss": 0.9975,
      "step": 4010
    },
    {
      "epoch": 0.4631979695431472,
      "grad_norm": 0.1900324672460556,
      "learning_rate": 0.00012984271498775473,
      "loss": 0.9505,
      "step": 4015
    },
    {
      "epoch": 0.46377480387632675,
      "grad_norm": 0.20325466990470886,
      "learning_rate": 0.00012965047170208145,
      "loss": 0.958,
      "step": 4020
    },
    {
      "epoch": 0.46435163820950626,
      "grad_norm": 0.18988734483718872,
      "learning_rate": 0.00012945810819808715,
      "loss": 0.9684,
      "step": 4025
    },
    {
      "epoch": 0.46492847254268577,
      "grad_norm": 0.19148334860801697,
      "learning_rate": 0.00012926562525571273,
      "loss": 0.953,
      "step": 4030
    },
    {
      "epoch": 0.4655053068758653,
      "grad_norm": 0.19238923490047455,
      "learning_rate": 0.00012907302365538348,
      "loss": 0.9194,
      "step": 4035
    },
    {
      "epoch": 0.4660821412090448,
      "grad_norm": 0.1843547224998474,
      "learning_rate": 0.0001288803041780057,
      "loss": 0.9375,
      "step": 4040
    },
    {
      "epoch": 0.4666589755422243,
      "grad_norm": 0.20036543905735016,
      "learning_rate": 0.0001286874676049637,
      "loss": 0.9753,
      "step": 4045
    },
    {
      "epoch": 0.4672358098754038,
      "grad_norm": 0.19511838257312775,
      "learning_rate": 0.00012849451471811643,
      "loss": 0.9217,
      "step": 4050
    },
    {
      "epoch": 0.4678126442085833,
      "grad_norm": 0.2069857269525528,
      "learning_rate": 0.00012830144629979456,
      "loss": 0.9803,
      "step": 4055
    },
    {
      "epoch": 0.4683894785417628,
      "grad_norm": 0.19766615331172943,
      "learning_rate": 0.00012810826313279717,
      "loss": 0.9806,
      "step": 4060
    },
    {
      "epoch": 0.46896631287494234,
      "grad_norm": 0.19592691957950592,
      "learning_rate": 0.00012791496600038854,
      "loss": 0.9848,
      "step": 4065
    },
    {
      "epoch": 0.46954314720812185,
      "grad_norm": 0.19588027894496918,
      "learning_rate": 0.00012772155568629499,
      "loss": 0.9274,
      "step": 4070
    },
    {
      "epoch": 0.47011998154130136,
      "grad_norm": 0.2110724300146103,
      "learning_rate": 0.00012752803297470187,
      "loss": 0.9989,
      "step": 4075
    },
    {
      "epoch": 0.47069681587448087,
      "grad_norm": 0.2043410688638687,
      "learning_rate": 0.00012733439865025012,
      "loss": 0.9706,
      "step": 4080
    },
    {
      "epoch": 0.4712736502076604,
      "grad_norm": 0.22159966826438904,
      "learning_rate": 0.0001271406534980333,
      "loss": 0.9409,
      "step": 4085
    },
    {
      "epoch": 0.4718504845408399,
      "grad_norm": 0.19468598067760468,
      "learning_rate": 0.0001269467983035943,
      "loss": 0.9749,
      "step": 4090
    },
    {
      "epoch": 0.4724273188740194,
      "grad_norm": 0.1822662055492401,
      "learning_rate": 0.00012675283385292212,
      "loss": 0.9994,
      "step": 4095
    },
    {
      "epoch": 0.4730041532071989,
      "grad_norm": 0.19376279413700104,
      "learning_rate": 0.00012655876093244878,
      "loss": 0.9757,
      "step": 4100
    },
    {
      "epoch": 0.4735809875403784,
      "grad_norm": 0.18976148962974548,
      "learning_rate": 0.00012636458032904617,
      "loss": 1.0159,
      "step": 4105
    },
    {
      "epoch": 0.4741578218735579,
      "grad_norm": 0.20439419150352478,
      "learning_rate": 0.00012617029283002265,
      "loss": 1.0269,
      "step": 4110
    },
    {
      "epoch": 0.47473465620673744,
      "grad_norm": 0.18871116638183594,
      "learning_rate": 0.00012597589922312008,
      "loss": 0.944,
      "step": 4115
    },
    {
      "epoch": 0.47531149053991695,
      "grad_norm": 0.19103854894638062,
      "learning_rate": 0.00012578140029651053,
      "loss": 0.9384,
      "step": 4120
    },
    {
      "epoch": 0.47588832487309646,
      "grad_norm": 0.1954331398010254,
      "learning_rate": 0.00012558679683879301,
      "loss": 0.9838,
      "step": 4125
    },
    {
      "epoch": 0.47646515920627597,
      "grad_norm": 0.20008032023906708,
      "learning_rate": 0.0001253920896389905,
      "loss": 1.012,
      "step": 4130
    },
    {
      "epoch": 0.4770419935394555,
      "grad_norm": 0.18915073573589325,
      "learning_rate": 0.00012519727948654642,
      "loss": 0.965,
      "step": 4135
    },
    {
      "epoch": 0.477618827872635,
      "grad_norm": 0.19700497388839722,
      "learning_rate": 0.00012500236717132178,
      "loss": 1.0043,
      "step": 4140
    },
    {
      "epoch": 0.4781956622058145,
      "grad_norm": 0.23750852048397064,
      "learning_rate": 0.0001248073534835917,
      "loss": 0.955,
      "step": 4145
    },
    {
      "epoch": 0.478772496538994,
      "grad_norm": 0.1965513378381729,
      "learning_rate": 0.0001246122392140424,
      "loss": 0.9257,
      "step": 4150
    },
    {
      "epoch": 0.4793493308721735,
      "grad_norm": 0.20664075016975403,
      "learning_rate": 0.00012441702515376786,
      "loss": 0.9276,
      "step": 4155
    },
    {
      "epoch": 0.47992616520535303,
      "grad_norm": 0.20796158909797668,
      "learning_rate": 0.0001242217120942666,
      "loss": 0.9813,
      "step": 4160
    },
    {
      "epoch": 0.48050299953853254,
      "grad_norm": 0.19869232177734375,
      "learning_rate": 0.00012402630082743868,
      "loss": 0.9262,
      "step": 4165
    },
    {
      "epoch": 0.48107983387171205,
      "grad_norm": 0.1941554695367813,
      "learning_rate": 0.00012383079214558227,
      "loss": 0.9682,
      "step": 4170
    },
    {
      "epoch": 0.48165666820489156,
      "grad_norm": 0.19581513106822968,
      "learning_rate": 0.00012363518684139043,
      "loss": 0.952,
      "step": 4175
    },
    {
      "epoch": 0.48223350253807107,
      "grad_norm": 0.18992619216442108,
      "learning_rate": 0.00012343948570794815,
      "loss": 0.9541,
      "step": 4180
    },
    {
      "epoch": 0.4828103368712506,
      "grad_norm": 0.19012705981731415,
      "learning_rate": 0.00012324368953872883,
      "loss": 0.9461,
      "step": 4185
    },
    {
      "epoch": 0.4833871712044301,
      "grad_norm": 0.24560825526714325,
      "learning_rate": 0.00012304779912759118,
      "loss": 0.9679,
      "step": 4190
    },
    {
      "epoch": 0.4839640055376096,
      "grad_norm": 0.19769693911075592,
      "learning_rate": 0.00012285181526877615,
      "loss": 0.926,
      "step": 4195
    },
    {
      "epoch": 0.4845408398707891,
      "grad_norm": 0.20233696699142456,
      "learning_rate": 0.00012265573875690344,
      "loss": 0.9854,
      "step": 4200
    },
    {
      "epoch": 0.4851176742039686,
      "grad_norm": 0.1947474330663681,
      "learning_rate": 0.0001224595703869685,
      "loss": 0.9606,
      "step": 4205
    },
    {
      "epoch": 0.48569450853714813,
      "grad_norm": 0.1961849331855774,
      "learning_rate": 0.0001222633109543392,
      "loss": 1.0326,
      "step": 4210
    },
    {
      "epoch": 0.48627134287032764,
      "grad_norm": 0.1963188499212265,
      "learning_rate": 0.00012206696125475249,
      "loss": 0.979,
      "step": 4215
    },
    {
      "epoch": 0.48684817720350715,
      "grad_norm": 0.19063878059387207,
      "learning_rate": 0.00012187052208431158,
      "loss": 0.9483,
      "step": 4220
    },
    {
      "epoch": 0.48742501153668666,
      "grad_norm": 0.2054065316915512,
      "learning_rate": 0.0001216739942394822,
      "loss": 0.9725,
      "step": 4225
    },
    {
      "epoch": 0.48800184586986617,
      "grad_norm": 0.19815371930599213,
      "learning_rate": 0.00012147737851708973,
      "loss": 0.9445,
      "step": 4230
    },
    {
      "epoch": 0.4885786802030457,
      "grad_norm": 0.19612999260425568,
      "learning_rate": 0.00012128067571431583,
      "loss": 0.9167,
      "step": 4235
    },
    {
      "epoch": 0.4891555145362252,
      "grad_norm": 0.18682846426963806,
      "learning_rate": 0.00012108388662869519,
      "loss": 0.9596,
      "step": 4240
    },
    {
      "epoch": 0.4897323488694047,
      "grad_norm": 0.19077306985855103,
      "learning_rate": 0.0001208870120581124,
      "loss": 0.9491,
      "step": 4245
    },
    {
      "epoch": 0.4903091832025842,
      "grad_norm": 0.1911584734916687,
      "learning_rate": 0.00012069005280079862,
      "loss": 0.9399,
      "step": 4250
    },
    {
      "epoch": 0.4908860175357637,
      "grad_norm": 0.18746261298656464,
      "learning_rate": 0.00012049300965532832,
      "loss": 0.9954,
      "step": 4255
    },
    {
      "epoch": 0.49146285186894323,
      "grad_norm": 0.19653597474098206,
      "learning_rate": 0.00012029588342061621,
      "loss": 0.9635,
      "step": 4260
    },
    {
      "epoch": 0.49203968620212274,
      "grad_norm": 0.1984453648328781,
      "learning_rate": 0.00012009867489591377,
      "loss": 0.901,
      "step": 4265
    },
    {
      "epoch": 0.49261652053530225,
      "grad_norm": 0.20906962454319,
      "learning_rate": 0.00011990138488080622,
      "loss": 0.9282,
      "step": 4270
    },
    {
      "epoch": 0.49319335486848176,
      "grad_norm": 0.2029707431793213,
      "learning_rate": 0.00011970401417520913,
      "loss": 0.9731,
      "step": 4275
    },
    {
      "epoch": 0.49377018920166127,
      "grad_norm": 0.19050882756710052,
      "learning_rate": 0.00011950656357936525,
      "loss": 0.9431,
      "step": 4280
    },
    {
      "epoch": 0.4943470235348408,
      "grad_norm": 0.19275051355361938,
      "learning_rate": 0.00011930903389384123,
      "loss": 0.9576,
      "step": 4285
    },
    {
      "epoch": 0.4949238578680203,
      "grad_norm": 0.21345672011375427,
      "learning_rate": 0.00011911142591952437,
      "loss": 0.9696,
      "step": 4290
    },
    {
      "epoch": 0.4955006922011998,
      "grad_norm": 0.1972283273935318,
      "learning_rate": 0.0001189137404576195,
      "loss": 0.9669,
      "step": 4295
    },
    {
      "epoch": 0.4960775265343793,
      "grad_norm": 0.19907104969024658,
      "learning_rate": 0.00011871597830964551,
      "loss": 0.9477,
      "step": 4300
    },
    {
      "epoch": 0.4966543608675588,
      "grad_norm": 0.1972326934337616,
      "learning_rate": 0.00011851814027743223,
      "loss": 0.9962,
      "step": 4305
    },
    {
      "epoch": 0.49723119520073833,
      "grad_norm": 0.18552403151988983,
      "learning_rate": 0.00011832022716311722,
      "loss": 0.9556,
      "step": 4310
    },
    {
      "epoch": 0.49780802953391784,
      "grad_norm": 0.18974804878234863,
      "learning_rate": 0.00011812223976914243,
      "loss": 0.9446,
      "step": 4315
    },
    {
      "epoch": 0.49838486386709735,
      "grad_norm": 0.2123657763004303,
      "learning_rate": 0.00011792417889825094,
      "loss": 0.9444,
      "step": 4320
    },
    {
      "epoch": 0.49896169820027686,
      "grad_norm": 0.1999022513628006,
      "learning_rate": 0.00011772604535348382,
      "loss": 0.9767,
      "step": 4325
    },
    {
      "epoch": 0.49953853253345637,
      "grad_norm": 0.19423572719097137,
      "learning_rate": 0.00011752783993817675,
      "loss": 0.9548,
      "step": 4330
    },
    {
      "epoch": 0.5001153668666359,
      "grad_norm": 0.19109071791172028,
      "learning_rate": 0.00011732956345595682,
      "loss": 0.9343,
      "step": 4335
    },
    {
      "epoch": 0.5006922011998154,
      "grad_norm": 0.18442274630069733,
      "learning_rate": 0.00011713121671073924,
      "loss": 0.9759,
      "step": 4340
    },
    {
      "epoch": 0.501269035532995,
      "grad_norm": 0.20228290557861328,
      "learning_rate": 0.00011693280050672417,
      "loss": 0.9676,
      "step": 4345
    },
    {
      "epoch": 0.5018458698661744,
      "grad_norm": 0.19034543633460999,
      "learning_rate": 0.00011673431564839327,
      "loss": 0.87,
      "step": 4350
    },
    {
      "epoch": 0.502422704199354,
      "grad_norm": 0.2001582235097885,
      "learning_rate": 0.0001165357629405067,
      "loss": 0.9489,
      "step": 4355
    },
    {
      "epoch": 0.5029995385325334,
      "grad_norm": 0.18779706954956055,
      "learning_rate": 0.00011633714318809962,
      "loss": 0.9755,
      "step": 4360
    },
    {
      "epoch": 0.503576372865713,
      "grad_norm": 0.19904857873916626,
      "learning_rate": 0.00011613845719647909,
      "loss": 0.9586,
      "step": 4365
    },
    {
      "epoch": 0.5041532071988925,
      "grad_norm": 0.20691759884357452,
      "learning_rate": 0.00011593970577122067,
      "loss": 0.9716,
      "step": 4370
    },
    {
      "epoch": 0.504730041532072,
      "grad_norm": 0.19286218285560608,
      "learning_rate": 0.00011574088971816523,
      "loss": 0.9424,
      "step": 4375
    },
    {
      "epoch": 0.5053068758652515,
      "grad_norm": 0.1918749362230301,
      "learning_rate": 0.00011554200984341577,
      "loss": 0.9339,
      "step": 4380
    },
    {
      "epoch": 0.505883710198431,
      "grad_norm": 0.20204107463359833,
      "learning_rate": 0.00011534306695333395,
      "loss": 1.0028,
      "step": 4385
    },
    {
      "epoch": 0.5064605445316105,
      "grad_norm": 0.21450471878051758,
      "learning_rate": 0.00011514406185453692,
      "loss": 0.95,
      "step": 4390
    },
    {
      "epoch": 0.5070373788647901,
      "grad_norm": 0.22814354300498962,
      "learning_rate": 0.00011494499535389418,
      "loss": 0.9179,
      "step": 4395
    },
    {
      "epoch": 0.5076142131979695,
      "grad_norm": 0.19863393902778625,
      "learning_rate": 0.00011474586825852405,
      "loss": 0.9664,
      "step": 4400
    },
    {
      "epoch": 0.5081910475311491,
      "grad_norm": 0.21064569056034088,
      "learning_rate": 0.00011454668137579059,
      "loss": 0.9269,
      "step": 4405
    },
    {
      "epoch": 0.5087678818643285,
      "grad_norm": 0.19144834578037262,
      "learning_rate": 0.00011434743551330028,
      "loss": 0.9448,
      "step": 4410
    },
    {
      "epoch": 0.5093447161975081,
      "grad_norm": 0.19969388842582703,
      "learning_rate": 0.00011414813147889868,
      "loss": 0.9967,
      "step": 4415
    },
    {
      "epoch": 0.5099215505306876,
      "grad_norm": 0.1932876855134964,
      "learning_rate": 0.00011394877008066731,
      "loss": 0.9726,
      "step": 4420
    },
    {
      "epoch": 0.5104983848638671,
      "grad_norm": 0.2045474648475647,
      "learning_rate": 0.00011374935212692018,
      "loss": 0.9356,
      "step": 4425
    },
    {
      "epoch": 0.5110752191970466,
      "grad_norm": 0.2248217910528183,
      "learning_rate": 0.00011354987842620061,
      "loss": 0.9842,
      "step": 4430
    },
    {
      "epoch": 0.5116520535302261,
      "grad_norm": 0.18337203562259674,
      "learning_rate": 0.000113350349787278,
      "loss": 1.0268,
      "step": 4435
    },
    {
      "epoch": 0.5122288878634056,
      "grad_norm": 0.21334248781204224,
      "learning_rate": 0.00011315076701914449,
      "loss": 0.952,
      "step": 4440
    },
    {
      "epoch": 0.5128057221965852,
      "grad_norm": 0.20142047107219696,
      "learning_rate": 0.00011295113093101162,
      "loss": 0.9348,
      "step": 4445
    },
    {
      "epoch": 0.5133825565297646,
      "grad_norm": 0.19259627163410187,
      "learning_rate": 0.0001127514423323072,
      "loss": 0.9867,
      "step": 4450
    },
    {
      "epoch": 0.5139593908629442,
      "grad_norm": 0.2004094421863556,
      "learning_rate": 0.00011255170203267186,
      "loss": 0.9208,
      "step": 4455
    },
    {
      "epoch": 0.5145362251961236,
      "grad_norm": 0.18530438840389252,
      "learning_rate": 0.000112351910841956,
      "loss": 0.9743,
      "step": 4460
    },
    {
      "epoch": 0.5151130595293032,
      "grad_norm": 0.20678523182868958,
      "learning_rate": 0.00011215206957021618,
      "loss": 0.9476,
      "step": 4465
    },
    {
      "epoch": 0.5156898938624827,
      "grad_norm": 0.20912997424602509,
      "learning_rate": 0.00011195217902771212,
      "loss": 0.9338,
      "step": 4470
    },
    {
      "epoch": 0.5162667281956622,
      "grad_norm": 0.1984136551618576,
      "learning_rate": 0.0001117522400249033,
      "loss": 0.9563,
      "step": 4475
    },
    {
      "epoch": 0.5168435625288417,
      "grad_norm": 0.21053320169448853,
      "learning_rate": 0.00011155225337244562,
      "loss": 0.9753,
      "step": 4480
    },
    {
      "epoch": 0.5174203968620212,
      "grad_norm": 0.19541747868061066,
      "learning_rate": 0.00011135221988118825,
      "loss": 0.9495,
      "step": 4485
    },
    {
      "epoch": 0.5179972311952007,
      "grad_norm": 0.1899503469467163,
      "learning_rate": 0.00011115214036217026,
      "loss": 0.9259,
      "step": 4490
    },
    {
      "epoch": 0.5185740655283803,
      "grad_norm": 0.1909545511007309,
      "learning_rate": 0.0001109520156266173,
      "loss": 0.9308,
      "step": 4495
    },
    {
      "epoch": 0.5191508998615597,
      "grad_norm": 0.18733327090740204,
      "learning_rate": 0.00011075184648593838,
      "loss": 0.989,
      "step": 4500
    },
    {
      "epoch": 0.5197277341947393,
      "grad_norm": 0.1994892954826355,
      "learning_rate": 0.00011055163375172257,
      "loss": 0.9611,
      "step": 4505
    },
    {
      "epoch": 0.5203045685279187,
      "grad_norm": 0.1912652552127838,
      "learning_rate": 0.00011035137823573561,
      "loss": 0.9785,
      "step": 4510
    },
    {
      "epoch": 0.5208814028610983,
      "grad_norm": 0.20350737869739532,
      "learning_rate": 0.0001101510807499168,
      "loss": 0.9672,
      "step": 4515
    },
    {
      "epoch": 0.5214582371942778,
      "grad_norm": 0.2018175572156906,
      "learning_rate": 0.00010995074210637557,
      "loss": 0.9362,
      "step": 4520
    },
    {
      "epoch": 0.5220350715274573,
      "grad_norm": 0.20151209831237793,
      "learning_rate": 0.00010975036311738818,
      "loss": 0.9485,
      "step": 4525
    },
    {
      "epoch": 0.5226119058606368,
      "grad_norm": 0.21562981605529785,
      "learning_rate": 0.00010954994459539452,
      "loss": 0.9553,
      "step": 4530
    },
    {
      "epoch": 0.5231887401938163,
      "grad_norm": 0.19124732911586761,
      "learning_rate": 0.00010934948735299475,
      "loss": 0.9422,
      "step": 4535
    },
    {
      "epoch": 0.5237655745269958,
      "grad_norm": 0.20208267867565155,
      "learning_rate": 0.00010914899220294607,
      "loss": 0.9729,
      "step": 4540
    },
    {
      "epoch": 0.5243424088601754,
      "grad_norm": 0.20815403759479523,
      "learning_rate": 0.00010894845995815928,
      "loss": 0.98,
      "step": 4545
    },
    {
      "epoch": 0.5249192431933549,
      "grad_norm": 0.19528432190418243,
      "learning_rate": 0.00010874789143169568,
      "loss": 0.9301,
      "step": 4550
    },
    {
      "epoch": 0.5254960775265344,
      "grad_norm": 0.20784156024456024,
      "learning_rate": 0.00010854728743676362,
      "loss": 0.9553,
      "step": 4555
    },
    {
      "epoch": 0.5260729118597139,
      "grad_norm": 0.20081031322479248,
      "learning_rate": 0.00010834664878671525,
      "loss": 0.943,
      "step": 4560
    },
    {
      "epoch": 0.5266497461928934,
      "grad_norm": 0.19570979475975037,
      "learning_rate": 0.00010814597629504324,
      "loss": 0.9876,
      "step": 4565
    },
    {
      "epoch": 0.527226580526073,
      "grad_norm": 0.1984855979681015,
      "learning_rate": 0.00010794527077537755,
      "loss": 1.0065,
      "step": 4570
    },
    {
      "epoch": 0.5278034148592524,
      "grad_norm": 0.18974007666110992,
      "learning_rate": 0.00010774453304148192,
      "loss": 0.881,
      "step": 4575
    },
    {
      "epoch": 0.528380249192432,
      "grad_norm": 0.21458855271339417,
      "learning_rate": 0.00010754376390725074,
      "loss": 0.922,
      "step": 4580
    },
    {
      "epoch": 0.5289570835256114,
      "grad_norm": 0.20850373804569244,
      "learning_rate": 0.00010734296418670582,
      "loss": 0.9884,
      "step": 4585
    },
    {
      "epoch": 0.529533917858791,
      "grad_norm": 0.20355555415153503,
      "learning_rate": 0.00010714213469399283,
      "loss": 0.9743,
      "step": 4590
    },
    {
      "epoch": 0.5301107521919705,
      "grad_norm": 0.20082198083400726,
      "learning_rate": 0.00010694127624337826,
      "loss": 0.9368,
      "step": 4595
    },
    {
      "epoch": 0.53068758652515,
      "grad_norm": 0.1965116560459137,
      "learning_rate": 0.00010674038964924597,
      "loss": 0.9374,
      "step": 4600
    },
    {
      "epoch": 0.5312644208583295,
      "grad_norm": 0.1953085958957672,
      "learning_rate": 0.00010653947572609393,
      "loss": 0.9168,
      "step": 4605
    },
    {
      "epoch": 0.531841255191509,
      "grad_norm": 0.18610374629497528,
      "learning_rate": 0.0001063385352885309,
      "loss": 0.9191,
      "step": 4610
    },
    {
      "epoch": 0.5324180895246885,
      "grad_norm": 0.18412019312381744,
      "learning_rate": 0.00010613756915127319,
      "loss": 0.9549,
      "step": 4615
    },
    {
      "epoch": 0.5329949238578681,
      "grad_norm": 0.20428583025932312,
      "learning_rate": 0.00010593657812914129,
      "loss": 0.9849,
      "step": 4620
    },
    {
      "epoch": 0.5335717581910475,
      "grad_norm": 0.1892022341489792,
      "learning_rate": 0.00010573556303705652,
      "loss": 0.9892,
      "step": 4625
    },
    {
      "epoch": 0.5341485925242271,
      "grad_norm": 0.19151116907596588,
      "learning_rate": 0.00010553452469003789,
      "loss": 0.9291,
      "step": 4630
    },
    {
      "epoch": 0.5347254268574065,
      "grad_norm": 0.20716647803783417,
      "learning_rate": 0.00010533346390319867,
      "loss": 0.9391,
      "step": 4635
    },
    {
      "epoch": 0.5353022611905861,
      "grad_norm": 0.19385290145874023,
      "learning_rate": 0.00010513238149174304,
      "loss": 1.0,
      "step": 4640
    },
    {
      "epoch": 0.5358790955237656,
      "grad_norm": 0.20355677604675293,
      "learning_rate": 0.00010493127827096298,
      "loss": 0.9311,
      "step": 4645
    },
    {
      "epoch": 0.5364559298569451,
      "grad_norm": 0.2063637375831604,
      "learning_rate": 0.00010473015505623477,
      "loss": 0.9521,
      "step": 4650
    },
    {
      "epoch": 0.5370327641901246,
      "grad_norm": 0.201882466673851,
      "learning_rate": 0.00010452901266301574,
      "loss": 0.9302,
      "step": 4655
    },
    {
      "epoch": 0.5376095985233041,
      "grad_norm": 0.19823043048381805,
      "learning_rate": 0.000104327851906841,
      "loss": 1.0078,
      "step": 4660
    },
    {
      "epoch": 0.5381864328564836,
      "grad_norm": 0.19284914433956146,
      "learning_rate": 0.00010412667360332013,
      "loss": 0.9246,
      "step": 4665
    },
    {
      "epoch": 0.5387632671896632,
      "grad_norm": 0.19816389679908752,
      "learning_rate": 0.00010392547856813384,
      "loss": 0.9565,
      "step": 4670
    },
    {
      "epoch": 0.5393401015228426,
      "grad_norm": 0.2038826197385788,
      "learning_rate": 0.00010372426761703067,
      "loss": 0.9511,
      "step": 4675
    },
    {
      "epoch": 0.5399169358560222,
      "grad_norm": 0.2072780877351761,
      "learning_rate": 0.00010352304156582376,
      "loss": 0.9883,
      "step": 4680
    },
    {
      "epoch": 0.5404937701892016,
      "grad_norm": 0.2100173830986023,
      "learning_rate": 0.0001033218012303873,
      "loss": 0.9626,
      "step": 4685
    },
    {
      "epoch": 0.5410706045223812,
      "grad_norm": 0.1984483003616333,
      "learning_rate": 0.00010312054742665362,
      "loss": 0.9235,
      "step": 4690
    },
    {
      "epoch": 0.5416474388555607,
      "grad_norm": 0.19214801490306854,
      "learning_rate": 0.0001029192809706095,
      "loss": 0.9675,
      "step": 4695
    },
    {
      "epoch": 0.5422242731887402,
      "grad_norm": 0.19624173641204834,
      "learning_rate": 0.00010271800267829308,
      "loss": 0.9274,
      "step": 4700
    },
    {
      "epoch": 0.5428011075219197,
      "grad_norm": 0.19869272410869598,
      "learning_rate": 0.00010251671336579048,
      "loss": 0.9557,
      "step": 4705
    },
    {
      "epoch": 0.5433779418550992,
      "grad_norm": 0.191603422164917,
      "learning_rate": 0.00010231541384923248,
      "loss": 0.9239,
      "step": 4710
    },
    {
      "epoch": 0.5439547761882787,
      "grad_norm": 0.18842673301696777,
      "learning_rate": 0.0001021141049447913,
      "loss": 0.9652,
      "step": 4715
    },
    {
      "epoch": 0.5445316105214583,
      "grad_norm": 0.1997225433588028,
      "learning_rate": 0.00010191278746867714,
      "loss": 0.964,
      "step": 4720
    },
    {
      "epoch": 0.5451084448546377,
      "grad_norm": 0.22433891892433167,
      "learning_rate": 0.00010171146223713496,
      "loss": 0.9204,
      "step": 4725
    },
    {
      "epoch": 0.5456852791878173,
      "grad_norm": 0.20622776448726654,
      "learning_rate": 0.00010151013006644128,
      "loss": 0.9701,
      "step": 4730
    },
    {
      "epoch": 0.5462621135209967,
      "grad_norm": 0.19883517920970917,
      "learning_rate": 0.00010130879177290061,
      "loss": 0.9816,
      "step": 4735
    },
    {
      "epoch": 0.5468389478541763,
      "grad_norm": 0.1920338273048401,
      "learning_rate": 0.00010110744817284232,
      "loss": 0.9579,
      "step": 4740
    },
    {
      "epoch": 0.5474157821873558,
      "grad_norm": 0.18235036730766296,
      "learning_rate": 0.00010090610008261738,
      "loss": 0.9488,
      "step": 4745
    },
    {
      "epoch": 0.5479926165205353,
      "grad_norm": 0.2129899263381958,
      "learning_rate": 0.00010070474831859486,
      "loss": 1.0436,
      "step": 4750
    },
    {
      "epoch": 0.5485694508537148,
      "grad_norm": 0.19054663181304932,
      "learning_rate": 0.0001005033936971588,
      "loss": 0.9736,
      "step": 4755
    },
    {
      "epoch": 0.5491462851868943,
      "grad_norm": 0.199018195271492,
      "learning_rate": 0.00010030203703470477,
      "loss": 0.9589,
      "step": 4760
    },
    {
      "epoch": 0.5497231195200738,
      "grad_norm": 0.20436997711658478,
      "learning_rate": 0.00010010067914763668,
      "loss": 0.9825,
      "step": 4765
    },
    {
      "epoch": 0.5502999538532534,
      "grad_norm": 0.19828738272190094,
      "learning_rate": 9.989932085236334e-05,
      "loss": 0.9739,
      "step": 4770
    },
    {
      "epoch": 0.5508767881864328,
      "grad_norm": 0.19354400038719177,
      "learning_rate": 9.969796296529525e-05,
      "loss": 0.968,
      "step": 4775
    },
    {
      "epoch": 0.5514536225196124,
      "grad_norm": 0.2065267264842987,
      "learning_rate": 9.949660630284122e-05,
      "loss": 0.9612,
      "step": 4780
    },
    {
      "epoch": 0.5520304568527918,
      "grad_norm": 0.18790316581726074,
      "learning_rate": 9.929525168140516e-05,
      "loss": 0.9412,
      "step": 4785
    },
    {
      "epoch": 0.5526072911859714,
      "grad_norm": 0.19820088148117065,
      "learning_rate": 9.909389991738263e-05,
      "loss": 0.9354,
      "step": 4790
    },
    {
      "epoch": 0.5531841255191509,
      "grad_norm": 0.1902572065591812,
      "learning_rate": 9.889255182715769e-05,
      "loss": 0.9176,
      "step": 4795
    },
    {
      "epoch": 0.5537609598523304,
      "grad_norm": 0.18741828203201294,
      "learning_rate": 9.869120822709946e-05,
      "loss": 0.9258,
      "step": 4800
    },
    {
      "epoch": 0.5543377941855099,
      "grad_norm": 0.18927207589149475,
      "learning_rate": 9.848986993355877e-05,
      "loss": 0.9675,
      "step": 4805
    },
    {
      "epoch": 0.5549146285186894,
      "grad_norm": 0.20099502801895142,
      "learning_rate": 9.828853776286505e-05,
      "loss": 0.937,
      "step": 4810
    },
    {
      "epoch": 0.5554914628518689,
      "grad_norm": 0.19836010038852692,
      "learning_rate": 9.808721253132289e-05,
      "loss": 0.9674,
      "step": 4815
    },
    {
      "epoch": 0.5560682971850485,
      "grad_norm": 0.18979863822460175,
      "learning_rate": 9.78858950552087e-05,
      "loss": 0.9916,
      "step": 4820
    },
    {
      "epoch": 0.5566451315182279,
      "grad_norm": 0.191952183842659,
      "learning_rate": 9.768458615076751e-05,
      "loss": 0.9519,
      "step": 4825
    },
    {
      "epoch": 0.5572219658514075,
      "grad_norm": 0.19669634103775024,
      "learning_rate": 9.748328663420952e-05,
      "loss": 0.9389,
      "step": 4830
    },
    {
      "epoch": 0.5577988001845869,
      "grad_norm": 0.20401979982852936,
      "learning_rate": 9.728199732170696e-05,
      "loss": 0.9875,
      "step": 4835
    },
    {
      "epoch": 0.5583756345177665,
      "grad_norm": 0.2189038097858429,
      "learning_rate": 9.708071902939054e-05,
      "loss": 0.9388,
      "step": 4840
    },
    {
      "epoch": 0.558952468850946,
      "grad_norm": 0.2018914520740509,
      "learning_rate": 9.687945257334641e-05,
      "loss": 1.0281,
      "step": 4845
    },
    {
      "epoch": 0.5595293031841255,
      "grad_norm": 0.20172595977783203,
      "learning_rate": 9.667819876961272e-05,
      "loss": 0.9957,
      "step": 4850
    },
    {
      "epoch": 0.560106137517305,
      "grad_norm": 0.2063581645488739,
      "learning_rate": 9.647695843417628e-05,
      "loss": 0.9723,
      "step": 4855
    },
    {
      "epoch": 0.5606829718504845,
      "grad_norm": 0.2113044112920761,
      "learning_rate": 9.627573238296933e-05,
      "loss": 0.9577,
      "step": 4860
    },
    {
      "epoch": 0.561259806183664,
      "grad_norm": 0.20113973319530487,
      "learning_rate": 9.60745214318662e-05,
      "loss": 0.9239,
      "step": 4865
    },
    {
      "epoch": 0.5618366405168436,
      "grad_norm": 0.20177756249904633,
      "learning_rate": 9.58733263966799e-05,
      "loss": 1.0015,
      "step": 4870
    },
    {
      "epoch": 0.562413474850023,
      "grad_norm": 0.19315795600414276,
      "learning_rate": 9.567214809315903e-05,
      "loss": 0.9231,
      "step": 4875
    },
    {
      "epoch": 0.5629903091832026,
      "grad_norm": 0.19903239607810974,
      "learning_rate": 9.547098733698428e-05,
      "loss": 0.9608,
      "step": 4880
    },
    {
      "epoch": 0.563567143516382,
      "grad_norm": 0.19510619342327118,
      "learning_rate": 9.526984494376524e-05,
      "loss": 1.0001,
      "step": 4885
    },
    {
      "epoch": 0.5641439778495616,
      "grad_norm": 0.1909225881099701,
      "learning_rate": 9.5068721729037e-05,
      "loss": 0.9288,
      "step": 4890
    },
    {
      "epoch": 0.5647208121827412,
      "grad_norm": 0.19778591394424438,
      "learning_rate": 9.486761850825694e-05,
      "loss": 0.9509,
      "step": 4895
    },
    {
      "epoch": 0.5652976465159206,
      "grad_norm": 0.19866321980953217,
      "learning_rate": 9.466653609680137e-05,
      "loss": 0.9815,
      "step": 4900
    },
    {
      "epoch": 0.5658744808491002,
      "grad_norm": 0.19185493886470795,
      "learning_rate": 9.446547530996214e-05,
      "loss": 0.9667,
      "step": 4905
    },
    {
      "epoch": 0.5664513151822796,
      "grad_norm": 0.22153504192829132,
      "learning_rate": 9.426443696294351e-05,
      "loss": 0.9367,
      "step": 4910
    },
    {
      "epoch": 0.5670281495154592,
      "grad_norm": 0.18705500662326813,
      "learning_rate": 9.406342187085875e-05,
      "loss": 0.9668,
      "step": 4915
    },
    {
      "epoch": 0.5676049838486387,
      "grad_norm": 0.19349828362464905,
      "learning_rate": 9.386243084872682e-05,
      "loss": 0.892,
      "step": 4920
    },
    {
      "epoch": 0.5681818181818182,
      "grad_norm": 0.21577003598213196,
      "learning_rate": 9.36614647114691e-05,
      "loss": 0.9722,
      "step": 4925
    },
    {
      "epoch": 0.5687586525149977,
      "grad_norm": 0.2050502598285675,
      "learning_rate": 9.34605242739061e-05,
      "loss": 0.9489,
      "step": 4930
    },
    {
      "epoch": 0.5693354868481773,
      "grad_norm": 0.19747060537338257,
      "learning_rate": 9.325961035075405e-05,
      "loss": 0.9862,
      "step": 4935
    },
    {
      "epoch": 0.5699123211813567,
      "grad_norm": 0.2020748257637024,
      "learning_rate": 9.305872375662176e-05,
      "loss": 1.017,
      "step": 4940
    },
    {
      "epoch": 0.5704891555145363,
      "grad_norm": 0.19945184886455536,
      "learning_rate": 9.285786530600718e-05,
      "loss": 0.9749,
      "step": 4945
    },
    {
      "epoch": 0.5710659898477157,
      "grad_norm": 0.20474183559417725,
      "learning_rate": 9.26570358132942e-05,
      "loss": 0.93,
      "step": 4950
    },
    {
      "epoch": 0.5716428241808953,
      "grad_norm": 0.1998247355222702,
      "learning_rate": 9.245623609274928e-05,
      "loss": 0.9278,
      "step": 4955
    },
    {
      "epoch": 0.5722196585140747,
      "grad_norm": 0.19674921035766602,
      "learning_rate": 9.225546695851815e-05,
      "loss": 0.9899,
      "step": 4960
    },
    {
      "epoch": 0.5727964928472543,
      "grad_norm": 0.19790132343769073,
      "learning_rate": 9.20547292246225e-05,
      "loss": 0.9343,
      "step": 4965
    },
    {
      "epoch": 0.5733733271804338,
      "grad_norm": 0.20167304575443268,
      "learning_rate": 9.185402370495677e-05,
      "loss": 0.9547,
      "step": 4970
    },
    {
      "epoch": 0.5739501615136133,
      "grad_norm": 0.19420599937438965,
      "learning_rate": 9.165335121328477e-05,
      "loss": 0.9824,
      "step": 4975
    },
    {
      "epoch": 0.5745269958467928,
      "grad_norm": 0.18731547892093658,
      "learning_rate": 9.14527125632364e-05,
      "loss": 0.9304,
      "step": 4980
    },
    {
      "epoch": 0.5751038301799724,
      "grad_norm": 0.19297674298286438,
      "learning_rate": 9.125210856830433e-05,
      "loss": 0.9895,
      "step": 4985
    },
    {
      "epoch": 0.5756806645131518,
      "grad_norm": 0.19416366517543793,
      "learning_rate": 9.105154004184071e-05,
      "loss": 0.9606,
      "step": 4990
    },
    {
      "epoch": 0.5762574988463314,
      "grad_norm": 0.19022150337696075,
      "learning_rate": 9.085100779705398e-05,
      "loss": 0.9269,
      "step": 4995
    },
    {
      "epoch": 0.5768343331795108,
      "grad_norm": 0.19337041676044464,
      "learning_rate": 9.065051264700527e-05,
      "loss": 0.9502,
      "step": 5000
    },
    {
      "epoch": 0.5774111675126904,
      "grad_norm": 0.19673341512680054,
      "learning_rate": 9.045005540460552e-05,
      "loss": 0.9453,
      "step": 5005
    },
    {
      "epoch": 0.5779880018458698,
      "grad_norm": 0.19977766275405884,
      "learning_rate": 9.024963688261186e-05,
      "loss": 0.9208,
      "step": 5010
    },
    {
      "epoch": 0.5785648361790494,
      "grad_norm": 0.19135598838329315,
      "learning_rate": 9.004925789362446e-05,
      "loss": 0.961,
      "step": 5015
    },
    {
      "epoch": 0.5791416705122289,
      "grad_norm": 0.1971130669116974,
      "learning_rate": 8.984891925008321e-05,
      "loss": 0.945,
      "step": 5020
    },
    {
      "epoch": 0.5797185048454084,
      "grad_norm": 0.19552946090698242,
      "learning_rate": 8.964862176426443e-05,
      "loss": 0.9618,
      "step": 5025
    },
    {
      "epoch": 0.5802953391785879,
      "grad_norm": 0.18302129209041595,
      "learning_rate": 8.944836624827748e-05,
      "loss": 0.956,
      "step": 5030
    },
    {
      "epoch": 0.5808721735117675,
      "grad_norm": 0.2028164565563202,
      "learning_rate": 8.924815351406163e-05,
      "loss": 1.0094,
      "step": 5035
    },
    {
      "epoch": 0.5814490078449469,
      "grad_norm": 0.19891835749149323,
      "learning_rate": 8.904798437338272e-05,
      "loss": 0.9727,
      "step": 5040
    },
    {
      "epoch": 0.5820258421781265,
      "grad_norm": 0.1948157548904419,
      "learning_rate": 8.884785963782975e-05,
      "loss": 1.0068,
      "step": 5045
    },
    {
      "epoch": 0.5826026765113059,
      "grad_norm": 0.19020549952983856,
      "learning_rate": 8.864778011881175e-05,
      "loss": 0.9164,
      "step": 5050
    },
    {
      "epoch": 0.5831795108444855,
      "grad_norm": 0.20140686631202698,
      "learning_rate": 8.84477466275544e-05,
      "loss": 0.949,
      "step": 5055
    },
    {
      "epoch": 0.583756345177665,
      "grad_norm": 0.203651562333107,
      "learning_rate": 8.824775997509675e-05,
      "loss": 0.9788,
      "step": 5060
    },
    {
      "epoch": 0.5843331795108445,
      "grad_norm": 0.20070117712020874,
      "learning_rate": 8.80478209722879e-05,
      "loss": 0.9566,
      "step": 5065
    },
    {
      "epoch": 0.584910013844024,
      "grad_norm": 0.2062043994665146,
      "learning_rate": 8.784793042978384e-05,
      "loss": 0.9331,
      "step": 5070
    },
    {
      "epoch": 0.5854868481772035,
      "grad_norm": 0.19828034937381744,
      "learning_rate": 8.764808915804401e-05,
      "loss": 0.9926,
      "step": 5075
    },
    {
      "epoch": 0.586063682510383,
      "grad_norm": 0.2180069088935852,
      "learning_rate": 8.744829796732812e-05,
      "loss": 1.0008,
      "step": 5080
    },
    {
      "epoch": 0.5866405168435626,
      "grad_norm": 0.2032460868358612,
      "learning_rate": 8.724855766769282e-05,
      "loss": 1.017,
      "step": 5085
    },
    {
      "epoch": 0.587217351176742,
      "grad_norm": 0.21724148094654083,
      "learning_rate": 8.70488690689884e-05,
      "loss": 0.9026,
      "step": 5090
    },
    {
      "epoch": 0.5877941855099216,
      "grad_norm": 0.21417102217674255,
      "learning_rate": 8.684923298085555e-05,
      "loss": 0.9439,
      "step": 5095
    },
    {
      "epoch": 0.588371019843101,
      "grad_norm": 0.19951827824115753,
      "learning_rate": 8.6649650212722e-05,
      "loss": 0.9587,
      "step": 5100
    },
    {
      "epoch": 0.5889478541762806,
      "grad_norm": 0.20338566601276398,
      "learning_rate": 8.645012157379941e-05,
      "loss": 1.0392,
      "step": 5105
    },
    {
      "epoch": 0.58952468850946,
      "grad_norm": 0.18534879386425018,
      "learning_rate": 8.625064787307986e-05,
      "loss": 0.975,
      "step": 5110
    },
    {
      "epoch": 0.5901015228426396,
      "grad_norm": 0.20185095071792603,
      "learning_rate": 8.605122991933271e-05,
      "loss": 0.9446,
      "step": 5115
    },
    {
      "epoch": 0.5906783571758191,
      "grad_norm": 0.19836807250976562,
      "learning_rate": 8.585186852110134e-05,
      "loss": 0.9678,
      "step": 5120
    },
    {
      "epoch": 0.5912551915089986,
      "grad_norm": 0.20136135816574097,
      "learning_rate": 8.565256448669976e-05,
      "loss": 0.9662,
      "step": 5125
    },
    {
      "epoch": 0.5918320258421781,
      "grad_norm": 0.1838986873626709,
      "learning_rate": 8.545331862420944e-05,
      "loss": 0.9133,
      "step": 5130
    },
    {
      "epoch": 0.5924088601753577,
      "grad_norm": 0.19178135693073273,
      "learning_rate": 8.525413174147598e-05,
      "loss": 0.9451,
      "step": 5135
    },
    {
      "epoch": 0.5929856945085371,
      "grad_norm": 0.20643360912799835,
      "learning_rate": 8.505500464610584e-05,
      "loss": 0.9355,
      "step": 5140
    },
    {
      "epoch": 0.5935625288417167,
      "grad_norm": 0.20259740948677063,
      "learning_rate": 8.485593814546307e-05,
      "loss": 0.9678,
      "step": 5145
    },
    {
      "epoch": 0.5941393631748961,
      "grad_norm": 0.19384346902370453,
      "learning_rate": 8.465693304666606e-05,
      "loss": 0.966,
      "step": 5150
    },
    {
      "epoch": 0.5947161975080757,
      "grad_norm": 0.2989238202571869,
      "learning_rate": 8.445799015658427e-05,
      "loss": 0.9356,
      "step": 5155
    },
    {
      "epoch": 0.5952930318412551,
      "grad_norm": 0.212250217795372,
      "learning_rate": 8.425911028183479e-05,
      "loss": 0.9423,
      "step": 5160
    },
    {
      "epoch": 0.5958698661744347,
      "grad_norm": 0.19762969017028809,
      "learning_rate": 8.406029422877937e-05,
      "loss": 0.9727,
      "step": 5165
    },
    {
      "epoch": 0.5964467005076142,
      "grad_norm": 0.18385392427444458,
      "learning_rate": 8.386154280352094e-05,
      "loss": 0.9671,
      "step": 5170
    },
    {
      "epoch": 0.5970235348407937,
      "grad_norm": 0.21260400116443634,
      "learning_rate": 8.366285681190039e-05,
      "loss": 0.9678,
      "step": 5175
    },
    {
      "epoch": 0.5976003691739732,
      "grad_norm": 0.20906022191047668,
      "learning_rate": 8.34642370594933e-05,
      "loss": 0.9252,
      "step": 5180
    },
    {
      "epoch": 0.5981772035071528,
      "grad_norm": 0.18937109410762787,
      "learning_rate": 8.326568435160677e-05,
      "loss": 1.0374,
      "step": 5185
    },
    {
      "epoch": 0.5987540378403322,
      "grad_norm": 0.19254235923290253,
      "learning_rate": 8.306719949327588e-05,
      "loss": 0.9621,
      "step": 5190
    },
    {
      "epoch": 0.5993308721735118,
      "grad_norm": 0.1981174647808075,
      "learning_rate": 8.286878328926077e-05,
      "loss": 0.9276,
      "step": 5195
    },
    {
      "epoch": 0.5999077065066912,
      "grad_norm": 0.2028452455997467,
      "learning_rate": 8.26704365440432e-05,
      "loss": 0.9824,
      "step": 5200
    },
    {
      "epoch": 0.6004845408398708,
      "grad_norm": 0.2027309387922287,
      "learning_rate": 8.247216006182326e-05,
      "loss": 0.9204,
      "step": 5205
    },
    {
      "epoch": 0.6010613751730502,
      "grad_norm": 0.19740572571754456,
      "learning_rate": 8.227395464651618e-05,
      "loss": 0.9799,
      "step": 5210
    },
    {
      "epoch": 0.6016382095062298,
      "grad_norm": 0.19563211500644684,
      "learning_rate": 8.20758211017491e-05,
      "loss": 0.9518,
      "step": 5215
    },
    {
      "epoch": 0.6022150438394093,
      "grad_norm": 0.19438865780830383,
      "learning_rate": 8.187776023085762e-05,
      "loss": 1.0022,
      "step": 5220
    },
    {
      "epoch": 0.6027918781725888,
      "grad_norm": 0.1977875679731369,
      "learning_rate": 8.167977283688282e-05,
      "loss": 0.997,
      "step": 5225
    },
    {
      "epoch": 0.6033687125057683,
      "grad_norm": 0.22014763951301575,
      "learning_rate": 8.148185972256778e-05,
      "loss": 0.9436,
      "step": 5230
    },
    {
      "epoch": 0.6039455468389479,
      "grad_norm": 0.19123868644237518,
      "learning_rate": 8.128402169035451e-05,
      "loss": 0.971,
      "step": 5235
    },
    {
      "epoch": 0.6045223811721273,
      "grad_norm": 0.18875616788864136,
      "learning_rate": 8.108625954238051e-05,
      "loss": 0.9399,
      "step": 5240
    },
    {
      "epoch": 0.6050992155053069,
      "grad_norm": 0.20804201066493988,
      "learning_rate": 8.088857408047562e-05,
      "loss": 0.9634,
      "step": 5245
    },
    {
      "epoch": 0.6056760498384864,
      "grad_norm": 0.1881391406059265,
      "learning_rate": 8.06909661061588e-05,
      "loss": 0.9885,
      "step": 5250
    },
    {
      "epoch": 0.6062528841716659,
      "grad_norm": 0.19537098705768585,
      "learning_rate": 8.049343642063477e-05,
      "loss": 0.952,
      "step": 5255
    },
    {
      "epoch": 0.6068297185048455,
      "grad_norm": 0.18685182929039001,
      "learning_rate": 8.029598582479088e-05,
      "loss": 0.9603,
      "step": 5260
    },
    {
      "epoch": 0.6074065528380249,
      "grad_norm": 0.2082066535949707,
      "learning_rate": 8.00986151191938e-05,
      "loss": 0.8972,
      "step": 5265
    },
    {
      "epoch": 0.6079833871712045,
      "grad_norm": 0.20010879635810852,
      "learning_rate": 7.990132510408625e-05,
      "loss": 0.934,
      "step": 5270
    },
    {
      "epoch": 0.6085602215043839,
      "grad_norm": 0.19527243077754974,
      "learning_rate": 7.970411657938381e-05,
      "loss": 0.9687,
      "step": 5275
    },
    {
      "epoch": 0.6091370558375635,
      "grad_norm": 0.2064102441072464,
      "learning_rate": 7.95069903446717e-05,
      "loss": 0.9407,
      "step": 5280
    },
    {
      "epoch": 0.609713890170743,
      "grad_norm": 0.19495844841003418,
      "learning_rate": 7.930994719920142e-05,
      "loss": 0.9628,
      "step": 5285
    },
    {
      "epoch": 0.6102907245039225,
      "grad_norm": 0.21486780047416687,
      "learning_rate": 7.911298794188761e-05,
      "loss": 0.9869,
      "step": 5290
    },
    {
      "epoch": 0.610867558837102,
      "grad_norm": 0.19653169810771942,
      "learning_rate": 7.891611337130482e-05,
      "loss": 0.9998,
      "step": 5295
    },
    {
      "epoch": 0.6114443931702815,
      "grad_norm": 0.21410493552684784,
      "learning_rate": 7.871932428568418e-05,
      "loss": 0.9142,
      "step": 5300
    },
    {
      "epoch": 0.612021227503461,
      "grad_norm": 0.18754735589027405,
      "learning_rate": 7.852262148291028e-05,
      "loss": 1.0069,
      "step": 5305
    },
    {
      "epoch": 0.6125980618366406,
      "grad_norm": 0.19620005786418915,
      "learning_rate": 7.832600576051779e-05,
      "loss": 0.9078,
      "step": 5310
    },
    {
      "epoch": 0.61317489616982,
      "grad_norm": 0.20171235501766205,
      "learning_rate": 7.812947791568845e-05,
      "loss": 0.9696,
      "step": 5315
    },
    {
      "epoch": 0.6137517305029996,
      "grad_norm": 0.20567715167999268,
      "learning_rate": 7.793303874524752e-05,
      "loss": 0.9431,
      "step": 5320
    },
    {
      "epoch": 0.614328564836179,
      "grad_norm": 0.19829270243644714,
      "learning_rate": 7.773668904566085e-05,
      "loss": 0.9473,
      "step": 5325
    },
    {
      "epoch": 0.6149053991693586,
      "grad_norm": 0.1850794106721878,
      "learning_rate": 7.75404296130315e-05,
      "loss": 0.9487,
      "step": 5330
    },
    {
      "epoch": 0.6154822335025381,
      "grad_norm": 0.19462130963802338,
      "learning_rate": 7.734426124309656e-05,
      "loss": 0.9599,
      "step": 5335
    },
    {
      "epoch": 0.6160590678357176,
      "grad_norm": 0.20998431742191315,
      "learning_rate": 7.714818473122385e-05,
      "loss": 0.9605,
      "step": 5340
    },
    {
      "epoch": 0.6166359021688971,
      "grad_norm": 0.18975423276424408,
      "learning_rate": 7.695220087240885e-05,
      "loss": 0.9829,
      "step": 5345
    },
    {
      "epoch": 0.6172127365020766,
      "grad_norm": 0.19463218748569489,
      "learning_rate": 7.675631046127123e-05,
      "loss": 0.9586,
      "step": 5350
    },
    {
      "epoch": 0.6177895708352561,
      "grad_norm": 0.19710615277290344,
      "learning_rate": 7.656051429205188e-05,
      "loss": 0.9812,
      "step": 5355
    },
    {
      "epoch": 0.6183664051684357,
      "grad_norm": 0.19291462004184723,
      "learning_rate": 7.636481315860958e-05,
      "loss": 0.9615,
      "step": 5360
    },
    {
      "epoch": 0.6189432395016151,
      "grad_norm": 0.19099998474121094,
      "learning_rate": 7.616920785441777e-05,
      "loss": 0.9017,
      "step": 5365
    },
    {
      "epoch": 0.6195200738347947,
      "grad_norm": 0.1884920597076416,
      "learning_rate": 7.597369917256132e-05,
      "loss": 0.9232,
      "step": 5370
    },
    {
      "epoch": 0.6200969081679741,
      "grad_norm": 0.20266391336917877,
      "learning_rate": 7.577828790573345e-05,
      "loss": 0.973,
      "step": 5375
    },
    {
      "epoch": 0.6206737425011537,
      "grad_norm": 0.19208677113056183,
      "learning_rate": 7.55829748462322e-05,
      "loss": 0.966,
      "step": 5380
    },
    {
      "epoch": 0.6212505768343332,
      "grad_norm": 0.1974397599697113,
      "learning_rate": 7.538776078595762e-05,
      "loss": 0.9816,
      "step": 5385
    },
    {
      "epoch": 0.6218274111675127,
      "grad_norm": 0.1980104148387909,
      "learning_rate": 7.519264651640829e-05,
      "loss": 0.9531,
      "step": 5390
    },
    {
      "epoch": 0.6224042455006922,
      "grad_norm": 0.19935230910778046,
      "learning_rate": 7.499763282867823e-05,
      "loss": 0.9754,
      "step": 5395
    },
    {
      "epoch": 0.6229810798338717,
      "grad_norm": 0.19416543841362,
      "learning_rate": 7.480272051345358e-05,
      "loss": 0.9571,
      "step": 5400
    },
    {
      "epoch": 0.6235579141670512,
      "grad_norm": 0.21511389315128326,
      "learning_rate": 7.460791036100952e-05,
      "loss": 0.9454,
      "step": 5405
    },
    {
      "epoch": 0.6241347485002308,
      "grad_norm": 0.1961875706911087,
      "learning_rate": 7.4413203161207e-05,
      "loss": 0.9851,
      "step": 5410
    },
    {
      "epoch": 0.6247115828334102,
      "grad_norm": 0.20345531404018402,
      "learning_rate": 7.421859970348949e-05,
      "loss": 0.9334,
      "step": 5415
    },
    {
      "epoch": 0.6252884171665898,
      "grad_norm": 0.18896692991256714,
      "learning_rate": 7.402410077687993e-05,
      "loss": 0.9288,
      "step": 5420
    },
    {
      "epoch": 0.6258652514997692,
      "grad_norm": 0.21268202364444733,
      "learning_rate": 7.382970716997736e-05,
      "loss": 0.9789,
      "step": 5425
    },
    {
      "epoch": 0.6264420858329488,
      "grad_norm": 0.20382317900657654,
      "learning_rate": 7.363541967095387e-05,
      "loss": 0.9449,
      "step": 5430
    },
    {
      "epoch": 0.6270189201661283,
      "grad_norm": 0.1883535534143448,
      "learning_rate": 7.344123906755124e-05,
      "loss": 0.9609,
      "step": 5435
    },
    {
      "epoch": 0.6275957544993078,
      "grad_norm": 0.2033308893442154,
      "learning_rate": 7.324716614707793e-05,
      "loss": 0.9712,
      "step": 5440
    },
    {
      "epoch": 0.6281725888324873,
      "grad_norm": 0.1854039430618286,
      "learning_rate": 7.305320169640575e-05,
      "loss": 0.9199,
      "step": 5445
    },
    {
      "epoch": 0.6287494231656668,
      "grad_norm": 0.19366517663002014,
      "learning_rate": 7.285934650196672e-05,
      "loss": 0.9421,
      "step": 5450
    },
    {
      "epoch": 0.6293262574988463,
      "grad_norm": 0.1920800656080246,
      "learning_rate": 7.266560134974989e-05,
      "loss": 0.9357,
      "step": 5455
    },
    {
      "epoch": 0.6299030918320259,
      "grad_norm": 0.1931942254304886,
      "learning_rate": 7.247196702529815e-05,
      "loss": 0.9787,
      "step": 5460
    },
    {
      "epoch": 0.6304799261652053,
      "grad_norm": 0.20151135325431824,
      "learning_rate": 7.227844431370502e-05,
      "loss": 1.0103,
      "step": 5465
    },
    {
      "epoch": 0.6310567604983849,
      "grad_norm": 0.19090351462364197,
      "learning_rate": 7.208503399961149e-05,
      "loss": 0.9166,
      "step": 5470
    },
    {
      "epoch": 0.6316335948315643,
      "grad_norm": 0.19098447263240814,
      "learning_rate": 7.189173686720287e-05,
      "loss": 0.9626,
      "step": 5475
    },
    {
      "epoch": 0.6322104291647439,
      "grad_norm": 0.20782425999641418,
      "learning_rate": 7.169855370020547e-05,
      "loss": 1.0002,
      "step": 5480
    },
    {
      "epoch": 0.6327872634979234,
      "grad_norm": 0.20794013142585754,
      "learning_rate": 7.15054852818836e-05,
      "loss": 0.9396,
      "step": 5485
    },
    {
      "epoch": 0.6333640978311029,
      "grad_norm": 0.2069510966539383,
      "learning_rate": 7.131253239503635e-05,
      "loss": 0.9651,
      "step": 5490
    },
    {
      "epoch": 0.6339409321642824,
      "grad_norm": 0.19113782048225403,
      "learning_rate": 7.111969582199431e-05,
      "loss": 0.9243,
      "step": 5495
    },
    {
      "epoch": 0.6345177664974619,
      "grad_norm": 0.19338031113147736,
      "learning_rate": 7.092697634461654e-05,
      "loss": 0.9379,
      "step": 5500
    },
    {
      "epoch": 0.6350946008306414,
      "grad_norm": 0.214319109916687,
      "learning_rate": 7.073437474428732e-05,
      "loss": 0.99,
      "step": 5505
    },
    {
      "epoch": 0.635671435163821,
      "grad_norm": 0.19014938175678253,
      "learning_rate": 7.05418918019129e-05,
      "loss": 0.9966,
      "step": 5510
    },
    {
      "epoch": 0.6362482694970004,
      "grad_norm": 0.20483070611953735,
      "learning_rate": 7.034952829791858e-05,
      "loss": 0.9126,
      "step": 5515
    },
    {
      "epoch": 0.63682510383018,
      "grad_norm": 0.19414497911930084,
      "learning_rate": 7.01572850122453e-05,
      "loss": 0.959,
      "step": 5520
    },
    {
      "epoch": 0.6374019381633594,
      "grad_norm": 0.20513789355754852,
      "learning_rate": 6.996516272434658e-05,
      "loss": 0.9301,
      "step": 5525
    },
    {
      "epoch": 0.637978772496539,
      "grad_norm": 0.19759726524353027,
      "learning_rate": 6.97731622131854e-05,
      "loss": 0.9727,
      "step": 5530
    },
    {
      "epoch": 0.6385556068297185,
      "grad_norm": 0.20151971280574799,
      "learning_rate": 6.9581284257231e-05,
      "loss": 0.9625,
      "step": 5535
    },
    {
      "epoch": 0.639132441162898,
      "grad_norm": 0.2085186392068863,
      "learning_rate": 6.938952963445559e-05,
      "loss": 0.9663,
      "step": 5540
    },
    {
      "epoch": 0.6397092754960775,
      "grad_norm": 0.19066348671913147,
      "learning_rate": 6.919789912233146e-05,
      "loss": 0.9397,
      "step": 5545
    },
    {
      "epoch": 0.640286109829257,
      "grad_norm": 0.2034011334180832,
      "learning_rate": 6.900639349782762e-05,
      "loss": 0.975,
      "step": 5550
    },
    {
      "epoch": 0.6408629441624365,
      "grad_norm": 0.19911380112171173,
      "learning_rate": 6.88150135374068e-05,
      "loss": 1.0096,
      "step": 5555
    },
    {
      "epoch": 0.6414397784956161,
      "grad_norm": 0.19491539895534515,
      "learning_rate": 6.862376001702213e-05,
      "loss": 0.9999,
      "step": 5560
    },
    {
      "epoch": 0.6420166128287955,
      "grad_norm": 0.18677499890327454,
      "learning_rate": 6.843263371211414e-05,
      "loss": 0.9005,
      "step": 5565
    },
    {
      "epoch": 0.6425934471619751,
      "grad_norm": 0.19133366644382477,
      "learning_rate": 6.824163539760759e-05,
      "loss": 0.9202,
      "step": 5570
    },
    {
      "epoch": 0.6431702814951545,
      "grad_norm": 0.19170129299163818,
      "learning_rate": 6.805076584790818e-05,
      "loss": 0.955,
      "step": 5575
    },
    {
      "epoch": 0.6437471158283341,
      "grad_norm": 0.18898151814937592,
      "learning_rate": 6.786002583689968e-05,
      "loss": 0.9515,
      "step": 5580
    },
    {
      "epoch": 0.6443239501615136,
      "grad_norm": 0.20018716156482697,
      "learning_rate": 6.766941613794053e-05,
      "loss": 0.9429,
      "step": 5585
    },
    {
      "epoch": 0.6449007844946931,
      "grad_norm": 0.20243898034095764,
      "learning_rate": 6.747893752386088e-05,
      "loss": 0.9879,
      "step": 5590
    },
    {
      "epoch": 0.6454776188278727,
      "grad_norm": 0.18688704073429108,
      "learning_rate": 6.728859076695938e-05,
      "loss": 0.9039,
      "step": 5595
    },
    {
      "epoch": 0.6460544531610521,
      "grad_norm": 0.2041657567024231,
      "learning_rate": 6.709837663900007e-05,
      "loss": 0.9449,
      "step": 5600
    },
    {
      "epoch": 0.6466312874942317,
      "grad_norm": 0.17731288075447083,
      "learning_rate": 6.690829591120922e-05,
      "loss": 0.8981,
      "step": 5605
    },
    {
      "epoch": 0.6472081218274112,
      "grad_norm": 0.19162966310977936,
      "learning_rate": 6.671834935427222e-05,
      "loss": 0.9003,
      "step": 5610
    },
    {
      "epoch": 0.6477849561605907,
      "grad_norm": 0.19198764860630035,
      "learning_rate": 6.652853773833052e-05,
      "loss": 0.9338,
      "step": 5615
    },
    {
      "epoch": 0.6483617904937702,
      "grad_norm": 0.1949075311422348,
      "learning_rate": 6.633886183297838e-05,
      "loss": 0.9595,
      "step": 5620
    },
    {
      "epoch": 0.6489386248269498,
      "grad_norm": 0.18270482122898102,
      "learning_rate": 6.614932240725989e-05,
      "loss": 0.9107,
      "step": 5625
    },
    {
      "epoch": 0.6495154591601292,
      "grad_norm": 0.19540977478027344,
      "learning_rate": 6.595992022966571e-05,
      "loss": 0.9186,
      "step": 5630
    },
    {
      "epoch": 0.6500922934933088,
      "grad_norm": 0.19692561030387878,
      "learning_rate": 6.577065606813011e-05,
      "loss": 0.9674,
      "step": 5635
    },
    {
      "epoch": 0.6506691278264882,
      "grad_norm": 0.19444316625595093,
      "learning_rate": 6.558153069002764e-05,
      "loss": 0.998,
      "step": 5640
    },
    {
      "epoch": 0.6512459621596678,
      "grad_norm": 0.19267070293426514,
      "learning_rate": 6.539254486217026e-05,
      "loss": 0.9694,
      "step": 5645
    },
    {
      "epoch": 0.6518227964928472,
      "grad_norm": 0.1884683221578598,
      "learning_rate": 6.520369935080411e-05,
      "loss": 0.9626,
      "step": 5650
    },
    {
      "epoch": 0.6523996308260268,
      "grad_norm": 0.19955401122570038,
      "learning_rate": 6.501499492160636e-05,
      "loss": 0.9644,
      "step": 5655
    },
    {
      "epoch": 0.6529764651592063,
      "grad_norm": 0.18771077692508698,
      "learning_rate": 6.482643233968224e-05,
      "loss": 0.9485,
      "step": 5660
    },
    {
      "epoch": 0.6535532994923858,
      "grad_norm": 0.2017538845539093,
      "learning_rate": 6.463801236956184e-05,
      "loss": 0.9341,
      "step": 5665
    },
    {
      "epoch": 0.6541301338255653,
      "grad_norm": 0.2100268006324768,
      "learning_rate": 6.44497357751969e-05,
      "loss": 0.9625,
      "step": 5670
    },
    {
      "epoch": 0.6547069681587449,
      "grad_norm": 0.1906213015317917,
      "learning_rate": 6.426160331995801e-05,
      "loss": 0.9099,
      "step": 5675
    },
    {
      "epoch": 0.6552838024919243,
      "grad_norm": 0.18438196182250977,
      "learning_rate": 6.407361576663124e-05,
      "loss": 0.8879,
      "step": 5680
    },
    {
      "epoch": 0.6558606368251039,
      "grad_norm": 0.18689702451229095,
      "learning_rate": 6.388577387741524e-05,
      "loss": 0.9613,
      "step": 5685
    },
    {
      "epoch": 0.6564374711582833,
      "grad_norm": 0.19837065041065216,
      "learning_rate": 6.369807841391798e-05,
      "loss": 0.9303,
      "step": 5690
    },
    {
      "epoch": 0.6570143054914629,
      "grad_norm": 0.19491511583328247,
      "learning_rate": 6.351053013715383e-05,
      "loss": 0.9777,
      "step": 5695
    },
    {
      "epoch": 0.6575911398246423,
      "grad_norm": 0.19005633890628815,
      "learning_rate": 6.332312980754025e-05,
      "loss": 0.9305,
      "step": 5700
    },
    {
      "epoch": 0.6581679741578219,
      "grad_norm": 0.19769403338432312,
      "learning_rate": 6.313587818489497e-05,
      "loss": 0.9505,
      "step": 5705
    },
    {
      "epoch": 0.6587448084910014,
      "grad_norm": 0.22214584052562714,
      "learning_rate": 6.294877602843275e-05,
      "loss": 0.9718,
      "step": 5710
    },
    {
      "epoch": 0.6593216428241809,
      "grad_norm": 0.18468452990055084,
      "learning_rate": 6.276182409676234e-05,
      "loss": 0.9605,
      "step": 5715
    },
    {
      "epoch": 0.6598984771573604,
      "grad_norm": 0.19918876886367798,
      "learning_rate": 6.25750231478834e-05,
      "loss": 0.9539,
      "step": 5720
    },
    {
      "epoch": 0.66047531149054,
      "grad_norm": 0.18475128710269928,
      "learning_rate": 6.238837393918341e-05,
      "loss": 0.9419,
      "step": 5725
    },
    {
      "epoch": 0.6610521458237194,
      "grad_norm": 0.20297956466674805,
      "learning_rate": 6.220187722743466e-05,
      "loss": 0.96,
      "step": 5730
    },
    {
      "epoch": 0.661628980156899,
      "grad_norm": 0.19551438093185425,
      "learning_rate": 6.201553376879108e-05,
      "loss": 0.95,
      "step": 5735
    },
    {
      "epoch": 0.6622058144900784,
      "grad_norm": 0.20556505024433136,
      "learning_rate": 6.182934431878526e-05,
      "loss": 0.9811,
      "step": 5740
    },
    {
      "epoch": 0.662782648823258,
      "grad_norm": 0.19358351826667786,
      "learning_rate": 6.164330963232535e-05,
      "loss": 0.9539,
      "step": 5745
    },
    {
      "epoch": 0.6633594831564374,
      "grad_norm": 0.18913935124874115,
      "learning_rate": 6.145743046369205e-05,
      "loss": 0.9677,
      "step": 5750
    },
    {
      "epoch": 0.663936317489617,
      "grad_norm": 0.1924053579568863,
      "learning_rate": 6.127170756653546e-05,
      "loss": 0.9302,
      "step": 5755
    },
    {
      "epoch": 0.6645131518227965,
      "grad_norm": 0.1916627734899521,
      "learning_rate": 6.108614169387215e-05,
      "loss": 0.9588,
      "step": 5760
    },
    {
      "epoch": 0.665089986155976,
      "grad_norm": 0.21800926327705383,
      "learning_rate": 6.090073359808188e-05,
      "loss": 0.9329,
      "step": 5765
    },
    {
      "epoch": 0.6656668204891555,
      "grad_norm": 0.19843755662441254,
      "learning_rate": 6.071548403090488e-05,
      "loss": 0.9722,
      "step": 5770
    },
    {
      "epoch": 0.666243654822335,
      "grad_norm": 0.1899397373199463,
      "learning_rate": 6.053039374343849e-05,
      "loss": 0.9167,
      "step": 5775
    },
    {
      "epoch": 0.6668204891555145,
      "grad_norm": 0.19542363286018372,
      "learning_rate": 6.0345463486134325e-05,
      "loss": 0.9741,
      "step": 5780
    },
    {
      "epoch": 0.6673973234886941,
      "grad_norm": 0.1900932788848877,
      "learning_rate": 6.0160694008795114e-05,
      "loss": 0.9137,
      "step": 5785
    },
    {
      "epoch": 0.6679741578218735,
      "grad_norm": 0.192066490650177,
      "learning_rate": 5.9976086060571765e-05,
      "loss": 0.947,
      "step": 5790
    },
    {
      "epoch": 0.6685509921550531,
      "grad_norm": 0.1989891529083252,
      "learning_rate": 5.979164038996015e-05,
      "loss": 0.9692,
      "step": 5795
    },
    {
      "epoch": 0.6691278264882325,
      "grad_norm": 0.18487831950187683,
      "learning_rate": 5.960735774479826e-05,
      "loss": 0.9288,
      "step": 5800
    },
    {
      "epoch": 0.6697046608214121,
      "grad_norm": 0.18360459804534912,
      "learning_rate": 5.942323887226311e-05,
      "loss": 0.966,
      "step": 5805
    },
    {
      "epoch": 0.6702814951545916,
      "grad_norm": 0.1812918782234192,
      "learning_rate": 5.923928451886767e-05,
      "loss": 0.907,
      "step": 5810
    },
    {
      "epoch": 0.6708583294877711,
      "grad_norm": 0.19430682063102722,
      "learning_rate": 5.905549543045783e-05,
      "loss": 0.962,
      "step": 5815
    },
    {
      "epoch": 0.6714351638209506,
      "grad_norm": 0.1931554228067398,
      "learning_rate": 5.887187235220948e-05,
      "loss": 0.957,
      "step": 5820
    },
    {
      "epoch": 0.6720119981541302,
      "grad_norm": 0.19421172142028809,
      "learning_rate": 5.868841602862541e-05,
      "loss": 0.9318,
      "step": 5825
    },
    {
      "epoch": 0.6725888324873096,
      "grad_norm": 0.18080562353134155,
      "learning_rate": 5.8505127203532216e-05,
      "loss": 0.9359,
      "step": 5830
    },
    {
      "epoch": 0.6731656668204892,
      "grad_norm": 0.19878889620304108,
      "learning_rate": 5.8322006620077426e-05,
      "loss": 0.9495,
      "step": 5835
    },
    {
      "epoch": 0.6737425011536686,
      "grad_norm": 0.18784356117248535,
      "learning_rate": 5.8139055020726494e-05,
      "loss": 0.9684,
      "step": 5840
    },
    {
      "epoch": 0.6743193354868482,
      "grad_norm": 0.17742076516151428,
      "learning_rate": 5.7956273147259645e-05,
      "loss": 0.9648,
      "step": 5845
    },
    {
      "epoch": 0.6748961698200276,
      "grad_norm": 0.19306622445583344,
      "learning_rate": 5.77736617407689e-05,
      "loss": 0.9388,
      "step": 5850
    },
    {
      "epoch": 0.6754730041532072,
      "grad_norm": 0.20553186535835266,
      "learning_rate": 5.7591221541655285e-05,
      "loss": 0.9764,
      "step": 5855
    },
    {
      "epoch": 0.6760498384863867,
      "grad_norm": 0.20302440226078033,
      "learning_rate": 5.74089532896255e-05,
      "loss": 0.9554,
      "step": 5860
    },
    {
      "epoch": 0.6766266728195662,
      "grad_norm": 0.19851718842983246,
      "learning_rate": 5.722685772368912e-05,
      "loss": 0.9692,
      "step": 5865
    },
    {
      "epoch": 0.6772035071527457,
      "grad_norm": 0.20243926346302032,
      "learning_rate": 5.704493558215567e-05,
      "loss": 0.967,
      "step": 5870
    },
    {
      "epoch": 0.6777803414859253,
      "grad_norm": 0.19658678770065308,
      "learning_rate": 5.6863187602631354e-05,
      "loss": 0.9167,
      "step": 5875
    },
    {
      "epoch": 0.6783571758191047,
      "grad_norm": 0.18363118171691895,
      "learning_rate": 5.668161452201639e-05,
      "loss": 0.9346,
      "step": 5880
    },
    {
      "epoch": 0.6789340101522843,
      "grad_norm": 0.2019900381565094,
      "learning_rate": 5.650021707650173e-05,
      "loss": 0.9602,
      "step": 5885
    },
    {
      "epoch": 0.6795108444854637,
      "grad_norm": 0.18240347504615784,
      "learning_rate": 5.6318996001566384e-05,
      "loss": 0.9663,
      "step": 5890
    },
    {
      "epoch": 0.6800876788186433,
      "grad_norm": 0.19030514359474182,
      "learning_rate": 5.613795203197401e-05,
      "loss": 0.9382,
      "step": 5895
    },
    {
      "epoch": 0.6806645131518227,
      "grad_norm": 0.18463625013828278,
      "learning_rate": 5.5957085901770424e-05,
      "loss": 0.9487,
      "step": 5900
    },
    {
      "epoch": 0.6812413474850023,
      "grad_norm": 0.18482211232185364,
      "learning_rate": 5.577639834428026e-05,
      "loss": 0.96,
      "step": 5905
    },
    {
      "epoch": 0.6818181818181818,
      "grad_norm": 0.19854898750782013,
      "learning_rate": 5.559589009210421e-05,
      "loss": 0.9632,
      "step": 5910
    },
    {
      "epoch": 0.6823950161513613,
      "grad_norm": 0.19982431828975677,
      "learning_rate": 5.5415561877115876e-05,
      "loss": 0.9312,
      "step": 5915
    },
    {
      "epoch": 0.6829718504845408,
      "grad_norm": 0.19143183529376984,
      "learning_rate": 5.523541443045904e-05,
      "loss": 0.9736,
      "step": 5920
    },
    {
      "epoch": 0.6835486848177204,
      "grad_norm": 0.20476052165031433,
      "learning_rate": 5.505544848254432e-05,
      "loss": 0.9658,
      "step": 5925
    },
    {
      "epoch": 0.6841255191508998,
      "grad_norm": 0.19165083765983582,
      "learning_rate": 5.4875664763046705e-05,
      "loss": 0.9917,
      "step": 5930
    },
    {
      "epoch": 0.6847023534840794,
      "grad_norm": 0.19160091876983643,
      "learning_rate": 5.4696064000902146e-05,
      "loss": 0.952,
      "step": 5935
    },
    {
      "epoch": 0.6852791878172588,
      "grad_norm": 0.22697453200817108,
      "learning_rate": 5.451664692430493e-05,
      "loss": 0.959,
      "step": 5940
    },
    {
      "epoch": 0.6858560221504384,
      "grad_norm": 0.21347078680992126,
      "learning_rate": 5.433741426070442e-05,
      "loss": 0.9489,
      "step": 5945
    },
    {
      "epoch": 0.686432856483618,
      "grad_norm": 0.19163408875465393,
      "learning_rate": 5.415836673680253e-05,
      "loss": 0.9566,
      "step": 5950
    },
    {
      "epoch": 0.6870096908167974,
      "grad_norm": 0.19686076045036316,
      "learning_rate": 5.3979505078550184e-05,
      "loss": 0.9389,
      "step": 5955
    },
    {
      "epoch": 0.687586525149977,
      "grad_norm": 0.1912279576063156,
      "learning_rate": 5.380083001114503e-05,
      "loss": 0.9249,
      "step": 5960
    },
    {
      "epoch": 0.6881633594831564,
      "grad_norm": 0.20188121497631073,
      "learning_rate": 5.362234225902794e-05,
      "loss": 0.9683,
      "step": 5965
    },
    {
      "epoch": 0.688740193816336,
      "grad_norm": 0.20273169875144958,
      "learning_rate": 5.3444042545880514e-05,
      "loss": 0.9125,
      "step": 5970
    },
    {
      "epoch": 0.6893170281495155,
      "grad_norm": 0.2052476704120636,
      "learning_rate": 5.3265931594621756e-05,
      "loss": 0.9327,
      "step": 5975
    },
    {
      "epoch": 0.689893862482695,
      "grad_norm": 0.19370770454406738,
      "learning_rate": 5.3088010127405496e-05,
      "loss": 0.9664,
      "step": 5980
    },
    {
      "epoch": 0.6904706968158745,
      "grad_norm": 0.19764676690101624,
      "learning_rate": 5.29102788656172e-05,
      "loss": 0.9241,
      "step": 5985
    },
    {
      "epoch": 0.691047531149054,
      "grad_norm": 0.2198079228401184,
      "learning_rate": 5.273273852987113e-05,
      "loss": 0.9722,
      "step": 5990
    },
    {
      "epoch": 0.6916243654822335,
      "grad_norm": 0.1917632520198822,
      "learning_rate": 5.255538984000753e-05,
      "loss": 0.9572,
      "step": 5995
    },
    {
      "epoch": 0.6922011998154131,
      "grad_norm": 0.19673052430152893,
      "learning_rate": 5.237823351508953e-05,
      "loss": 0.9546,
      "step": 6000
    },
    {
      "epoch": 0.6927780341485925,
      "grad_norm": 0.18753381073474884,
      "learning_rate": 5.2201270273400296e-05,
      "loss": 0.9448,
      "step": 6005
    },
    {
      "epoch": 0.6933548684817721,
      "grad_norm": 0.18277138471603394,
      "learning_rate": 5.202450083244026e-05,
      "loss": 0.9748,
      "step": 6010
    },
    {
      "epoch": 0.6939317028149515,
      "grad_norm": 0.1970137506723404,
      "learning_rate": 5.184792590892397e-05,
      "loss": 0.9961,
      "step": 6015
    },
    {
      "epoch": 0.6945085371481311,
      "grad_norm": 0.19395023584365845,
      "learning_rate": 5.167154621877728e-05,
      "loss": 0.9396,
      "step": 6020
    },
    {
      "epoch": 0.6950853714813106,
      "grad_norm": 0.1935676783323288,
      "learning_rate": 5.14953624771346e-05,
      "loss": 0.9277,
      "step": 6025
    },
    {
      "epoch": 0.6956622058144901,
      "grad_norm": 0.21159899234771729,
      "learning_rate": 5.131937539833571e-05,
      "loss": 0.9511,
      "step": 6030
    },
    {
      "epoch": 0.6962390401476696,
      "grad_norm": 0.1984950304031372,
      "learning_rate": 5.1143585695923166e-05,
      "loss": 0.9859,
      "step": 6035
    },
    {
      "epoch": 0.6968158744808491,
      "grad_norm": 0.19086718559265137,
      "learning_rate": 5.09679940826391e-05,
      "loss": 0.9942,
      "step": 6040
    },
    {
      "epoch": 0.6973927088140286,
      "grad_norm": 0.197454035282135,
      "learning_rate": 5.079260127042267e-05,
      "loss": 0.9919,
      "step": 6045
    },
    {
      "epoch": 0.6979695431472082,
      "grad_norm": 0.20797502994537354,
      "learning_rate": 5.061740797040684e-05,
      "loss": 0.9213,
      "step": 6050
    },
    {
      "epoch": 0.6985463774803876,
      "grad_norm": 0.18882066011428833,
      "learning_rate": 5.044241489291569e-05,
      "loss": 0.9691,
      "step": 6055
    },
    {
      "epoch": 0.6991232118135672,
      "grad_norm": 0.19341005384922028,
      "learning_rate": 5.0267622747461487e-05,
      "loss": 0.9267,
      "step": 6060
    },
    {
      "epoch": 0.6997000461467466,
      "grad_norm": 0.20529086887836456,
      "learning_rate": 5.009303224274191e-05,
      "loss": 0.9937,
      "step": 6065
    },
    {
      "epoch": 0.7002768804799262,
      "grad_norm": 0.1887139081954956,
      "learning_rate": 4.991864408663692e-05,
      "loss": 0.9477,
      "step": 6070
    },
    {
      "epoch": 0.7008537148131057,
      "grad_norm": 0.19098982214927673,
      "learning_rate": 4.974445898620622e-05,
      "loss": 0.9689,
      "step": 6075
    },
    {
      "epoch": 0.7014305491462852,
      "grad_norm": 0.19821666181087494,
      "learning_rate": 4.957047764768612e-05,
      "loss": 0.9333,
      "step": 6080
    },
    {
      "epoch": 0.7020073834794647,
      "grad_norm": 0.1881197690963745,
      "learning_rate": 4.939670077648676e-05,
      "loss": 0.9504,
      "step": 6085
    },
    {
      "epoch": 0.7025842178126442,
      "grad_norm": 0.19426091015338898,
      "learning_rate": 4.922312907718929e-05,
      "loss": 1.0074,
      "step": 6090
    },
    {
      "epoch": 0.7031610521458237,
      "grad_norm": 0.18885859847068787,
      "learning_rate": 4.9049763253543054e-05,
      "loss": 0.9706,
      "step": 6095
    },
    {
      "epoch": 0.7037378864790033,
      "grad_norm": 0.20289480686187744,
      "learning_rate": 4.8876604008462554e-05,
      "loss": 0.9547,
      "step": 6100
    },
    {
      "epoch": 0.7043147208121827,
      "grad_norm": 0.2023826539516449,
      "learning_rate": 4.870365204402483e-05,
      "loss": 1.005,
      "step": 6105
    },
    {
      "epoch": 0.7048915551453623,
      "grad_norm": 0.18636111915111542,
      "learning_rate": 4.8530908061466404e-05,
      "loss": 0.9311,
      "step": 6110
    },
    {
      "epoch": 0.7054683894785417,
      "grad_norm": 0.18160183727741241,
      "learning_rate": 4.835837276118058e-05,
      "loss": 0.9579,
      "step": 6115
    },
    {
      "epoch": 0.7060452238117213,
      "grad_norm": 0.19551381468772888,
      "learning_rate": 4.8186046842714504e-05,
      "loss": 0.8944,
      "step": 6120
    },
    {
      "epoch": 0.7066220581449008,
      "grad_norm": 0.18871080875396729,
      "learning_rate": 4.801393100476651e-05,
      "loss": 1.0176,
      "step": 6125
    },
    {
      "epoch": 0.7071988924780803,
      "grad_norm": 0.1864621639251709,
      "learning_rate": 4.784202594518298e-05,
      "loss": 0.945,
      "step": 6130
    },
    {
      "epoch": 0.7077757268112598,
      "grad_norm": 0.1970556527376175,
      "learning_rate": 4.767033236095585e-05,
      "loss": 0.9587,
      "step": 6135
    },
    {
      "epoch": 0.7083525611444393,
      "grad_norm": 0.19122757017612457,
      "learning_rate": 4.749885094821951e-05,
      "loss": 0.9473,
      "step": 6140
    },
    {
      "epoch": 0.7089293954776188,
      "grad_norm": 0.1870802640914917,
      "learning_rate": 4.732758240224818e-05,
      "loss": 0.965,
      "step": 6145
    },
    {
      "epoch": 0.7095062298107984,
      "grad_norm": 0.20964084565639496,
      "learning_rate": 4.715652741745298e-05,
      "loss": 0.9074,
      "step": 6150
    },
    {
      "epoch": 0.7100830641439778,
      "grad_norm": 0.1921871155500412,
      "learning_rate": 4.6985686687379103e-05,
      "loss": 0.9391,
      "step": 6155
    },
    {
      "epoch": 0.7106598984771574,
      "grad_norm": 0.205277681350708,
      "learning_rate": 4.6815060904703046e-05,
      "loss": 0.9321,
      "step": 6160
    },
    {
      "epoch": 0.7112367328103368,
      "grad_norm": 0.18848411738872528,
      "learning_rate": 4.664465076122991e-05,
      "loss": 0.9028,
      "step": 6165
    },
    {
      "epoch": 0.7118135671435164,
      "grad_norm": 0.18404066562652588,
      "learning_rate": 4.647445694789032e-05,
      "loss": 0.9967,
      "step": 6170
    },
    {
      "epoch": 0.7123904014766959,
      "grad_norm": 0.18522439897060394,
      "learning_rate": 4.630448015473794e-05,
      "loss": 0.9022,
      "step": 6175
    },
    {
      "epoch": 0.7129672358098754,
      "grad_norm": 0.1914357990026474,
      "learning_rate": 4.613472107094641e-05,
      "loss": 0.9313,
      "step": 6180
    },
    {
      "epoch": 0.7135440701430549,
      "grad_norm": 0.18472833931446075,
      "learning_rate": 4.596518038480667e-05,
      "loss": 0.9544,
      "step": 6185
    },
    {
      "epoch": 0.7141209044762344,
      "grad_norm": 0.1849919557571411,
      "learning_rate": 4.579585878372428e-05,
      "loss": 0.9521,
      "step": 6190
    },
    {
      "epoch": 0.7146977388094139,
      "grad_norm": 0.2014801949262619,
      "learning_rate": 4.562675695421634e-05,
      "loss": 0.9397,
      "step": 6195
    },
    {
      "epoch": 0.7152745731425935,
      "grad_norm": 0.19745898246765137,
      "learning_rate": 4.545787558190907e-05,
      "loss": 0.9671,
      "step": 6200
    },
    {
      "epoch": 0.7158514074757729,
      "grad_norm": 0.2034626305103302,
      "learning_rate": 4.5289215351534666e-05,
      "loss": 1.0031,
      "step": 6205
    },
    {
      "epoch": 0.7164282418089525,
      "grad_norm": 0.19451607763767242,
      "learning_rate": 4.512077694692888e-05,
      "loss": 0.924,
      "step": 6210
    },
    {
      "epoch": 0.7170050761421319,
      "grad_norm": 0.19638855755329132,
      "learning_rate": 4.495256105102784e-05,
      "loss": 0.9372,
      "step": 6215
    },
    {
      "epoch": 0.7175819104753115,
      "grad_norm": 0.18954864144325256,
      "learning_rate": 4.478456834586574e-05,
      "loss": 0.9775,
      "step": 6220
    },
    {
      "epoch": 0.718158744808491,
      "grad_norm": 0.1834801435470581,
      "learning_rate": 4.4616799512571675e-05,
      "loss": 0.946,
      "step": 6225
    },
    {
      "epoch": 0.7187355791416705,
      "grad_norm": 0.2009892612695694,
      "learning_rate": 4.4449255231367183e-05,
      "loss": 0.9131,
      "step": 6230
    },
    {
      "epoch": 0.71931241347485,
      "grad_norm": 0.1912214159965515,
      "learning_rate": 4.428193618156322e-05,
      "loss": 0.9248,
      "step": 6235
    },
    {
      "epoch": 0.7198892478080295,
      "grad_norm": 0.19237951934337616,
      "learning_rate": 4.411484304155771e-05,
      "loss": 0.9305,
      "step": 6240
    },
    {
      "epoch": 0.720466082141209,
      "grad_norm": 0.1901046186685562,
      "learning_rate": 4.394797648883236e-05,
      "loss": 0.9259,
      "step": 6245
    },
    {
      "epoch": 0.7210429164743886,
      "grad_norm": 0.19405438005924225,
      "learning_rate": 4.378133719995044e-05,
      "loss": 0.9308,
      "step": 6250
    },
    {
      "epoch": 0.721619750807568,
      "grad_norm": 0.19073431193828583,
      "learning_rate": 4.36149258505536e-05,
      "loss": 0.9325,
      "step": 6255
    },
    {
      "epoch": 0.7221965851407476,
      "grad_norm": 0.18729381263256073,
      "learning_rate": 4.344874311535944e-05,
      "loss": 0.9674,
      "step": 6260
    },
    {
      "epoch": 0.722773419473927,
      "grad_norm": 0.19104944169521332,
      "learning_rate": 4.3282789668158476e-05,
      "loss": 0.9328,
      "step": 6265
    },
    {
      "epoch": 0.7233502538071066,
      "grad_norm": 0.20195722579956055,
      "learning_rate": 4.31170661818118e-05,
      "loss": 0.9785,
      "step": 6270
    },
    {
      "epoch": 0.7239270881402861,
      "grad_norm": 0.18547381460666656,
      "learning_rate": 4.295157332824785e-05,
      "loss": 0.9534,
      "step": 6275
    },
    {
      "epoch": 0.7245039224734656,
      "grad_norm": 0.18389153480529785,
      "learning_rate": 4.27863117784602e-05,
      "loss": 0.9408,
      "step": 6280
    },
    {
      "epoch": 0.7250807568066451,
      "grad_norm": 0.1920400708913803,
      "learning_rate": 4.262128220250441e-05,
      "loss": 0.9413,
      "step": 6285
    },
    {
      "epoch": 0.7256575911398246,
      "grad_norm": 0.18754833936691284,
      "learning_rate": 4.245648526949567e-05,
      "loss": 0.9961,
      "step": 6290
    },
    {
      "epoch": 0.7262344254730042,
      "grad_norm": 0.19839036464691162,
      "learning_rate": 4.229192164760576e-05,
      "loss": 0.961,
      "step": 6295
    },
    {
      "epoch": 0.7268112598061837,
      "grad_norm": 0.20525625348091125,
      "learning_rate": 4.212759200406065e-05,
      "loss": 0.9767,
      "step": 6300
    },
    {
      "epoch": 0.7273880941393632,
      "grad_norm": 0.20254795253276825,
      "learning_rate": 4.1963497005137516e-05,
      "loss": 0.9062,
      "step": 6305
    },
    {
      "epoch": 0.7279649284725427,
      "grad_norm": 0.20592284202575684,
      "learning_rate": 4.179963731616221e-05,
      "loss": 0.9707,
      "step": 6310
    },
    {
      "epoch": 0.7285417628057222,
      "grad_norm": 0.1987779587507248,
      "learning_rate": 4.163601360150646e-05,
      "loss": 0.9244,
      "step": 6315
    },
    {
      "epoch": 0.7291185971389017,
      "grad_norm": 0.1847141832113266,
      "learning_rate": 4.147262652458539e-05,
      "loss": 0.9771,
      "step": 6320
    },
    {
      "epoch": 0.7296954314720813,
      "grad_norm": 0.18493050336837769,
      "learning_rate": 4.130947674785447e-05,
      "loss": 0.9005,
      "step": 6325
    },
    {
      "epoch": 0.7302722658052607,
      "grad_norm": 0.18646039068698883,
      "learning_rate": 4.114656493280721e-05,
      "loss": 0.938,
      "step": 6330
    },
    {
      "epoch": 0.7308491001384403,
      "grad_norm": 0.2044810801744461,
      "learning_rate": 4.098389173997218e-05,
      "loss": 0.9636,
      "step": 6335
    },
    {
      "epoch": 0.7314259344716197,
      "grad_norm": 0.20317162573337555,
      "learning_rate": 4.082145782891046e-05,
      "loss": 0.9499,
      "step": 6340
    },
    {
      "epoch": 0.7320027688047993,
      "grad_norm": 0.19699914753437042,
      "learning_rate": 4.065926385821307e-05,
      "loss": 0.9391,
      "step": 6345
    },
    {
      "epoch": 0.7325796031379788,
      "grad_norm": 0.18906693160533905,
      "learning_rate": 4.049731048549804e-05,
      "loss": 0.9166,
      "step": 6350
    },
    {
      "epoch": 0.7331564374711583,
      "grad_norm": 0.20645971596240997,
      "learning_rate": 4.033559836740801e-05,
      "loss": 0.9581,
      "step": 6355
    },
    {
      "epoch": 0.7337332718043378,
      "grad_norm": 0.18750208616256714,
      "learning_rate": 4.017412815960735e-05,
      "loss": 0.9418,
      "step": 6360
    },
    {
      "epoch": 0.7343101061375173,
      "grad_norm": 0.18464316427707672,
      "learning_rate": 4.001290051677975e-05,
      "loss": 0.9937,
      "step": 6365
    },
    {
      "epoch": 0.7348869404706968,
      "grad_norm": 0.19745340943336487,
      "learning_rate": 3.985191609262519e-05,
      "loss": 0.9188,
      "step": 6370
    },
    {
      "epoch": 0.7354637748038764,
      "grad_norm": 0.2214672714471817,
      "learning_rate": 3.969117553985772e-05,
      "loss": 0.9521,
      "step": 6375
    },
    {
      "epoch": 0.7360406091370558,
      "grad_norm": 0.1932508647441864,
      "learning_rate": 3.9530679510202476e-05,
      "loss": 0.9164,
      "step": 6380
    },
    {
      "epoch": 0.7366174434702354,
      "grad_norm": 0.1878437101840973,
      "learning_rate": 3.9370428654393296e-05,
      "loss": 0.9119,
      "step": 6385
    },
    {
      "epoch": 0.7371942778034148,
      "grad_norm": 0.19011224806308746,
      "learning_rate": 3.921042362216983e-05,
      "loss": 0.9232,
      "step": 6390
    },
    {
      "epoch": 0.7377711121365944,
      "grad_norm": 0.17758631706237793,
      "learning_rate": 3.905066506227515e-05,
      "loss": 0.937,
      "step": 6395
    },
    {
      "epoch": 0.7383479464697739,
      "grad_norm": 0.19590388238430023,
      "learning_rate": 3.8891153622452904e-05,
      "loss": 0.8993,
      "step": 6400
    },
    {
      "epoch": 0.7389247808029534,
      "grad_norm": 0.1892813891172409,
      "learning_rate": 3.873188994944483e-05,
      "loss": 0.9759,
      "step": 6405
    },
    {
      "epoch": 0.7395016151361329,
      "grad_norm": 0.19297149777412415,
      "learning_rate": 3.857287468898806e-05,
      "loss": 0.9918,
      "step": 6410
    },
    {
      "epoch": 0.7400784494693124,
      "grad_norm": 0.1931808739900589,
      "learning_rate": 3.8414108485812613e-05,
      "loss": 0.9332,
      "step": 6415
    },
    {
      "epoch": 0.7406552838024919,
      "grad_norm": 0.18963101506233215,
      "learning_rate": 3.825559198363861e-05,
      "loss": 0.9274,
      "step": 6420
    },
    {
      "epoch": 0.7412321181356715,
      "grad_norm": 0.18943539261817932,
      "learning_rate": 3.8097325825173826e-05,
      "loss": 0.9751,
      "step": 6425
    },
    {
      "epoch": 0.7418089524688509,
      "grad_norm": 0.1900118887424469,
      "learning_rate": 3.793931065211096e-05,
      "loss": 0.9527,
      "step": 6430
    },
    {
      "epoch": 0.7423857868020305,
      "grad_norm": 0.1904015690088272,
      "learning_rate": 3.778154710512512e-05,
      "loss": 0.9884,
      "step": 6435
    },
    {
      "epoch": 0.7429626211352099,
      "grad_norm": 0.19013242423534393,
      "learning_rate": 3.762403582387114e-05,
      "loss": 0.9467,
      "step": 6440
    },
    {
      "epoch": 0.7435394554683895,
      "grad_norm": 0.18729767203330994,
      "learning_rate": 3.746677744698114e-05,
      "loss": 0.9803,
      "step": 6445
    },
    {
      "epoch": 0.744116289801569,
      "grad_norm": 0.20547720789909363,
      "learning_rate": 3.730977261206171e-05,
      "loss": 0.9168,
      "step": 6450
    },
    {
      "epoch": 0.7446931241347485,
      "grad_norm": 0.20597174763679504,
      "learning_rate": 3.715302195569159e-05,
      "loss": 0.9868,
      "step": 6455
    },
    {
      "epoch": 0.745269958467928,
      "grad_norm": 0.1934199035167694,
      "learning_rate": 3.69965261134188e-05,
      "loss": 1.036,
      "step": 6460
    },
    {
      "epoch": 0.7458467928011075,
      "grad_norm": 0.19105862081050873,
      "learning_rate": 3.684028571975836e-05,
      "loss": 0.9528,
      "step": 6465
    },
    {
      "epoch": 0.746423627134287,
      "grad_norm": 0.19618894159793854,
      "learning_rate": 3.6684301408189406e-05,
      "loss": 1.0373,
      "step": 6470
    },
    {
      "epoch": 0.7470004614674666,
      "grad_norm": 0.18884247541427612,
      "learning_rate": 3.652857381115293e-05,
      "loss": 0.9981,
      "step": 6475
    },
    {
      "epoch": 0.747577295800646,
      "grad_norm": 0.1874423772096634,
      "learning_rate": 3.637310356004897e-05,
      "loss": 0.964,
      "step": 6480
    },
    {
      "epoch": 0.7481541301338256,
      "grad_norm": 0.19521760940551758,
      "learning_rate": 3.6217891285234265e-05,
      "loss": 0.9497,
      "step": 6485
    },
    {
      "epoch": 0.748730964467005,
      "grad_norm": 0.19494710862636566,
      "learning_rate": 3.6062937616019433e-05,
      "loss": 0.9687,
      "step": 6490
    },
    {
      "epoch": 0.7493077988001846,
      "grad_norm": 0.18842960894107819,
      "learning_rate": 3.5908243180666734e-05,
      "loss": 0.9522,
      "step": 6495
    },
    {
      "epoch": 0.7498846331333641,
      "grad_norm": 0.19816794991493225,
      "learning_rate": 3.5753808606387226e-05,
      "loss": 0.9548,
      "step": 6500
    },
    {
      "epoch": 0.7504614674665436,
      "grad_norm": 0.19967734813690186,
      "learning_rate": 3.55996345193384e-05,
      "loss": 1.0035,
      "step": 6505
    },
    {
      "epoch": 0.7510383017997231,
      "grad_norm": 0.19166813790798187,
      "learning_rate": 3.544572154462165e-05,
      "loss": 0.9374,
      "step": 6510
    },
    {
      "epoch": 0.7516151361329027,
      "grad_norm": 0.2017005980014801,
      "learning_rate": 3.529207030627959e-05,
      "loss": 1.0029,
      "step": 6515
    },
    {
      "epoch": 0.7521919704660821,
      "grad_norm": 0.19286920130252838,
      "learning_rate": 3.513868142729373e-05,
      "loss": 0.9457,
      "step": 6520
    },
    {
      "epoch": 0.7527688047992617,
      "grad_norm": 0.20132075250148773,
      "learning_rate": 3.498555552958176e-05,
      "loss": 0.9899,
      "step": 6525
    },
    {
      "epoch": 0.7533456391324411,
      "grad_norm": 0.22670696675777435,
      "learning_rate": 3.483269323399512e-05,
      "loss": 0.9568,
      "step": 6530
    },
    {
      "epoch": 0.7539224734656207,
      "grad_norm": 0.1912250965833664,
      "learning_rate": 3.468009516031644e-05,
      "loss": 0.9871,
      "step": 6535
    },
    {
      "epoch": 0.7544993077988001,
      "grad_norm": 0.1899501532316208,
      "learning_rate": 3.452776192725717e-05,
      "loss": 0.9634,
      "step": 6540
    },
    {
      "epoch": 0.7550761421319797,
      "grad_norm": 0.1889965683221817,
      "learning_rate": 3.437569415245483e-05,
      "loss": 0.9733,
      "step": 6545
    },
    {
      "epoch": 0.7556529764651592,
      "grad_norm": 0.19462092220783234,
      "learning_rate": 3.422389245247075e-05,
      "loss": 0.951,
      "step": 6550
    },
    {
      "epoch": 0.7562298107983387,
      "grad_norm": 0.1985846608877182,
      "learning_rate": 3.407235744278734e-05,
      "loss": 0.9462,
      "step": 6555
    },
    {
      "epoch": 0.7568066451315182,
      "grad_norm": 0.187138170003891,
      "learning_rate": 3.3921089737805866e-05,
      "loss": 0.9234,
      "step": 6560
    },
    {
      "epoch": 0.7573834794646978,
      "grad_norm": 0.188985213637352,
      "learning_rate": 3.3770089950843564e-05,
      "loss": 0.9074,
      "step": 6565
    },
    {
      "epoch": 0.7579603137978772,
      "grad_norm": 0.20506353676319122,
      "learning_rate": 3.361935869413163e-05,
      "loss": 0.9758,
      "step": 6570
    },
    {
      "epoch": 0.7585371481310568,
      "grad_norm": 0.18956059217453003,
      "learning_rate": 3.3468896578812344e-05,
      "loss": 0.9461,
      "step": 6575
    },
    {
      "epoch": 0.7591139824642362,
      "grad_norm": 0.2013210505247116,
      "learning_rate": 3.331870421493688e-05,
      "loss": 0.9575,
      "step": 6580
    },
    {
      "epoch": 0.7596908167974158,
      "grad_norm": 0.19527879357337952,
      "learning_rate": 3.316878221146253e-05,
      "loss": 0.9456,
      "step": 6585
    },
    {
      "epoch": 0.7602676511305952,
      "grad_norm": 0.1831914484500885,
      "learning_rate": 3.301913117625065e-05,
      "loss": 0.9144,
      "step": 6590
    },
    {
      "epoch": 0.7608444854637748,
      "grad_norm": 0.18544113636016846,
      "learning_rate": 3.286975171606362e-05,
      "loss": 0.9525,
      "step": 6595
    },
    {
      "epoch": 0.7614213197969543,
      "grad_norm": 0.20716486871242523,
      "learning_rate": 3.272064443656303e-05,
      "loss": 0.9571,
      "step": 6600
    },
    {
      "epoch": 0.7619981541301338,
      "grad_norm": 0.18836161494255066,
      "learning_rate": 3.257180994230671e-05,
      "loss": 0.9857,
      "step": 6605
    },
    {
      "epoch": 0.7625749884633133,
      "grad_norm": 0.19700607657432556,
      "learning_rate": 3.2423248836746575e-05,
      "loss": 0.9054,
      "step": 6610
    },
    {
      "epoch": 0.7631518227964929,
      "grad_norm": 0.18617579340934753,
      "learning_rate": 3.227496172222603e-05,
      "loss": 0.9512,
      "step": 6615
    },
    {
      "epoch": 0.7637286571296723,
      "grad_norm": 0.18982037901878357,
      "learning_rate": 3.212694919997764e-05,
      "loss": 0.9649,
      "step": 6620
    },
    {
      "epoch": 0.7643054914628519,
      "grad_norm": 0.19119331240653992,
      "learning_rate": 3.197921187012055e-05,
      "loss": 0.9482,
      "step": 6625
    },
    {
      "epoch": 0.7648823257960313,
      "grad_norm": 0.2058209329843521,
      "learning_rate": 3.1831750331658196e-05,
      "loss": 0.9741,
      "step": 6630
    },
    {
      "epoch": 0.7654591601292109,
      "grad_norm": 0.19063866138458252,
      "learning_rate": 3.168456518247575e-05,
      "loss": 0.9556,
      "step": 6635
    },
    {
      "epoch": 0.7660359944623903,
      "grad_norm": 0.1826866716146469,
      "learning_rate": 3.153765701933784e-05,
      "loss": 0.9362,
      "step": 6640
    },
    {
      "epoch": 0.7666128287955699,
      "grad_norm": 0.20662148296833038,
      "learning_rate": 3.1391026437885984e-05,
      "loss": 0.9941,
      "step": 6645
    },
    {
      "epoch": 0.7671896631287495,
      "grad_norm": 0.19351264834403992,
      "learning_rate": 3.12446740326363e-05,
      "loss": 0.9544,
      "step": 6650
    },
    {
      "epoch": 0.7677664974619289,
      "grad_norm": 0.19532477855682373,
      "learning_rate": 3.109860039697699e-05,
      "loss": 1.0098,
      "step": 6655
    },
    {
      "epoch": 0.7683433317951085,
      "grad_norm": 0.19506360590457916,
      "learning_rate": 3.0952806123165945e-05,
      "loss": 0.9201,
      "step": 6660
    },
    {
      "epoch": 0.768920166128288,
      "grad_norm": 0.19203807413578033,
      "learning_rate": 3.0807291802328494e-05,
      "loss": 0.9102,
      "step": 6665
    },
    {
      "epoch": 0.7694970004614675,
      "grad_norm": 0.19363170862197876,
      "learning_rate": 3.066205802445477e-05,
      "loss": 0.9485,
      "step": 6670
    },
    {
      "epoch": 0.770073834794647,
      "grad_norm": 0.19659185409545898,
      "learning_rate": 3.0517105378397536e-05,
      "loss": 0.9946,
      "step": 6675
    },
    {
      "epoch": 0.7706506691278265,
      "grad_norm": 0.20665878057479858,
      "learning_rate": 3.037243445186965e-05,
      "loss": 0.9688,
      "step": 6680
    },
    {
      "epoch": 0.771227503461006,
      "grad_norm": 0.1891080141067505,
      "learning_rate": 3.0228045831441733e-05,
      "loss": 0.9905,
      "step": 6685
    },
    {
      "epoch": 0.7718043377941856,
      "grad_norm": 0.19424353539943695,
      "learning_rate": 3.0083940102539763e-05,
      "loss": 0.9682,
      "step": 6690
    },
    {
      "epoch": 0.772381172127365,
      "grad_norm": 0.1884242296218872,
      "learning_rate": 2.994011784944284e-05,
      "loss": 1.0159,
      "step": 6695
    },
    {
      "epoch": 0.7729580064605446,
      "grad_norm": 0.191155344247818,
      "learning_rate": 2.9796579655280576e-05,
      "loss": 0.9149,
      "step": 6700
    },
    {
      "epoch": 0.773534840793724,
      "grad_norm": 0.20115402340888977,
      "learning_rate": 2.9653326102030964e-05,
      "loss": 0.9818,
      "step": 6705
    },
    {
      "epoch": 0.7741116751269036,
      "grad_norm": 0.19099047780036926,
      "learning_rate": 2.9510357770517825e-05,
      "loss": 1.0363,
      "step": 6710
    },
    {
      "epoch": 0.774688509460083,
      "grad_norm": 0.18843354284763336,
      "learning_rate": 2.9367675240408654e-05,
      "loss": 0.9309,
      "step": 6715
    },
    {
      "epoch": 0.7752653437932626,
      "grad_norm": 0.19343096017837524,
      "learning_rate": 2.9225279090212067e-05,
      "loss": 0.9773,
      "step": 6720
    },
    {
      "epoch": 0.7758421781264421,
      "grad_norm": 0.19770221412181854,
      "learning_rate": 2.9083169897275552e-05,
      "loss": 0.9453,
      "step": 6725
    },
    {
      "epoch": 0.7764190124596216,
      "grad_norm": 0.1835956573486328,
      "learning_rate": 2.894134823778315e-05,
      "loss": 0.9375,
      "step": 6730
    },
    {
      "epoch": 0.7769958467928011,
      "grad_norm": 0.18945716321468353,
      "learning_rate": 2.8799814686753134e-05,
      "loss": 0.9738,
      "step": 6735
    },
    {
      "epoch": 0.7775726811259807,
      "grad_norm": 0.18679681420326233,
      "learning_rate": 2.8658569818035542e-05,
      "loss": 0.9195,
      "step": 6740
    },
    {
      "epoch": 0.7781495154591601,
      "grad_norm": 0.19722817838191986,
      "learning_rate": 2.851761420431006e-05,
      "loss": 0.9791,
      "step": 6745
    },
    {
      "epoch": 0.7787263497923397,
      "grad_norm": 0.1932711899280548,
      "learning_rate": 2.8376948417083483e-05,
      "loss": 0.9314,
      "step": 6750
    },
    {
      "epoch": 0.7793031841255191,
      "grad_norm": 0.2086237072944641,
      "learning_rate": 2.823657302668755e-05,
      "loss": 0.9436,
      "step": 6755
    },
    {
      "epoch": 0.7798800184586987,
      "grad_norm": 0.18653713166713715,
      "learning_rate": 2.8096488602276528e-05,
      "loss": 0.9361,
      "step": 6760
    },
    {
      "epoch": 0.7804568527918782,
      "grad_norm": 0.1907120645046234,
      "learning_rate": 2.7956695711825075e-05,
      "loss": 0.9439,
      "step": 6765
    },
    {
      "epoch": 0.7810336871250577,
      "grad_norm": 0.18436580896377563,
      "learning_rate": 2.7817194922125666e-05,
      "loss": 0.9436,
      "step": 6770
    },
    {
      "epoch": 0.7816105214582372,
      "grad_norm": 0.18907365202903748,
      "learning_rate": 2.7677986798786615e-05,
      "loss": 0.9657,
      "step": 6775
    },
    {
      "epoch": 0.7821873557914167,
      "grad_norm": 0.19230635464191437,
      "learning_rate": 2.753907190622944e-05,
      "loss": 0.9517,
      "step": 6780
    },
    {
      "epoch": 0.7827641901245962,
      "grad_norm": 0.19662320613861084,
      "learning_rate": 2.7400450807686938e-05,
      "loss": 0.9558,
      "step": 6785
    },
    {
      "epoch": 0.7833410244577758,
      "grad_norm": 0.18257470428943634,
      "learning_rate": 2.726212406520051e-05,
      "loss": 0.9132,
      "step": 6790
    },
    {
      "epoch": 0.7839178587909552,
      "grad_norm": 0.19373179972171783,
      "learning_rate": 2.712409223961826e-05,
      "loss": 0.9496,
      "step": 6795
    },
    {
      "epoch": 0.7844946931241348,
      "grad_norm": 0.19530361890792847,
      "learning_rate": 2.698635589059242e-05,
      "loss": 0.9682,
      "step": 6800
    },
    {
      "epoch": 0.7850715274573142,
      "grad_norm": 0.18772435188293457,
      "learning_rate": 2.6848915576577317e-05,
      "loss": 0.9552,
      "step": 6805
    },
    {
      "epoch": 0.7856483617904938,
      "grad_norm": 0.19148732721805573,
      "learning_rate": 2.6711771854826905e-05,
      "loss": 0.9047,
      "step": 6810
    },
    {
      "epoch": 0.7862251961236733,
      "grad_norm": 0.19749705493450165,
      "learning_rate": 2.657492528139268e-05,
      "loss": 0.9442,
      "step": 6815
    },
    {
      "epoch": 0.7868020304568528,
      "grad_norm": 0.18885494768619537,
      "learning_rate": 2.643837641112128e-05,
      "loss": 0.9223,
      "step": 6820
    },
    {
      "epoch": 0.7873788647900323,
      "grad_norm": 0.19046206772327423,
      "learning_rate": 2.630212579765231e-05,
      "loss": 0.9801,
      "step": 6825
    },
    {
      "epoch": 0.7879556991232118,
      "grad_norm": 0.19383732974529266,
      "learning_rate": 2.6166173993416154e-05,
      "loss": 1.0104,
      "step": 6830
    },
    {
      "epoch": 0.7885325334563913,
      "grad_norm": 0.20124277472496033,
      "learning_rate": 2.603052154963158e-05,
      "loss": 1.008,
      "step": 6835
    },
    {
      "epoch": 0.7891093677895709,
      "grad_norm": 0.20482531189918518,
      "learning_rate": 2.5895169016303623e-05,
      "loss": 0.8936,
      "step": 6840
    },
    {
      "epoch": 0.7896862021227503,
      "grad_norm": 0.1819208562374115,
      "learning_rate": 2.576011694222139e-05,
      "loss": 0.9452,
      "step": 6845
    },
    {
      "epoch": 0.7902630364559299,
      "grad_norm": 0.18517275154590607,
      "learning_rate": 2.5625365874955674e-05,
      "loss": 0.8879,
      "step": 6850
    },
    {
      "epoch": 0.7908398707891093,
      "grad_norm": 0.18698491156101227,
      "learning_rate": 2.5490916360856853e-05,
      "loss": 0.929,
      "step": 6855
    },
    {
      "epoch": 0.7914167051222889,
      "grad_norm": 0.18061292171478271,
      "learning_rate": 2.5356768945052745e-05,
      "loss": 0.9385,
      "step": 6860
    },
    {
      "epoch": 0.7919935394554684,
      "grad_norm": 0.1996062994003296,
      "learning_rate": 2.522292417144617e-05,
      "loss": 0.9852,
      "step": 6865
    },
    {
      "epoch": 0.7925703737886479,
      "grad_norm": 0.1814109832048416,
      "learning_rate": 2.5089382582712994e-05,
      "loss": 0.9543,
      "step": 6870
    },
    {
      "epoch": 0.7931472081218274,
      "grad_norm": 0.19972620904445648,
      "learning_rate": 2.4956144720299712e-05,
      "loss": 0.9139,
      "step": 6875
    },
    {
      "epoch": 0.7937240424550069,
      "grad_norm": 0.19318068027496338,
      "learning_rate": 2.482321112442151e-05,
      "loss": 1.0012,
      "step": 6880
    },
    {
      "epoch": 0.7943008767881864,
      "grad_norm": 0.18499110639095306,
      "learning_rate": 2.4690582334059685e-05,
      "loss": 0.9307,
      "step": 6885
    },
    {
      "epoch": 0.794877711121366,
      "grad_norm": 0.1849159598350525,
      "learning_rate": 2.455825888695994e-05,
      "loss": 0.9354,
      "step": 6890
    },
    {
      "epoch": 0.7954545454545454,
      "grad_norm": 0.1951223909854889,
      "learning_rate": 2.4426241319629772e-05,
      "loss": 0.9596,
      "step": 6895
    },
    {
      "epoch": 0.796031379787725,
      "grad_norm": 0.19276456534862518,
      "learning_rate": 2.4294530167336615e-05,
      "loss": 0.9273,
      "step": 6900
    },
    {
      "epoch": 0.7966082141209044,
      "grad_norm": 0.1889081597328186,
      "learning_rate": 2.4163125964105448e-05,
      "loss": 0.979,
      "step": 6905
    },
    {
      "epoch": 0.797185048454084,
      "grad_norm": 0.19000983238220215,
      "learning_rate": 2.4032029242716826e-05,
      "loss": 0.9286,
      "step": 6910
    },
    {
      "epoch": 0.7977618827872635,
      "grad_norm": 0.19312219321727753,
      "learning_rate": 2.390124053470443e-05,
      "loss": 0.9655,
      "step": 6915
    },
    {
      "epoch": 0.798338717120443,
      "grad_norm": 0.19184233248233795,
      "learning_rate": 2.3770760370353294e-05,
      "loss": 0.964,
      "step": 6920
    },
    {
      "epoch": 0.7989155514536225,
      "grad_norm": 0.19048276543617249,
      "learning_rate": 2.364058927869732e-05,
      "loss": 0.9578,
      "step": 6925
    },
    {
      "epoch": 0.799492385786802,
      "grad_norm": 0.19102397561073303,
      "learning_rate": 2.3510727787517382e-05,
      "loss": 0.9342,
      "step": 6930
    },
    {
      "epoch": 0.8000692201199815,
      "grad_norm": 0.2003382444381714,
      "learning_rate": 2.3381176423338956e-05,
      "loss": 0.9429,
      "step": 6935
    },
    {
      "epoch": 0.8006460544531611,
      "grad_norm": 0.1823520064353943,
      "learning_rate": 2.325193571143024e-05,
      "loss": 0.9617,
      "step": 6940
    },
    {
      "epoch": 0.8012228887863405,
      "grad_norm": 0.18638677895069122,
      "learning_rate": 2.31230061757997e-05,
      "loss": 0.9731,
      "step": 6945
    },
    {
      "epoch": 0.8017997231195201,
      "grad_norm": 0.19570128619670868,
      "learning_rate": 2.299438833919432e-05,
      "loss": 0.9684,
      "step": 6950
    },
    {
      "epoch": 0.8023765574526995,
      "grad_norm": 0.18794956803321838,
      "learning_rate": 2.286608272309716e-05,
      "loss": 0.9115,
      "step": 6955
    },
    {
      "epoch": 0.8029533917858791,
      "grad_norm": 0.20243585109710693,
      "learning_rate": 2.2738089847725497e-05,
      "loss": 0.9751,
      "step": 6960
    },
    {
      "epoch": 0.8035302261190586,
      "grad_norm": 0.18690462410449982,
      "learning_rate": 2.2610410232028467e-05,
      "loss": 0.9299,
      "step": 6965
    },
    {
      "epoch": 0.8041070604522381,
      "grad_norm": 0.1870652735233307,
      "learning_rate": 2.2483044393685215e-05,
      "loss": 0.9366,
      "step": 6970
    },
    {
      "epoch": 0.8046838947854176,
      "grad_norm": 0.2046535462141037,
      "learning_rate": 2.235599284910258e-05,
      "loss": 0.9911,
      "step": 6975
    },
    {
      "epoch": 0.8052607291185971,
      "grad_norm": 0.18992750346660614,
      "learning_rate": 2.2229256113413087e-05,
      "loss": 0.9751,
      "step": 6980
    },
    {
      "epoch": 0.8058375634517766,
      "grad_norm": 0.19411560893058777,
      "learning_rate": 2.210283470047296e-05,
      "loss": 0.937,
      "step": 6985
    },
    {
      "epoch": 0.8064143977849562,
      "grad_norm": 0.1944170445203781,
      "learning_rate": 2.1976729122859864e-05,
      "loss": 0.9326,
      "step": 6990
    },
    {
      "epoch": 0.8069912321181357,
      "grad_norm": 0.18680644035339355,
      "learning_rate": 2.185093989187087e-05,
      "loss": 0.936,
      "step": 6995
    },
    {
      "epoch": 0.8075680664513152,
      "grad_norm": 0.18400752544403076,
      "learning_rate": 2.1725467517520526e-05,
      "loss": 0.9861,
      "step": 7000
    },
    {
      "epoch": 0.8081449007844947,
      "grad_norm": 0.19621169567108154,
      "learning_rate": 2.1600312508538602e-05,
      "loss": 0.9284,
      "step": 7005
    },
    {
      "epoch": 0.8087217351176742,
      "grad_norm": 0.18958927690982819,
      "learning_rate": 2.1475475372368094e-05,
      "loss": 0.9258,
      "step": 7010
    },
    {
      "epoch": 0.8092985694508538,
      "grad_norm": 0.20696218311786652,
      "learning_rate": 2.1350956615163254e-05,
      "loss": 0.9426,
      "step": 7015
    },
    {
      "epoch": 0.8098754037840332,
      "grad_norm": 0.20199459791183472,
      "learning_rate": 2.1226756741787356e-05,
      "loss": 0.9625,
      "step": 7020
    },
    {
      "epoch": 0.8104522381172128,
      "grad_norm": 0.1961347460746765,
      "learning_rate": 2.1102876255810887e-05,
      "loss": 0.9664,
      "step": 7025
    },
    {
      "epoch": 0.8110290724503922,
      "grad_norm": 0.19894124567508698,
      "learning_rate": 2.0979315659509223e-05,
      "loss": 0.9508,
      "step": 7030
    },
    {
      "epoch": 0.8116059067835718,
      "grad_norm": 0.1998283416032791,
      "learning_rate": 2.085607545386088e-05,
      "loss": 0.9117,
      "step": 7035
    },
    {
      "epoch": 0.8121827411167513,
      "grad_norm": 0.18765446543693542,
      "learning_rate": 2.0733156138545252e-05,
      "loss": 0.9908,
      "step": 7040
    },
    {
      "epoch": 0.8127595754499308,
      "grad_norm": 0.18497121334075928,
      "learning_rate": 2.0610558211940702e-05,
      "loss": 0.9453,
      "step": 7045
    },
    {
      "epoch": 0.8133364097831103,
      "grad_norm": 0.20545774698257446,
      "learning_rate": 2.0488282171122498e-05,
      "loss": 1.0136,
      "step": 7050
    },
    {
      "epoch": 0.8139132441162898,
      "grad_norm": 0.18799108266830444,
      "learning_rate": 2.036632851186091e-05,
      "loss": 0.9834,
      "step": 7055
    },
    {
      "epoch": 0.8144900784494693,
      "grad_norm": 0.17781130969524384,
      "learning_rate": 2.0244697728618966e-05,
      "loss": 0.906,
      "step": 7060
    },
    {
      "epoch": 0.8150669127826489,
      "grad_norm": 0.1909274309873581,
      "learning_rate": 2.0123390314550717e-05,
      "loss": 0.9443,
      "step": 7065
    },
    {
      "epoch": 0.8156437471158283,
      "grad_norm": 0.1991771012544632,
      "learning_rate": 2.000240676149904e-05,
      "loss": 0.9451,
      "step": 7070
    },
    {
      "epoch": 0.8162205814490079,
      "grad_norm": 0.18566997349262238,
      "learning_rate": 1.9881747559993703e-05,
      "loss": 0.9015,
      "step": 7075
    },
    {
      "epoch": 0.8167974157821873,
      "grad_norm": 0.19024524092674255,
      "learning_rate": 1.976141319924939e-05,
      "loss": 0.9458,
      "step": 7080
    },
    {
      "epoch": 0.8173742501153669,
      "grad_norm": 0.19629044830799103,
      "learning_rate": 1.964140416716379e-05,
      "loss": 0.9608,
      "step": 7085
    },
    {
      "epoch": 0.8179510844485464,
      "grad_norm": 0.185940220952034,
      "learning_rate": 1.9521720950315403e-05,
      "loss": 0.8914,
      "step": 7090
    },
    {
      "epoch": 0.8185279187817259,
      "grad_norm": 0.18228422105312347,
      "learning_rate": 1.940236403396186e-05,
      "loss": 0.9645,
      "step": 7095
    },
    {
      "epoch": 0.8191047531149054,
      "grad_norm": 0.18275770545005798,
      "learning_rate": 1.9283333902037694e-05,
      "loss": 0.9554,
      "step": 7100
    },
    {
      "epoch": 0.819681587448085,
      "grad_norm": 0.19047953188419342,
      "learning_rate": 1.9164631037152513e-05,
      "loss": 0.9352,
      "step": 7105
    },
    {
      "epoch": 0.8202584217812644,
      "grad_norm": 0.211176335811615,
      "learning_rate": 1.9046255920588985e-05,
      "loss": 0.9209,
      "step": 7110
    },
    {
      "epoch": 0.820835256114444,
      "grad_norm": 0.19080542027950287,
      "learning_rate": 1.8928209032301013e-05,
      "loss": 0.9404,
      "step": 7115
    },
    {
      "epoch": 0.8214120904476234,
      "grad_norm": 0.20228640735149384,
      "learning_rate": 1.8810490850911577e-05,
      "loss": 0.9741,
      "step": 7120
    },
    {
      "epoch": 0.821988924780803,
      "grad_norm": 0.19433507323265076,
      "learning_rate": 1.8693101853711004e-05,
      "loss": 0.9596,
      "step": 7125
    },
    {
      "epoch": 0.8225657591139824,
      "grad_norm": 0.20206230878829956,
      "learning_rate": 1.857604251665487e-05,
      "loss": 0.9728,
      "step": 7130
    },
    {
      "epoch": 0.823142593447162,
      "grad_norm": 0.1768370121717453,
      "learning_rate": 1.845931331436219e-05,
      "loss": 0.9523,
      "step": 7135
    },
    {
      "epoch": 0.8237194277803415,
      "grad_norm": 0.19058267772197723,
      "learning_rate": 1.8342914720113404e-05,
      "loss": 0.982,
      "step": 7140
    },
    {
      "epoch": 0.824296262113521,
      "grad_norm": 0.1983305811882019,
      "learning_rate": 1.822684720584852e-05,
      "loss": 1.0006,
      "step": 7145
    },
    {
      "epoch": 0.8248730964467005,
      "grad_norm": 0.19151514768600464,
      "learning_rate": 1.8111111242165124e-05,
      "loss": 1.0013,
      "step": 7150
    },
    {
      "epoch": 0.82544993077988,
      "grad_norm": 0.1844472587108612,
      "learning_rate": 1.7995707298316632e-05,
      "loss": 0.9621,
      "step": 7155
    },
    {
      "epoch": 0.8260267651130595,
      "grad_norm": 0.1923513561487198,
      "learning_rate": 1.788063584221017e-05,
      "loss": 0.8956,
      "step": 7160
    },
    {
      "epoch": 0.8266035994462391,
      "grad_norm": 0.1910182386636734,
      "learning_rate": 1.776589734040487e-05,
      "loss": 0.9845,
      "step": 7165
    },
    {
      "epoch": 0.8271804337794185,
      "grad_norm": 0.1840658187866211,
      "learning_rate": 1.7651492258109835e-05,
      "loss": 0.9264,
      "step": 7170
    },
    {
      "epoch": 0.8277572681125981,
      "grad_norm": 0.21792177855968475,
      "learning_rate": 1.7537421059182314e-05,
      "loss": 0.9866,
      "step": 7175
    },
    {
      "epoch": 0.8283341024457775,
      "grad_norm": 0.19659163057804108,
      "learning_rate": 1.74236842061259e-05,
      "loss": 0.9331,
      "step": 7180
    },
    {
      "epoch": 0.8289109367789571,
      "grad_norm": 0.1791463941335678,
      "learning_rate": 1.7310282160088465e-05,
      "loss": 0.9154,
      "step": 7185
    },
    {
      "epoch": 0.8294877711121366,
      "grad_norm": 0.19773773849010468,
      "learning_rate": 1.7197215380860497e-05,
      "loss": 0.9588,
      "step": 7190
    },
    {
      "epoch": 0.8300646054453161,
      "grad_norm": 0.2065158188343048,
      "learning_rate": 1.7084484326873062e-05,
      "loss": 0.9579,
      "step": 7195
    },
    {
      "epoch": 0.8306414397784956,
      "grad_norm": 0.19158004224300385,
      "learning_rate": 1.6972089455196115e-05,
      "loss": 0.9358,
      "step": 7200
    },
    {
      "epoch": 0.8312182741116751,
      "grad_norm": 0.2142983227968216,
      "learning_rate": 1.6860031221536398e-05,
      "loss": 0.9572,
      "step": 7205
    },
    {
      "epoch": 0.8317951084448546,
      "grad_norm": 0.1989503800868988,
      "learning_rate": 1.674831008023594e-05,
      "loss": 0.9698,
      "step": 7210
    },
    {
      "epoch": 0.8323719427780342,
      "grad_norm": 0.19422230124473572,
      "learning_rate": 1.6636926484269855e-05,
      "loss": 0.936,
      "step": 7215
    },
    {
      "epoch": 0.8329487771112136,
      "grad_norm": 0.22429165244102478,
      "learning_rate": 1.6525880885244815e-05,
      "loss": 0.9515,
      "step": 7220
    },
    {
      "epoch": 0.8335256114443932,
      "grad_norm": 0.19105808436870575,
      "learning_rate": 1.641517373339696e-05,
      "loss": 0.9732,
      "step": 7225
    },
    {
      "epoch": 0.8341024457775726,
      "grad_norm": 0.20830240845680237,
      "learning_rate": 1.6304805477590312e-05,
      "loss": 0.9794,
      "step": 7230
    },
    {
      "epoch": 0.8346792801107522,
      "grad_norm": 0.20146532356739044,
      "learning_rate": 1.6194776565314672e-05,
      "loss": 0.9492,
      "step": 7235
    },
    {
      "epoch": 0.8352561144439317,
      "grad_norm": 0.18800169229507446,
      "learning_rate": 1.6085087442684122e-05,
      "loss": 0.92,
      "step": 7240
    },
    {
      "epoch": 0.8358329487771112,
      "grad_norm": 0.19800886511802673,
      "learning_rate": 1.597573855443497e-05,
      "loss": 0.9912,
      "step": 7245
    },
    {
      "epoch": 0.8364097831102907,
      "grad_norm": 0.18179026246070862,
      "learning_rate": 1.5866730343924085e-05,
      "loss": 0.9121,
      "step": 7250
    },
    {
      "epoch": 0.8369866174434702,
      "grad_norm": 0.19229067862033844,
      "learning_rate": 1.575806325312702e-05,
      "loss": 0.9529,
      "step": 7255
    },
    {
      "epoch": 0.8375634517766497,
      "grad_norm": 0.1789880245923996,
      "learning_rate": 1.5649737722636315e-05,
      "loss": 0.926,
      "step": 7260
    },
    {
      "epoch": 0.8381402861098293,
      "grad_norm": 0.1741928905248642,
      "learning_rate": 1.554175419165951e-05,
      "loss": 0.9237,
      "step": 7265
    },
    {
      "epoch": 0.8387171204430087,
      "grad_norm": 0.20456916093826294,
      "learning_rate": 1.5434113098017667e-05,
      "loss": 0.9821,
      "step": 7270
    },
    {
      "epoch": 0.8392939547761883,
      "grad_norm": 0.1989426463842392,
      "learning_rate": 1.5326814878143304e-05,
      "loss": 0.9187,
      "step": 7275
    },
    {
      "epoch": 0.8398707891093677,
      "grad_norm": 0.18545162677764893,
      "learning_rate": 1.5219859967078854e-05,
      "loss": 0.945,
      "step": 7280
    },
    {
      "epoch": 0.8404476234425473,
      "grad_norm": 0.17884254455566406,
      "learning_rate": 1.5113248798474689e-05,
      "loss": 0.9181,
      "step": 7285
    },
    {
      "epoch": 0.8410244577757268,
      "grad_norm": 0.18553291261196136,
      "learning_rate": 1.5006981804587595e-05,
      "loss": 0.9737,
      "step": 7290
    },
    {
      "epoch": 0.8416012921089063,
      "grad_norm": 0.1832725703716278,
      "learning_rate": 1.4901059416278806e-05,
      "loss": 0.9121,
      "step": 7295
    },
    {
      "epoch": 0.8421781264420858,
      "grad_norm": 0.18451160192489624,
      "learning_rate": 1.4795482063012367e-05,
      "loss": 0.9595,
      "step": 7300
    },
    {
      "epoch": 0.8427549607752653,
      "grad_norm": 0.1967657506465912,
      "learning_rate": 1.4690250172853348e-05,
      "loss": 0.9664,
      "step": 7305
    },
    {
      "epoch": 0.8433317951084448,
      "grad_norm": 0.1973174512386322,
      "learning_rate": 1.4585364172466231e-05,
      "loss": 0.9763,
      "step": 7310
    },
    {
      "epoch": 0.8439086294416244,
      "grad_norm": 0.1908571720123291,
      "learning_rate": 1.4480824487112943e-05,
      "loss": 0.9536,
      "step": 7315
    },
    {
      "epoch": 0.8444854637748038,
      "grad_norm": 0.17997972667217255,
      "learning_rate": 1.437663154065142e-05,
      "loss": 0.977,
      "step": 7320
    },
    {
      "epoch": 0.8450622981079834,
      "grad_norm": 0.19456464052200317,
      "learning_rate": 1.4272785755533601e-05,
      "loss": 0.96,
      "step": 7325
    },
    {
      "epoch": 0.8456391324411628,
      "grad_norm": 0.18535476922988892,
      "learning_rate": 1.4169287552803923e-05,
      "loss": 0.9358,
      "step": 7330
    },
    {
      "epoch": 0.8462159667743424,
      "grad_norm": 0.19435110688209534,
      "learning_rate": 1.4066137352097575e-05,
      "loss": 0.9578,
      "step": 7335
    },
    {
      "epoch": 0.846792801107522,
      "grad_norm": 0.196323961019516,
      "learning_rate": 1.396333557163868e-05,
      "loss": 0.9587,
      "step": 7340
    },
    {
      "epoch": 0.8473696354407014,
      "grad_norm": 0.18497958779335022,
      "learning_rate": 1.3860882628238781e-05,
      "loss": 0.8837,
      "step": 7345
    },
    {
      "epoch": 0.847946469773881,
      "grad_norm": 0.1857413649559021,
      "learning_rate": 1.3758778937294947e-05,
      "loss": 0.9882,
      "step": 7350
    },
    {
      "epoch": 0.8485233041070604,
      "grad_norm": 0.18033544719219208,
      "learning_rate": 1.365702491278833e-05,
      "loss": 0.9649,
      "step": 7355
    },
    {
      "epoch": 0.84910013844024,
      "grad_norm": 0.17830757796764374,
      "learning_rate": 1.3555620967282235e-05,
      "loss": 0.9306,
      "step": 7360
    },
    {
      "epoch": 0.8496769727734195,
      "grad_norm": 0.18326257169246674,
      "learning_rate": 1.3454567511920634e-05,
      "loss": 0.9583,
      "step": 7365
    },
    {
      "epoch": 0.850253807106599,
      "grad_norm": 0.1927419751882553,
      "learning_rate": 1.3353864956426366e-05,
      "loss": 0.9199,
      "step": 7370
    },
    {
      "epoch": 0.8508306414397785,
      "grad_norm": 0.20454275608062744,
      "learning_rate": 1.3253513709099652e-05,
      "loss": 0.9859,
      "step": 7375
    },
    {
      "epoch": 0.8514074757729581,
      "grad_norm": 0.1943024843931198,
      "learning_rate": 1.3153514176816195e-05,
      "loss": 0.9491,
      "step": 7380
    },
    {
      "epoch": 0.8519843101061375,
      "grad_norm": 0.196246936917305,
      "learning_rate": 1.305386676502578e-05,
      "loss": 0.9904,
      "step": 7385
    },
    {
      "epoch": 0.8525611444393171,
      "grad_norm": 0.1911761462688446,
      "learning_rate": 1.2954571877750443e-05,
      "loss": 0.9533,
      "step": 7390
    },
    {
      "epoch": 0.8531379787724965,
      "grad_norm": 0.193502277135849,
      "learning_rate": 1.2855629917582935e-05,
      "loss": 0.9714,
      "step": 7395
    },
    {
      "epoch": 0.8537148131056761,
      "grad_norm": 0.193160742521286,
      "learning_rate": 1.2757041285685011e-05,
      "loss": 0.9481,
      "step": 7400
    },
    {
      "epoch": 0.8542916474388556,
      "grad_norm": 0.19016680121421814,
      "learning_rate": 1.2658806381785926e-05,
      "loss": 0.9405,
      "step": 7405
    },
    {
      "epoch": 0.8548684817720351,
      "grad_norm": 0.19033581018447876,
      "learning_rate": 1.2560925604180673e-05,
      "loss": 0.9857,
      "step": 7410
    },
    {
      "epoch": 0.8554453161052146,
      "grad_norm": 0.17935208976268768,
      "learning_rate": 1.2463399349728488e-05,
      "loss": 0.9586,
      "step": 7415
    },
    {
      "epoch": 0.8560221504383941,
      "grad_norm": 0.18908259272575378,
      "learning_rate": 1.2366228013851156e-05,
      "loss": 0.9478,
      "step": 7420
    },
    {
      "epoch": 0.8565989847715736,
      "grad_norm": 0.18578480184078217,
      "learning_rate": 1.2269411990531421e-05,
      "loss": 0.9384,
      "step": 7425
    },
    {
      "epoch": 0.8571758191047532,
      "grad_norm": 0.18580469489097595,
      "learning_rate": 1.2172951672311427e-05,
      "loss": 0.9289,
      "step": 7430
    },
    {
      "epoch": 0.8577526534379326,
      "grad_norm": 0.20112477242946625,
      "learning_rate": 1.207684745029114e-05,
      "loss": 0.9331,
      "step": 7435
    },
    {
      "epoch": 0.8583294877711122,
      "grad_norm": 0.19187471270561218,
      "learning_rate": 1.1981099714126654e-05,
      "loss": 0.9518,
      "step": 7440
    },
    {
      "epoch": 0.8589063221042916,
      "grad_norm": 0.1830911487340927,
      "learning_rate": 1.1885708852028777e-05,
      "loss": 0.9235,
      "step": 7445
    },
    {
      "epoch": 0.8594831564374712,
      "grad_norm": 0.1817176192998886,
      "learning_rate": 1.1790675250761263e-05,
      "loss": 0.9221,
      "step": 7450
    },
    {
      "epoch": 0.8600599907706507,
      "grad_norm": 0.19020894169807434,
      "learning_rate": 1.1695999295639459e-05,
      "loss": 0.9953,
      "step": 7455
    },
    {
      "epoch": 0.8606368251038302,
      "grad_norm": 0.191711887717247,
      "learning_rate": 1.1601681370528484e-05,
      "loss": 0.9635,
      "step": 7460
    },
    {
      "epoch": 0.8612136594370097,
      "grad_norm": 0.19259481132030487,
      "learning_rate": 1.150772185784198e-05,
      "loss": 0.9135,
      "step": 7465
    },
    {
      "epoch": 0.8617904937701892,
      "grad_norm": 0.1911952793598175,
      "learning_rate": 1.1414121138540279e-05,
      "loss": 0.9416,
      "step": 7470
    },
    {
      "epoch": 0.8623673281033687,
      "grad_norm": 0.2062252312898636,
      "learning_rate": 1.1320879592129052e-05,
      "loss": 0.9167,
      "step": 7475
    },
    {
      "epoch": 0.8629441624365483,
      "grad_norm": 0.1907137632369995,
      "learning_rate": 1.1227997596657636e-05,
      "loss": 0.9375,
      "step": 7480
    },
    {
      "epoch": 0.8635209967697277,
      "grad_norm": 0.19318221509456635,
      "learning_rate": 1.1135475528717642e-05,
      "loss": 0.936,
      "step": 7485
    },
    {
      "epoch": 0.8640978311029073,
      "grad_norm": 0.18050383031368256,
      "learning_rate": 1.1043313763441277e-05,
      "loss": 0.9388,
      "step": 7490
    },
    {
      "epoch": 0.8646746654360867,
      "grad_norm": 0.18808601796627045,
      "learning_rate": 1.0951512674499898e-05,
      "loss": 0.9033,
      "step": 7495
    },
    {
      "epoch": 0.8652514997692663,
      "grad_norm": 0.18681325018405914,
      "learning_rate": 1.0860072634102569e-05,
      "loss": 0.9511,
      "step": 7500
    },
    {
      "epoch": 0.8658283341024458,
      "grad_norm": 0.18992042541503906,
      "learning_rate": 1.0768994012994371e-05,
      "loss": 0.9316,
      "step": 7505
    },
    {
      "epoch": 0.8664051684356253,
      "grad_norm": 0.19558371603488922,
      "learning_rate": 1.0678277180455109e-05,
      "loss": 0.9203,
      "step": 7510
    },
    {
      "epoch": 0.8669820027688048,
      "grad_norm": 0.18919962644577026,
      "learning_rate": 1.0587922504297642e-05,
      "loss": 0.939,
      "step": 7515
    },
    {
      "epoch": 0.8675588371019843,
      "grad_norm": 0.18517306447029114,
      "learning_rate": 1.049793035086647e-05,
      "loss": 0.9253,
      "step": 7520
    },
    {
      "epoch": 0.8681356714351638,
      "grad_norm": 0.1969662606716156,
      "learning_rate": 1.040830108503622e-05,
      "loss": 0.9644,
      "step": 7525
    },
    {
      "epoch": 0.8687125057683434,
      "grad_norm": 0.18453893065452576,
      "learning_rate": 1.031903507021027e-05,
      "loss": 0.9617,
      "step": 7530
    },
    {
      "epoch": 0.8692893401015228,
      "grad_norm": 0.18227744102478027,
      "learning_rate": 1.0230132668319082e-05,
      "loss": 0.9707,
      "step": 7535
    },
    {
      "epoch": 0.8698661744347024,
      "grad_norm": 0.19960415363311768,
      "learning_rate": 1.014159423981893e-05,
      "loss": 0.9484,
      "step": 7540
    },
    {
      "epoch": 0.8704430087678818,
      "grad_norm": 0.1987922042608261,
      "learning_rate": 1.0053420143690284e-05,
      "loss": 0.931,
      "step": 7545
    },
    {
      "epoch": 0.8710198431010614,
      "grad_norm": 0.1911085695028305,
      "learning_rate": 9.965610737436515e-06,
      "loss": 0.9723,
      "step": 7550
    },
    {
      "epoch": 0.8715966774342409,
      "grad_norm": 0.1827116310596466,
      "learning_rate": 9.87816637708221e-06,
      "loss": 0.9181,
      "step": 7555
    },
    {
      "epoch": 0.8721735117674204,
      "grad_norm": 0.19509339332580566,
      "learning_rate": 9.791087417172019e-06,
      "loss": 0.971,
      "step": 7560
    },
    {
      "epoch": 0.8727503461005999,
      "grad_norm": 0.18567214906215668,
      "learning_rate": 9.704374210768952e-06,
      "loss": 0.9254,
      "step": 7565
    },
    {
      "epoch": 0.8733271804337794,
      "grad_norm": 0.20758579671382904,
      "learning_rate": 9.618027109453176e-06,
      "loss": 0.9661,
      "step": 7570
    },
    {
      "epoch": 0.8739040147669589,
      "grad_norm": 0.20255382359027863,
      "learning_rate": 9.532046463320365e-06,
      "loss": 0.9802,
      "step": 7575
    },
    {
      "epoch": 0.8744808491001385,
      "grad_norm": 0.18845497071743011,
      "learning_rate": 9.446432620980517e-06,
      "loss": 0.94,
      "step": 7580
    },
    {
      "epoch": 0.8750576834333179,
      "grad_norm": 0.19557009637355804,
      "learning_rate": 9.361185929556282e-06,
      "loss": 0.9853,
      "step": 7585
    },
    {
      "epoch": 0.8756345177664975,
      "grad_norm": 0.1927575021982193,
      "learning_rate": 9.276306734681805e-06,
      "loss": 0.966,
      "step": 7590
    },
    {
      "epoch": 0.8762113520996769,
      "grad_norm": 0.18559756875038147,
      "learning_rate": 9.191795380501134e-06,
      "loss": 0.9768,
      "step": 7595
    },
    {
      "epoch": 0.8767881864328565,
      "grad_norm": 0.18745888769626617,
      "learning_rate": 9.107652209666973e-06,
      "loss": 0.9522,
      "step": 7600
    },
    {
      "epoch": 0.877365020766036,
      "grad_norm": 0.1900642067193985,
      "learning_rate": 9.023877563339134e-06,
      "loss": 0.8757,
      "step": 7605
    },
    {
      "epoch": 0.8779418550992155,
      "grad_norm": 0.1792660653591156,
      "learning_rate": 8.940471781183335e-06,
      "loss": 0.9486,
      "step": 7610
    },
    {
      "epoch": 0.878518689432395,
      "grad_norm": 0.19872300326824188,
      "learning_rate": 8.857435201369645e-06,
      "loss": 0.955,
      "step": 7615
    },
    {
      "epoch": 0.8790955237655745,
      "grad_norm": 0.2203512042760849,
      "learning_rate": 8.774768160571257e-06,
      "loss": 0.9289,
      "step": 7620
    },
    {
      "epoch": 0.879672358098754,
      "grad_norm": 0.19183123111724854,
      "learning_rate": 8.692470993962987e-06,
      "loss": 0.9636,
      "step": 7625
    },
    {
      "epoch": 0.8802491924319336,
      "grad_norm": 0.1879664957523346,
      "learning_rate": 8.610544035220103e-06,
      "loss": 0.9431,
      "step": 7630
    },
    {
      "epoch": 0.880826026765113,
      "grad_norm": 0.19013233482837677,
      "learning_rate": 8.528987616516748e-06,
      "loss": 0.8901,
      "step": 7635
    },
    {
      "epoch": 0.8814028610982926,
      "grad_norm": 0.18684031069278717,
      "learning_rate": 8.44780206852478e-06,
      "loss": 0.9107,
      "step": 7640
    },
    {
      "epoch": 0.881979695431472,
      "grad_norm": 0.24778002500534058,
      "learning_rate": 8.366987720412322e-06,
      "loss": 0.9398,
      "step": 7645
    },
    {
      "epoch": 0.8825565297646516,
      "grad_norm": 0.20799873769283295,
      "learning_rate": 8.286544899842441e-06,
      "loss": 0.9893,
      "step": 7650
    },
    {
      "epoch": 0.883133364097831,
      "grad_norm": 0.18839897215366364,
      "learning_rate": 8.206473932971903e-06,
      "loss": 1.0032,
      "step": 7655
    },
    {
      "epoch": 0.8837101984310106,
      "grad_norm": 0.1880505084991455,
      "learning_rate": 8.126775144449705e-06,
      "loss": 0.9634,
      "step": 7660
    },
    {
      "epoch": 0.8842870327641901,
      "grad_norm": 0.1813763827085495,
      "learning_rate": 8.04744885741593e-06,
      "loss": 0.9001,
      "step": 7665
    },
    {
      "epoch": 0.8848638670973696,
      "grad_norm": 0.18489223718643188,
      "learning_rate": 7.968495393500285e-06,
      "loss": 0.9576,
      "step": 7670
    },
    {
      "epoch": 0.8854407014305491,
      "grad_norm": 0.19136987626552582,
      "learning_rate": 7.889915072820874e-06,
      "loss": 0.9586,
      "step": 7675
    },
    {
      "epoch": 0.8860175357637287,
      "grad_norm": 0.18882869184017181,
      "learning_rate": 7.811708213982883e-06,
      "loss": 0.938,
      "step": 7680
    },
    {
      "epoch": 0.8865943700969081,
      "grad_norm": 0.19385488331317902,
      "learning_rate": 7.733875134077307e-06,
      "loss": 0.9481,
      "step": 7685
    },
    {
      "epoch": 0.8871712044300877,
      "grad_norm": 0.2002614289522171,
      "learning_rate": 7.656416148679612e-06,
      "loss": 0.9657,
      "step": 7690
    },
    {
      "epoch": 0.8877480387632672,
      "grad_norm": 0.19226016104221344,
      "learning_rate": 7.579331571848569e-06,
      "loss": 1.0032,
      "step": 7695
    },
    {
      "epoch": 0.8883248730964467,
      "grad_norm": 0.192755326628685,
      "learning_rate": 7.502621716124791e-06,
      "loss": 0.9508,
      "step": 7700
    },
    {
      "epoch": 0.8889017074296263,
      "grad_norm": 0.19249974191188812,
      "learning_rate": 7.4262868925296995e-06,
      "loss": 0.9289,
      "step": 7705
    },
    {
      "epoch": 0.8894785417628057,
      "grad_norm": 0.19001740217208862,
      "learning_rate": 7.35032741056404e-06,
      "loss": 0.9426,
      "step": 7710
    },
    {
      "epoch": 0.8900553760959853,
      "grad_norm": 0.20642143487930298,
      "learning_rate": 7.274743578206788e-06,
      "loss": 0.9962,
      "step": 7715
    },
    {
      "epoch": 0.8906322104291647,
      "grad_norm": 0.1881352961063385,
      "learning_rate": 7.199535701913806e-06,
      "loss": 0.92,
      "step": 7720
    },
    {
      "epoch": 0.8912090447623443,
      "grad_norm": 0.18823300302028656,
      "learning_rate": 7.124704086616684e-06,
      "loss": 0.9823,
      "step": 7725
    },
    {
      "epoch": 0.8917858790955238,
      "grad_norm": 0.19650129973888397,
      "learning_rate": 7.05024903572139e-06,
      "loss": 0.9415,
      "step": 7730
    },
    {
      "epoch": 0.8923627134287033,
      "grad_norm": 0.18661227822303772,
      "learning_rate": 6.976170851107178e-06,
      "loss": 0.986,
      "step": 7735
    },
    {
      "epoch": 0.8929395477618828,
      "grad_norm": 0.1886250525712967,
      "learning_rate": 6.902469833125236e-06,
      "loss": 0.9679,
      "step": 7740
    },
    {
      "epoch": 0.8935163820950623,
      "grad_norm": 0.1884276568889618,
      "learning_rate": 6.8291462805975535e-06,
      "loss": 0.9508,
      "step": 7745
    },
    {
      "epoch": 0.8940932164282418,
      "grad_norm": 0.18795393407344818,
      "learning_rate": 6.756200490815645e-06,
      "loss": 0.9148,
      "step": 7750
    },
    {
      "epoch": 0.8946700507614214,
      "grad_norm": 0.19203375279903412,
      "learning_rate": 6.683632759539449e-06,
      "loss": 0.9604,
      "step": 7755
    },
    {
      "epoch": 0.8952468850946008,
      "grad_norm": 0.19676725566387177,
      "learning_rate": 6.611443380995963e-06,
      "loss": 0.964,
      "step": 7760
    },
    {
      "epoch": 0.8958237194277804,
      "grad_norm": 0.17924650013446808,
      "learning_rate": 6.5396326478782465e-06,
      "loss": 0.8975,
      "step": 7765
    },
    {
      "epoch": 0.8964005537609598,
      "grad_norm": 0.18147966265678406,
      "learning_rate": 6.468200851344042e-06,
      "loss": 0.9611,
      "step": 7770
    },
    {
      "epoch": 0.8969773880941394,
      "grad_norm": 0.17754757404327393,
      "learning_rate": 6.397148281014798e-06,
      "loss": 0.9375,
      "step": 7775
    },
    {
      "epoch": 0.8975542224273189,
      "grad_norm": 0.192587211728096,
      "learning_rate": 6.326475224974249e-06,
      "loss": 0.9408,
      "step": 7780
    },
    {
      "epoch": 0.8981310567604984,
      "grad_norm": 0.1927655041217804,
      "learning_rate": 6.256181969767505e-06,
      "loss": 0.9238,
      "step": 7785
    },
    {
      "epoch": 0.8987078910936779,
      "grad_norm": 0.18646924197673798,
      "learning_rate": 6.186268800399675e-06,
      "loss": 0.9693,
      "step": 7790
    },
    {
      "epoch": 0.8992847254268574,
      "grad_norm": 0.18755947053432465,
      "learning_rate": 6.116736000334888e-06,
      "loss": 0.9422,
      "step": 7795
    },
    {
      "epoch": 0.8998615597600369,
      "grad_norm": 0.19362567365169525,
      "learning_rate": 6.047583851494965e-06,
      "loss": 0.9084,
      "step": 7800
    },
    {
      "epoch": 0.9004383940932165,
      "grad_norm": 0.1956445276737213,
      "learning_rate": 5.978812634258468e-06,
      "loss": 0.9196,
      "step": 7805
    },
    {
      "epoch": 0.9010152284263959,
      "grad_norm": 0.20970386266708374,
      "learning_rate": 5.910422627459411e-06,
      "loss": 0.968,
      "step": 7810
    },
    {
      "epoch": 0.9015920627595755,
      "grad_norm": 0.19140861928462982,
      "learning_rate": 5.842414108386151e-06,
      "loss": 0.9587,
      "step": 7815
    },
    {
      "epoch": 0.9021688970927549,
      "grad_norm": 0.18535487353801727,
      "learning_rate": 5.774787352780387e-06,
      "loss": 0.9869,
      "step": 7820
    },
    {
      "epoch": 0.9027457314259345,
      "grad_norm": 0.1941797137260437,
      "learning_rate": 5.707542634835883e-06,
      "loss": 0.983,
      "step": 7825
    },
    {
      "epoch": 0.903322565759114,
      "grad_norm": 0.1803128570318222,
      "learning_rate": 5.640680227197426e-06,
      "loss": 0.9323,
      "step": 7830
    },
    {
      "epoch": 0.9038994000922935,
      "grad_norm": 0.18415096402168274,
      "learning_rate": 5.574200400959773e-06,
      "loss": 0.9442,
      "step": 7835
    },
    {
      "epoch": 0.904476234425473,
      "grad_norm": 0.19649024307727814,
      "learning_rate": 5.5081034256664445e-06,
      "loss": 1.0359,
      "step": 7840
    },
    {
      "epoch": 0.9050530687586525,
      "grad_norm": 0.19169114530086517,
      "learning_rate": 5.442389569308703e-06,
      "loss": 0.9368,
      "step": 7845
    },
    {
      "epoch": 0.905629903091832,
      "grad_norm": 0.19056066870689392,
      "learning_rate": 5.377059098324455e-06,
      "loss": 0.9447,
      "step": 7850
    },
    {
      "epoch": 0.9062067374250116,
      "grad_norm": 0.18303756415843964,
      "learning_rate": 5.312112277597159e-06,
      "loss": 0.8908,
      "step": 7855
    },
    {
      "epoch": 0.906783571758191,
      "grad_norm": 0.20311231911182404,
      "learning_rate": 5.247549370454763e-06,
      "loss": 0.9742,
      "step": 7860
    },
    {
      "epoch": 0.9073604060913706,
      "grad_norm": 0.19544216990470886,
      "learning_rate": 5.183370638668616e-06,
      "loss": 1.0013,
      "step": 7865
    },
    {
      "epoch": 0.90793724042455,
      "grad_norm": 0.19310228526592255,
      "learning_rate": 5.119576342452459e-06,
      "loss": 0.9131,
      "step": 7870
    },
    {
      "epoch": 0.9085140747577296,
      "grad_norm": 0.2016146332025528,
      "learning_rate": 5.056166740461265e-06,
      "loss": 0.9889,
      "step": 7875
    },
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.25115054845809937,
      "learning_rate": 4.993142089790337e-06,
      "loss": 0.9626,
      "step": 7880
    },
    {
      "epoch": 0.9096677434240886,
      "grad_norm": 0.19337384402751923,
      "learning_rate": 4.9305026459741224e-06,
      "loss": 0.9716,
      "step": 7885
    },
    {
      "epoch": 0.9102445777572681,
      "grad_norm": 0.1815733164548874,
      "learning_rate": 4.8682486629852975e-06,
      "loss": 0.9293,
      "step": 7890
    },
    {
      "epoch": 0.9108214120904476,
      "grad_norm": 0.18898643553256989,
      "learning_rate": 4.8063803932336114e-06,
      "loss": 0.8889,
      "step": 7895
    },
    {
      "epoch": 0.9113982464236271,
      "grad_norm": 0.1844266951084137,
      "learning_rate": 4.74489808756502e-06,
      "loss": 0.9805,
      "step": 7900
    },
    {
      "epoch": 0.9119750807568067,
      "grad_norm": 0.18842408061027527,
      "learning_rate": 4.683801995260484e-06,
      "loss": 0.9662,
      "step": 7905
    },
    {
      "epoch": 0.9125519150899861,
      "grad_norm": 0.20098374783992767,
      "learning_rate": 4.623092364035153e-06,
      "loss": 0.9969,
      "step": 7910
    },
    {
      "epoch": 0.9131287494231657,
      "grad_norm": 0.19723452627658844,
      "learning_rate": 4.562769440037174e-06,
      "loss": 0.8958,
      "step": 7915
    },
    {
      "epoch": 0.9137055837563451,
      "grad_norm": 0.20587897300720215,
      "learning_rate": 4.502833467846857e-06,
      "loss": 1.036,
      "step": 7920
    },
    {
      "epoch": 0.9142824180895247,
      "grad_norm": 0.18344053626060486,
      "learning_rate": 4.443284690475558e-06,
      "loss": 0.9227,
      "step": 7925
    },
    {
      "epoch": 0.9148592524227042,
      "grad_norm": 0.19742338359355927,
      "learning_rate": 4.384123349364788e-06,
      "loss": 0.9817,
      "step": 7930
    },
    {
      "epoch": 0.9154360867558837,
      "grad_norm": 0.1915377378463745,
      "learning_rate": 4.32534968438516e-06,
      "loss": 0.9285,
      "step": 7935
    },
    {
      "epoch": 0.9160129210890632,
      "grad_norm": 0.20171992480754852,
      "learning_rate": 4.266963933835455e-06,
      "loss": 0.9628,
      "step": 7940
    },
    {
      "epoch": 0.9165897554222427,
      "grad_norm": 0.19588075578212738,
      "learning_rate": 4.208966334441633e-06,
      "loss": 0.944,
      "step": 7945
    },
    {
      "epoch": 0.9171665897554222,
      "grad_norm": 0.1999482959508896,
      "learning_rate": 4.151357121355947e-06,
      "loss": 0.9673,
      "step": 7950
    },
    {
      "epoch": 0.9177434240886018,
      "grad_norm": 0.1867521107196808,
      "learning_rate": 4.0941365281558454e-06,
      "loss": 0.9605,
      "step": 7955
    },
    {
      "epoch": 0.9183202584217812,
      "grad_norm": 0.19164207577705383,
      "learning_rate": 4.037304786843188e-06,
      "loss": 0.9841,
      "step": 7960
    },
    {
      "epoch": 0.9188970927549608,
      "grad_norm": 0.18731051683425903,
      "learning_rate": 3.980862127843199e-06,
      "loss": 0.9518,
      "step": 7965
    },
    {
      "epoch": 0.9194739270881402,
      "grad_norm": 0.18932907283306122,
      "learning_rate": 3.924808780003531e-06,
      "loss": 0.9197,
      "step": 7970
    },
    {
      "epoch": 0.9200507614213198,
      "grad_norm": 0.19089601933956146,
      "learning_rate": 3.86914497059343e-06,
      "loss": 0.916,
      "step": 7975
    },
    {
      "epoch": 0.9206275957544993,
      "grad_norm": 0.19187115132808685,
      "learning_rate": 3.813870925302698e-06,
      "loss": 0.9431,
      "step": 7980
    },
    {
      "epoch": 0.9212044300876788,
      "grad_norm": 0.20279189944267273,
      "learning_rate": 3.7589868682408434e-06,
      "loss": 0.9663,
      "step": 7985
    },
    {
      "epoch": 0.9217812644208583,
      "grad_norm": 0.18457676470279694,
      "learning_rate": 3.7044930219362063e-06,
      "loss": 0.9523,
      "step": 7990
    },
    {
      "epoch": 0.9223580987540378,
      "grad_norm": 0.18185338377952576,
      "learning_rate": 3.6503896073349587e-06,
      "loss": 0.8865,
      "step": 7995
    },
    {
      "epoch": 0.9229349330872173,
      "grad_norm": 0.19247382879257202,
      "learning_rate": 3.5966768438002507e-06,
      "loss": 0.9723,
      "step": 8000
    },
    {
      "epoch": 0.9235117674203969,
      "grad_norm": 0.1936521977186203,
      "learning_rate": 3.5433549491113884e-06,
      "loss": 0.9622,
      "step": 8005
    },
    {
      "epoch": 0.9240886017535763,
      "grad_norm": 0.19727076590061188,
      "learning_rate": 3.4904241394628557e-06,
      "loss": 0.921,
      "step": 8010
    },
    {
      "epoch": 0.9246654360867559,
      "grad_norm": 0.19335485994815826,
      "learning_rate": 3.4378846294634835e-06,
      "loss": 0.9722,
      "step": 8015
    },
    {
      "epoch": 0.9252422704199353,
      "grad_norm": 0.1931992471218109,
      "learning_rate": 3.3857366321355722e-06,
      "loss": 0.9835,
      "step": 8020
    },
    {
      "epoch": 0.9258191047531149,
      "grad_norm": 0.17889727652072906,
      "learning_rate": 3.3339803589140352e-06,
      "loss": 0.9528,
      "step": 8025
    },
    {
      "epoch": 0.9263959390862944,
      "grad_norm": 0.19378302991390228,
      "learning_rate": 3.2826160196455123e-06,
      "loss": 0.9556,
      "step": 8030
    },
    {
      "epoch": 0.9269727734194739,
      "grad_norm": 0.18681880831718445,
      "learning_rate": 3.23164382258756e-06,
      "loss": 0.9854,
      "step": 8035
    },
    {
      "epoch": 0.9275496077526535,
      "grad_norm": 0.19622889161109924,
      "learning_rate": 3.181063974407772e-06,
      "loss": 0.9782,
      "step": 8040
    },
    {
      "epoch": 0.928126442085833,
      "grad_norm": 0.17983347177505493,
      "learning_rate": 3.1308766801829926e-06,
      "loss": 0.968,
      "step": 8045
    },
    {
      "epoch": 0.9287032764190125,
      "grad_norm": 0.2013084590435028,
      "learning_rate": 3.081082143398395e-06,
      "loss": 0.9816,
      "step": 8050
    },
    {
      "epoch": 0.929280110752192,
      "grad_norm": 0.2396339625120163,
      "learning_rate": 3.0316805659467705e-06,
      "loss": 0.9845,
      "step": 8055
    },
    {
      "epoch": 0.9298569450853715,
      "grad_norm": 0.19670408964157104,
      "learning_rate": 2.9826721481276077e-06,
      "loss": 0.9738,
      "step": 8060
    },
    {
      "epoch": 0.930433779418551,
      "grad_norm": 0.19861535727977753,
      "learning_rate": 2.934057088646336e-06,
      "loss": 0.9276,
      "step": 8065
    },
    {
      "epoch": 0.9310106137517306,
      "grad_norm": 0.19099079072475433,
      "learning_rate": 2.8858355846134944e-06,
      "loss": 0.9658,
      "step": 8070
    },
    {
      "epoch": 0.93158744808491,
      "grad_norm": 0.18421880900859833,
      "learning_rate": 2.8380078315439653e-06,
      "loss": 0.945,
      "step": 8075
    },
    {
      "epoch": 0.9321642824180896,
      "grad_norm": 0.1922229677438736,
      "learning_rate": 2.790574023356163e-06,
      "loss": 0.9513,
      "step": 8080
    },
    {
      "epoch": 0.932741116751269,
      "grad_norm": 0.18163804709911346,
      "learning_rate": 2.7435343523712242e-06,
      "loss": 0.93,
      "step": 8085
    },
    {
      "epoch": 0.9333179510844486,
      "grad_norm": 0.2035042643547058,
      "learning_rate": 2.6968890093122754e-06,
      "loss": 0.955,
      "step": 8090
    },
    {
      "epoch": 0.933894785417628,
      "grad_norm": 0.19784539937973022,
      "learning_rate": 2.650638183303611e-06,
      "loss": 0.9324,
      "step": 8095
    },
    {
      "epoch": 0.9344716197508076,
      "grad_norm": 0.2163614183664322,
      "learning_rate": 2.6047820618699592e-06,
      "loss": 0.9646,
      "step": 8100
    },
    {
      "epoch": 0.9350484540839871,
      "grad_norm": 0.19160954654216766,
      "learning_rate": 2.5593208309357187e-06,
      "loss": 0.9506,
      "step": 8105
    },
    {
      "epoch": 0.9356252884171666,
      "grad_norm": 0.18938621878623962,
      "learning_rate": 2.514254674824168e-06,
      "loss": 0.9404,
      "step": 8110
    },
    {
      "epoch": 0.9362021227503461,
      "grad_norm": 0.20650902390480042,
      "learning_rate": 2.469583776256812e-06,
      "loss": 0.9374,
      "step": 8115
    },
    {
      "epoch": 0.9367789570835257,
      "grad_norm": 0.19179539382457733,
      "learning_rate": 2.4253083163525038e-06,
      "loss": 0.9592,
      "step": 8120
    },
    {
      "epoch": 0.9373557914167051,
      "grad_norm": 0.18857675790786743,
      "learning_rate": 2.3814284746268344e-06,
      "loss": 0.9976,
      "step": 8125
    },
    {
      "epoch": 0.9379326257498847,
      "grad_norm": 0.19117896258831024,
      "learning_rate": 2.3379444289913342e-06,
      "loss": 0.9419,
      "step": 8130
    },
    {
      "epoch": 0.9385094600830641,
      "grad_norm": 0.2242845743894577,
      "learning_rate": 2.2948563557527836e-06,
      "loss": 0.9697,
      "step": 8135
    },
    {
      "epoch": 0.9390862944162437,
      "grad_norm": 0.1854942888021469,
      "learning_rate": 2.2521644296124466e-06,
      "loss": 0.9324,
      "step": 8140
    },
    {
      "epoch": 0.9396631287494231,
      "grad_norm": 0.19978578388690948,
      "learning_rate": 2.209868823665473e-06,
      "loss": 0.9658,
      "step": 8145
    },
    {
      "epoch": 0.9402399630826027,
      "grad_norm": 0.19244587421417236,
      "learning_rate": 2.1679697094000638e-06,
      "loss": 0.9476,
      "step": 8150
    },
    {
      "epoch": 0.9408167974157822,
      "grad_norm": 0.18521623313426971,
      "learning_rate": 2.1264672566968736e-06,
      "loss": 0.9274,
      "step": 8155
    },
    {
      "epoch": 0.9413936317489617,
      "grad_norm": 0.18388035893440247,
      "learning_rate": 2.0853616338282644e-06,
      "loss": 0.9005,
      "step": 8160
    },
    {
      "epoch": 0.9419704660821412,
      "grad_norm": 0.2041049599647522,
      "learning_rate": 2.044653007457653e-06,
      "loss": 0.9389,
      "step": 8165
    },
    {
      "epoch": 0.9425473004153208,
      "grad_norm": 0.18562230467796326,
      "learning_rate": 2.0043415426388324e-06,
      "loss": 0.9862,
      "step": 8170
    },
    {
      "epoch": 0.9431241347485002,
      "grad_norm": 0.192937970161438,
      "learning_rate": 1.964427402815294e-06,
      "loss": 0.9155,
      "step": 8175
    },
    {
      "epoch": 0.9437009690816798,
      "grad_norm": 0.1922484189271927,
      "learning_rate": 1.924910749819586e-06,
      "loss": 0.9495,
      "step": 8180
    },
    {
      "epoch": 0.9442778034148592,
      "grad_norm": 0.170320063829422,
      "learning_rate": 1.8857917438725892e-06,
      "loss": 0.9415,
      "step": 8185
    },
    {
      "epoch": 0.9448546377480388,
      "grad_norm": 0.20970895886421204,
      "learning_rate": 1.8470705435829849e-06,
      "loss": 0.9352,
      "step": 8190
    },
    {
      "epoch": 0.9454314720812182,
      "grad_norm": 0.18411558866500854,
      "learning_rate": 1.8087473059464788e-06,
      "loss": 0.9427,
      "step": 8195
    },
    {
      "epoch": 0.9460083064143978,
      "grad_norm": 0.19260326027870178,
      "learning_rate": 1.770822186345289e-06,
      "loss": 0.9852,
      "step": 8200
    },
    {
      "epoch": 0.9465851407475773,
      "grad_norm": 0.18823884427547455,
      "learning_rate": 1.7332953385474027e-06,
      "loss": 0.9574,
      "step": 8205
    },
    {
      "epoch": 0.9471619750807568,
      "grad_norm": 0.20108766853809357,
      "learning_rate": 1.6961669147060765e-06,
      "loss": 0.9593,
      "step": 8210
    },
    {
      "epoch": 0.9477388094139363,
      "grad_norm": 0.18643486499786377,
      "learning_rate": 1.6594370653590706e-06,
      "loss": 0.9279,
      "step": 8215
    },
    {
      "epoch": 0.9483156437471159,
      "grad_norm": 0.19154737889766693,
      "learning_rate": 1.6231059394281934e-06,
      "loss": 0.9204,
      "step": 8220
    },
    {
      "epoch": 0.9488924780802953,
      "grad_norm": 0.20026449859142303,
      "learning_rate": 1.587173684218557e-06,
      "loss": 0.9065,
      "step": 8225
    },
    {
      "epoch": 0.9494693124134749,
      "grad_norm": 0.20944979786872864,
      "learning_rate": 1.55164044541809e-06,
      "loss": 0.9708,
      "step": 8230
    },
    {
      "epoch": 0.9500461467466543,
      "grad_norm": 0.21715082228183746,
      "learning_rate": 1.5165063670968926e-06,
      "loss": 0.9512,
      "step": 8235
    },
    {
      "epoch": 0.9506229810798339,
      "grad_norm": 0.19158127903938293,
      "learning_rate": 1.4817715917066488e-06,
      "loss": 0.9409,
      "step": 8240
    },
    {
      "epoch": 0.9511998154130133,
      "grad_norm": 0.18626423180103302,
      "learning_rate": 1.4474362600800706e-06,
      "loss": 0.9412,
      "step": 8245
    },
    {
      "epoch": 0.9517766497461929,
      "grad_norm": 0.1935131996870041,
      "learning_rate": 1.4135005114303435e-06,
      "loss": 0.9377,
      "step": 8250
    },
    {
      "epoch": 0.9523534840793724,
      "grad_norm": 0.19177401065826416,
      "learning_rate": 1.379964483350482e-06,
      "loss": 0.9414,
      "step": 8255
    },
    {
      "epoch": 0.9529303184125519,
      "grad_norm": 0.1947384476661682,
      "learning_rate": 1.3468283118128756e-06,
      "loss": 1.0007,
      "step": 8260
    },
    {
      "epoch": 0.9535071527457314,
      "grad_norm": 0.2043754905462265,
      "learning_rate": 1.314092131168665e-06,
      "loss": 0.9421,
      "step": 8265
    },
    {
      "epoch": 0.954083987078911,
      "grad_norm": 0.19373486936092377,
      "learning_rate": 1.2817560741472445e-06,
      "loss": 1.0349,
      "step": 8270
    },
    {
      "epoch": 0.9546608214120904,
      "grad_norm": 0.1884533017873764,
      "learning_rate": 1.2498202718556617e-06,
      "loss": 0.9743,
      "step": 8275
    },
    {
      "epoch": 0.95523765574527,
      "grad_norm": 0.20388375222682953,
      "learning_rate": 1.2182848537781622e-06,
      "loss": 0.9565,
      "step": 8280
    },
    {
      "epoch": 0.9558144900784494,
      "grad_norm": 0.18673211336135864,
      "learning_rate": 1.187149947775612e-06,
      "loss": 0.9801,
      "step": 8285
    },
    {
      "epoch": 0.956391324411629,
      "grad_norm": 0.19092413783073425,
      "learning_rate": 1.1564156800849879e-06,
      "loss": 0.9634,
      "step": 8290
    },
    {
      "epoch": 0.9569681587448085,
      "grad_norm": 0.19628967344760895,
      "learning_rate": 1.1260821753188987e-06,
      "loss": 0.9575,
      "step": 8295
    },
    {
      "epoch": 0.957544993077988,
      "grad_norm": 0.19361461699008942,
      "learning_rate": 1.0961495564650092e-06,
      "loss": 0.9055,
      "step": 8300
    },
    {
      "epoch": 0.9581218274111675,
      "grad_norm": 0.20304569602012634,
      "learning_rate": 1.0666179448856174e-06,
      "loss": 0.8844,
      "step": 8305
    },
    {
      "epoch": 0.958698661744347,
      "grad_norm": 0.19096963107585907,
      "learning_rate": 1.0374874603171326e-06,
      "loss": 0.9868,
      "step": 8310
    },
    {
      "epoch": 0.9592754960775265,
      "grad_norm": 0.1914781928062439,
      "learning_rate": 1.0087582208695768e-06,
      "loss": 0.8896,
      "step": 8315
    },
    {
      "epoch": 0.9598523304107061,
      "grad_norm": 0.19837866723537445,
      "learning_rate": 9.804303430261174e-07,
      "loss": 0.9526,
      "step": 8320
    },
    {
      "epoch": 0.9604291647438855,
      "grad_norm": 0.1940370500087738,
      "learning_rate": 9.525039416425907e-07,
      "loss": 0.947,
      "step": 8325
    },
    {
      "epoch": 0.9610059990770651,
      "grad_norm": 0.1854429841041565,
      "learning_rate": 9.249791299470567e-07,
      "loss": 0.9243,
      "step": 8330
    },
    {
      "epoch": 0.9615828334102445,
      "grad_norm": 0.20479361712932587,
      "learning_rate": 8.978560195393115e-07,
      "loss": 0.9391,
      "step": 8335
    },
    {
      "epoch": 0.9621596677434241,
      "grad_norm": 0.19264739751815796,
      "learning_rate": 8.711347203904541e-07,
      "loss": 0.9304,
      "step": 8340
    },
    {
      "epoch": 0.9627365020766036,
      "grad_norm": 0.18168406188488007,
      "learning_rate": 8.448153408424087e-07,
      "loss": 0.9627,
      "step": 8345
    },
    {
      "epoch": 0.9633133364097831,
      "grad_norm": 0.1960950791835785,
      "learning_rate": 8.188979876075475e-07,
      "loss": 0.9653,
      "step": 8350
    },
    {
      "epoch": 0.9638901707429626,
      "grad_norm": 0.22994817793369293,
      "learning_rate": 7.933827657682025e-07,
      "loss": 0.9642,
      "step": 8355
    },
    {
      "epoch": 0.9644670050761421,
      "grad_norm": 0.19266270101070404,
      "learning_rate": 7.682697787762317e-07,
      "loss": 0.9601,
      "step": 8360
    },
    {
      "epoch": 0.9650438394093216,
      "grad_norm": 0.20038749277591705,
      "learning_rate": 7.435591284526866e-07,
      "loss": 1.018,
      "step": 8365
    },
    {
      "epoch": 0.9656206737425012,
      "grad_norm": 0.18728072941303253,
      "learning_rate": 7.192509149872684e-07,
      "loss": 0.9477,
      "step": 8370
    },
    {
      "epoch": 0.9661975080756806,
      "grad_norm": 0.1958947330713272,
      "learning_rate": 6.953452369380497e-07,
      "loss": 0.9806,
      "step": 8375
    },
    {
      "epoch": 0.9667743424088602,
      "grad_norm": 0.1987629234790802,
      "learning_rate": 6.718421912309758e-07,
      "loss": 0.8928,
      "step": 8380
    },
    {
      "epoch": 0.9673511767420396,
      "grad_norm": 0.19647015631198883,
      "learning_rate": 6.487418731595418e-07,
      "loss": 0.9933,
      "step": 8385
    },
    {
      "epoch": 0.9679280110752192,
      "grad_norm": 0.19900114834308624,
      "learning_rate": 6.260443763843493e-07,
      "loss": 0.951,
      "step": 8390
    },
    {
      "epoch": 0.9685048454083988,
      "grad_norm": 0.19372044503688812,
      "learning_rate": 6.037497929327839e-07,
      "loss": 0.9665,
      "step": 8395
    },
    {
      "epoch": 0.9690816797415782,
      "grad_norm": 0.20044179260730743,
      "learning_rate": 5.818582131985939e-07,
      "loss": 0.9543,
      "step": 8400
    },
    {
      "epoch": 0.9696585140747578,
      "grad_norm": 0.19964653253555298,
      "learning_rate": 5.603697259415341e-07,
      "loss": 0.9529,
      "step": 8405
    },
    {
      "epoch": 0.9702353484079372,
      "grad_norm": 0.19442757964134216,
      "learning_rate": 5.392844182870449e-07,
      "loss": 0.987,
      "step": 8410
    },
    {
      "epoch": 0.9708121827411168,
      "grad_norm": 0.18775001168251038,
      "learning_rate": 5.186023757258407e-07,
      "loss": 0.9705,
      "step": 8415
    },
    {
      "epoch": 0.9713890170742963,
      "grad_norm": 0.19862832129001617,
      "learning_rate": 4.983236821135995e-07,
      "loss": 0.9409,
      "step": 8420
    },
    {
      "epoch": 0.9719658514074758,
      "grad_norm": 0.18501073122024536,
      "learning_rate": 4.784484196706073e-07,
      "loss": 0.9682,
      "step": 8425
    },
    {
      "epoch": 0.9725426857406553,
      "grad_norm": 0.19959832727909088,
      "learning_rate": 4.5897666898145896e-07,
      "loss": 0.9556,
      "step": 8430
    },
    {
      "epoch": 0.9731195200738348,
      "grad_norm": 0.18275192379951477,
      "learning_rate": 4.3990850899467975e-07,
      "loss": 0.9612,
      "step": 8435
    },
    {
      "epoch": 0.9736963544070143,
      "grad_norm": 0.18589888513088226,
      "learning_rate": 4.2124401702241524e-07,
      "loss": 0.947,
      "step": 8440
    },
    {
      "epoch": 0.9742731887401939,
      "grad_norm": 0.19903455674648285,
      "learning_rate": 4.029832687401758e-07,
      "loss": 0.9574,
      "step": 8445
    },
    {
      "epoch": 0.9748500230733733,
      "grad_norm": 0.1937977373600006,
      "learning_rate": 3.851263381864589e-07,
      "loss": 0.9699,
      "step": 8450
    },
    {
      "epoch": 0.9754268574065529,
      "grad_norm": 0.19150091707706451,
      "learning_rate": 3.67673297762483e-07,
      "loss": 0.9158,
      "step": 8455
    },
    {
      "epoch": 0.9760036917397323,
      "grad_norm": 0.1840011477470398,
      "learning_rate": 3.506242182318653e-07,
      "loss": 0.9732,
      "step": 8460
    },
    {
      "epoch": 0.9765805260729119,
      "grad_norm": 0.1933344304561615,
      "learning_rate": 3.339791687203997e-07,
      "loss": 0.9664,
      "step": 8465
    },
    {
      "epoch": 0.9771573604060914,
      "grad_norm": 0.18675784766674042,
      "learning_rate": 3.177382167156906e-07,
      "loss": 0.9303,
      "step": 8470
    },
    {
      "epoch": 0.9777341947392709,
      "grad_norm": 0.19082637131214142,
      "learning_rate": 3.019014280669641e-07,
      "loss": 0.9024,
      "step": 8475
    },
    {
      "epoch": 0.9783110290724504,
      "grad_norm": 0.2020220011472702,
      "learning_rate": 2.8646886698473484e-07,
      "loss": 0.9963,
      "step": 8480
    },
    {
      "epoch": 0.9788878634056299,
      "grad_norm": 0.20680001378059387,
      "learning_rate": 2.7144059604055085e-07,
      "loss": 0.9355,
      "step": 8485
    },
    {
      "epoch": 0.9794646977388094,
      "grad_norm": 0.1897319108247757,
      "learning_rate": 2.568166761668156e-07,
      "loss": 0.9195,
      "step": 8490
    },
    {
      "epoch": 0.980041532071989,
      "grad_norm": 0.19423364102840424,
      "learning_rate": 2.4259716665641083e-07,
      "loss": 0.9845,
      "step": 8495
    },
    {
      "epoch": 0.9806183664051684,
      "grad_norm": 0.19203795492649078,
      "learning_rate": 2.2878212516260766e-07,
      "loss": 0.9134,
      "step": 8500
    },
    {
      "epoch": 0.981195200738348,
      "grad_norm": 0.18443405628204346,
      "learning_rate": 2.1537160769870002e-07,
      "loss": 0.9501,
      "step": 8505
    },
    {
      "epoch": 0.9817720350715274,
      "grad_norm": 0.2106209546327591,
      "learning_rate": 2.0236566863784944e-07,
      "loss": 1.0056,
      "step": 8510
    },
    {
      "epoch": 0.982348869404707,
      "grad_norm": 0.19864287972450256,
      "learning_rate": 1.8976436071284076e-07,
      "loss": 0.9141,
      "step": 8515
    },
    {
      "epoch": 0.9829257037378865,
      "grad_norm": 0.19259801506996155,
      "learning_rate": 1.775677350159044e-07,
      "loss": 0.9578,
      "step": 8520
    },
    {
      "epoch": 0.983502538071066,
      "grad_norm": 0.1885799914598465,
      "learning_rate": 1.657758409984278e-07,
      "loss": 0.9788,
      "step": 8525
    },
    {
      "epoch": 0.9840793724042455,
      "grad_norm": 0.18557386100292206,
      "learning_rate": 1.5438872647086655e-07,
      "loss": 0.9251,
      "step": 8530
    },
    {
      "epoch": 0.984656206737425,
      "grad_norm": 0.1834561824798584,
      "learning_rate": 1.4340643760244464e-07,
      "loss": 0.9683,
      "step": 8535
    },
    {
      "epoch": 0.9852330410706045,
      "grad_norm": 0.2051221877336502,
      "learning_rate": 1.328290189210435e-07,
      "loss": 0.9747,
      "step": 8540
    },
    {
      "epoch": 0.9858098754037841,
      "grad_norm": 0.18745014071464539,
      "learning_rate": 1.2265651331296869e-07,
      "loss": 0.9445,
      "step": 8545
    },
    {
      "epoch": 0.9863867097369635,
      "grad_norm": 0.188162162899971,
      "learning_rate": 1.1288896202281685e-07,
      "loss": 0.9681,
      "step": 8550
    },
    {
      "epoch": 0.9869635440701431,
      "grad_norm": 0.1787605881690979,
      "learning_rate": 1.0352640465327578e-07,
      "loss": 0.9767,
      "step": 8555
    },
    {
      "epoch": 0.9875403784033225,
      "grad_norm": 0.19322550296783447,
      "learning_rate": 9.456887916499125e-08,
      "loss": 0.9264,
      "step": 8560
    },
    {
      "epoch": 0.9881172127365021,
      "grad_norm": 0.210786372423172,
      "learning_rate": 8.601642187640036e-08,
      "loss": 0.9254,
      "step": 8565
    },
    {
      "epoch": 0.9886940470696816,
      "grad_norm": 0.18473652005195618,
      "learning_rate": 7.786906746358735e-08,
      "loss": 0.9626,
      "step": 8570
    },
    {
      "epoch": 0.9892708814028611,
      "grad_norm": 0.21195201575756073,
      "learning_rate": 7.012684896011702e-08,
      "loss": 0.9211,
      "step": 8575
    },
    {
      "epoch": 0.9898477157360406,
      "grad_norm": 0.18607978522777557,
      "learning_rate": 6.278979775694582e-08,
      "loss": 0.9473,
      "step": 8580
    },
    {
      "epoch": 0.9904245500692201,
      "grad_norm": 0.194194957613945,
      "learning_rate": 5.585794360226659e-08,
      "loss": 0.9632,
      "step": 8585
    },
    {
      "epoch": 0.9910013844023996,
      "grad_norm": 0.199600949883461,
      "learning_rate": 4.9331314601408495e-08,
      "loss": 0.9848,
      "step": 8590
    },
    {
      "epoch": 0.9915782187355792,
      "grad_norm": 0.20218248665332794,
      "learning_rate": 4.320993721668165e-08,
      "loss": 0.9596,
      "step": 8595
    },
    {
      "epoch": 0.9921550530687586,
      "grad_norm": 0.19225312769412994,
      "learning_rate": 3.7493836267310514e-08,
      "loss": 0.9497,
      "step": 8600
    },
    {
      "epoch": 0.9927318874019382,
      "grad_norm": 0.18986690044403076,
      "learning_rate": 3.218303492932284e-08,
      "loss": 0.9513,
      "step": 8605
    },
    {
      "epoch": 0.9933087217351176,
      "grad_norm": 0.19648917019367218,
      "learning_rate": 2.7277554735449794e-08,
      "loss": 0.9769,
      "step": 8610
    },
    {
      "epoch": 0.9938855560682972,
      "grad_norm": 0.24255739152431488,
      "learning_rate": 2.2777415575037098e-08,
      "loss": 0.9355,
      "step": 8615
    },
    {
      "epoch": 0.9944623904014767,
      "grad_norm": 0.1974877566099167,
      "learning_rate": 1.8682635693978433e-08,
      "loss": 0.9339,
      "step": 8620
    },
    {
      "epoch": 0.9950392247346562,
      "grad_norm": 0.1958783119916916,
      "learning_rate": 1.499323169462663e-08,
      "loss": 0.9779,
      "step": 8625
    },
    {
      "epoch": 0.9956160590678357,
      "grad_norm": 0.18412433564662933,
      "learning_rate": 1.1709218535715938e-08,
      "loss": 0.9261,
      "step": 8630
    },
    {
      "epoch": 0.9961928934010152,
      "grad_norm": 0.18094801902770996,
      "learning_rate": 8.83060953235093e-09,
      "loss": 0.904,
      "step": 8635
    },
    {
      "epoch": 0.9967697277341947,
      "grad_norm": 0.19589665532112122,
      "learning_rate": 6.357416355884382e-09,
      "loss": 0.9594,
      "step": 8640
    },
    {
      "epoch": 0.9973465620673743,
      "grad_norm": 0.19667023420333862,
      "learning_rate": 4.289649033928367e-09,
      "loss": 1.0295,
      "step": 8645
    },
    {
      "epoch": 0.9979233964005537,
      "grad_norm": 0.1948215663433075,
      "learning_rate": 2.627315950265441e-09,
      "loss": 0.982,
      "step": 8650
    },
    {
      "epoch": 0.9985002307337333,
      "grad_norm": 0.18232625722885132,
      "learning_rate": 1.3704238448708496e-09,
      "loss": 0.959,
      "step": 8655
    },
    {
      "epoch": 0.9990770650669127,
      "grad_norm": 0.18606936931610107,
      "learning_rate": 5.189778138237067e-10,
      "loss": 0.9795,
      "step": 8660
    },
    {
      "epoch": 0.9996538994000923,
      "grad_norm": 0.20517615973949432,
      "learning_rate": 7.298130931809865e-11,
      "loss": 0.9515,
      "step": 8665
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.9548913240432739,
      "eval_runtime": 3071.7071,
      "eval_samples_per_second": 4.997,
      "eval_steps_per_second": 0.313,
      "step": 8668
    },
    {
      "epoch": 1.0,
      "step": 8668,
      "total_flos": 1.3416878770665554e+19,
      "train_loss": 0.9644491172051397,
      "train_runtime": 101367.857,
      "train_samples_per_second": 1.368,
      "train_steps_per_second": 0.086
    }
  ],
  "logging_steps": 5,
  "max_steps": 8668,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.3416878770665554e+19,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}