{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.999691643539932,
  "eval_steps": 500,
  "global_step": 14592,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00020557097337855896,
      "grad_norm": 6.1312150955200195,
      "learning_rate": 2.0547945205479452e-07,
      "loss": 1.5536,
      "step": 1
    },
    {
      "epoch": 0.0004111419467571179,
      "grad_norm": 6.5972065925598145,
      "learning_rate": 4.1095890410958903e-07,
      "loss": 1.5863,
      "step": 2
    },
    {
      "epoch": 0.0006167129201356768,
      "grad_norm": 5.653270244598389,
      "learning_rate": 6.164383561643835e-07,
      "loss": 1.491,
      "step": 3
    },
    {
      "epoch": 0.0008222838935142358,
      "grad_norm": 6.296363830566406,
      "learning_rate": 8.219178082191781e-07,
      "loss": 1.5794,
      "step": 4
    },
    {
      "epoch": 0.0010278548668927947,
      "grad_norm": 1.6855748891830444,
      "learning_rate": 1.0273972602739727e-06,
      "loss": 0.8016,
      "step": 5
    },
    {
      "epoch": 0.0012334258402713536,
      "grad_norm": 5.977898120880127,
      "learning_rate": 1.232876712328767e-06,
      "loss": 1.5352,
      "step": 6
    },
    {
      "epoch": 0.0014389968136499125,
      "grad_norm": 1.6171352863311768,
      "learning_rate": 1.4383561643835616e-06,
      "loss": 0.7893,
      "step": 7
    },
    {
      "epoch": 0.0016445677870284717,
      "grad_norm": 5.830033302307129,
      "learning_rate": 1.6438356164383561e-06,
      "loss": 1.5201,
      "step": 8
    },
    {
      "epoch": 0.0018501387604070306,
      "grad_norm": 5.7432990074157715,
      "learning_rate": 1.8493150684931507e-06,
      "loss": 1.5304,
      "step": 9
    },
    {
      "epoch": 0.0020557097337855893,
      "grad_norm": 5.845605373382568,
      "learning_rate": 2.0547945205479454e-06,
      "loss": 1.5429,
      "step": 10
    },
    {
      "epoch": 0.0022612807071641485,
      "grad_norm": 5.0305399894714355,
      "learning_rate": 2.2602739726027396e-06,
      "loss": 1.4662,
      "step": 11
    },
    {
      "epoch": 0.002466851680542707,
      "grad_norm": 5.128103256225586,
      "learning_rate": 2.465753424657534e-06,
      "loss": 1.4793,
      "step": 12
    },
    {
      "epoch": 0.0026724226539212663,
      "grad_norm": 4.975289344787598,
      "learning_rate": 2.6712328767123286e-06,
      "loss": 1.4448,
      "step": 13
    },
    {
      "epoch": 0.002877993627299825,
      "grad_norm": 4.8694987297058105,
      "learning_rate": 2.876712328767123e-06,
      "loss": 1.4351,
      "step": 14
    },
    {
      "epoch": 0.003083564600678384,
      "grad_norm": 1.9994945526123047,
      "learning_rate": 3.0821917808219177e-06,
      "loss": 0.7466,
      "step": 15
    },
    {
      "epoch": 0.0032891355740569434,
      "grad_norm": 5.214486598968506,
      "learning_rate": 3.2876712328767123e-06,
      "loss": 1.293,
      "step": 16
    },
    {
      "epoch": 0.003494706547435502,
      "grad_norm": 5.771518707275391,
      "learning_rate": 3.493150684931507e-06,
      "loss": 1.2649,
      "step": 17
    },
    {
      "epoch": 0.0037002775208140612,
      "grad_norm": 5.648902893066406,
      "learning_rate": 3.6986301369863014e-06,
      "loss": 1.2114,
      "step": 18
    },
    {
      "epoch": 0.00390584849419262,
      "grad_norm": 2.0775961875915527,
      "learning_rate": 3.904109589041096e-06,
      "loss": 1.1923,
      "step": 19
    },
    {
      "epoch": 0.004111419467571179,
      "grad_norm": 1.4989817142486572,
      "learning_rate": 4.109589041095891e-06,
      "loss": 1.1838,
      "step": 20
    },
    {
      "epoch": 0.004316990440949738,
      "grad_norm": 1.3304322957992554,
      "learning_rate": 4.315068493150685e-06,
      "loss": 1.1334,
      "step": 21
    },
    {
      "epoch": 0.004522561414328297,
      "grad_norm": 1.2907218933105469,
      "learning_rate": 4.520547945205479e-06,
      "loss": 1.126,
      "step": 22
    },
    {
      "epoch": 0.004728132387706856,
      "grad_norm": 1.3639134168624878,
      "learning_rate": 4.726027397260274e-06,
      "loss": 1.1328,
      "step": 23
    },
    {
      "epoch": 0.004933703361085414,
      "grad_norm": 1.2764439582824707,
      "learning_rate": 4.931506849315068e-06,
      "loss": 1.1774,
      "step": 24
    },
    {
      "epoch": 0.005139274334463974,
      "grad_norm": 1.016863465309143,
      "learning_rate": 5.136986301369863e-06,
      "loss": 0.7168,
      "step": 25
    },
    {
      "epoch": 0.005344845307842533,
      "grad_norm": 0.9651275277137756,
      "learning_rate": 5.342465753424657e-06,
      "loss": 0.7096,
      "step": 26
    },
    {
      "epoch": 0.005550416281221091,
      "grad_norm": 0.922505795955658,
      "learning_rate": 5.547945205479452e-06,
      "loss": 1.0915,
      "step": 27
    },
    {
      "epoch": 0.00575598725459965,
      "grad_norm": 1.0005972385406494,
      "learning_rate": 5.753424657534246e-06,
      "loss": 1.1167,
      "step": 28
    },
    {
      "epoch": 0.00596155822797821,
      "grad_norm": 1.006510615348816,
      "learning_rate": 5.958904109589041e-06,
      "loss": 1.1025,
      "step": 29
    },
    {
      "epoch": 0.006167129201356768,
      "grad_norm": 1.069066047668457,
      "learning_rate": 6.1643835616438354e-06,
      "loss": 1.0833,
      "step": 30
    },
    {
      "epoch": 0.006372700174735327,
      "grad_norm": 1.1197434663772583,
      "learning_rate": 6.36986301369863e-06,
      "loss": 0.7086,
      "step": 31
    },
    {
      "epoch": 0.006578271148113887,
      "grad_norm": 1.1849225759506226,
      "learning_rate": 6.5753424657534245e-06,
      "loss": 0.7198,
      "step": 32
    },
    {
      "epoch": 0.0067838421214924454,
      "grad_norm": 1.0908714532852173,
      "learning_rate": 6.7808219178082195e-06,
      "loss": 1.0882,
      "step": 33
    },
    {
      "epoch": 0.006989413094871004,
      "grad_norm": 1.1033886671066284,
      "learning_rate": 6.986301369863014e-06,
      "loss": 1.0619,
      "step": 34
    },
    {
      "epoch": 0.007194984068249563,
      "grad_norm": 0.9067010283470154,
      "learning_rate": 7.191780821917809e-06,
      "loss": 1.0383,
      "step": 35
    },
    {
      "epoch": 0.0074005550416281225,
      "grad_norm": 0.7680827379226685,
      "learning_rate": 7.397260273972603e-06,
      "loss": 1.0172,
      "step": 36
    },
    {
      "epoch": 0.007606126015006681,
      "grad_norm": 0.6832679510116577,
      "learning_rate": 7.602739726027398e-06,
      "loss": 1.0656,
      "step": 37
    },
    {
      "epoch": 0.00781169698838524,
      "grad_norm": 0.631285548210144,
      "learning_rate": 7.808219178082192e-06,
      "loss": 1.0222,
      "step": 38
    },
    {
      "epoch": 0.008017267961763799,
      "grad_norm": 0.6489036083221436,
      "learning_rate": 8.013698630136987e-06,
      "loss": 1.0526,
      "step": 39
    },
    {
      "epoch": 0.008222838935142357,
      "grad_norm": 0.7755447626113892,
      "learning_rate": 8.219178082191782e-06,
      "loss": 0.7246,
      "step": 40
    },
    {
      "epoch": 0.008428409908520916,
      "grad_norm": 0.7579307556152344,
      "learning_rate": 8.424657534246575e-06,
      "loss": 1.0303,
      "step": 41
    },
    {
      "epoch": 0.008633980881899476,
      "grad_norm": 0.746900200843811,
      "learning_rate": 8.63013698630137e-06,
      "loss": 1.0118,
      "step": 42
    },
    {
      "epoch": 0.008839551855278035,
      "grad_norm": 0.6753754615783691,
      "learning_rate": 8.835616438356165e-06,
      "loss": 1.0531,
      "step": 43
    },
    {
      "epoch": 0.009045122828656594,
      "grad_norm": 0.6792585253715515,
      "learning_rate": 9.041095890410958e-06,
      "loss": 1.0317,
      "step": 44
    },
    {
      "epoch": 0.009250693802035153,
      "grad_norm": 0.6036022305488586,
      "learning_rate": 9.246575342465753e-06,
      "loss": 1.0008,
      "step": 45
    },
    {
      "epoch": 0.009456264775413711,
      "grad_norm": 0.5249003767967224,
      "learning_rate": 9.452054794520548e-06,
      "loss": 1.0103,
      "step": 46
    },
    {
      "epoch": 0.00966183574879227,
      "grad_norm": 0.48237892985343933,
      "learning_rate": 9.657534246575343e-06,
      "loss": 1.0129,
      "step": 47
    },
    {
      "epoch": 0.009867406722170829,
      "grad_norm": 0.4669821858406067,
      "learning_rate": 9.863013698630136e-06,
      "loss": 0.6748,
      "step": 48
    },
    {
      "epoch": 0.01007297769554939,
      "grad_norm": 0.7257899045944214,
      "learning_rate": 1.0068493150684931e-05,
      "loss": 1.0394,
      "step": 49
    },
    {
      "epoch": 0.010278548668927948,
      "grad_norm": 0.5101274847984314,
      "learning_rate": 1.0273972602739726e-05,
      "loss": 0.9956,
      "step": 50
    },
    {
      "epoch": 0.010484119642306507,
      "grad_norm": 0.4904460906982422,
      "learning_rate": 1.0479452054794521e-05,
      "loss": 1.0081,
      "step": 51
    },
    {
      "epoch": 0.010689690615685065,
      "grad_norm": 0.49294978380203247,
      "learning_rate": 1.0684931506849315e-05,
      "loss": 0.9707,
      "step": 52
    },
    {
      "epoch": 0.010895261589063624,
      "grad_norm": 0.5110352039337158,
      "learning_rate": 1.089041095890411e-05,
      "loss": 0.9684,
      "step": 53
    },
    {
      "epoch": 0.011100832562442183,
      "grad_norm": 0.44021663069725037,
      "learning_rate": 1.1095890410958904e-05,
      "loss": 0.9872,
      "step": 54
    },
    {
      "epoch": 0.011306403535820742,
      "grad_norm": 0.5229463577270508,
      "learning_rate": 1.13013698630137e-05,
      "loss": 0.9821,
      "step": 55
    },
    {
      "epoch": 0.0115119745091993,
      "grad_norm": 0.4633481502532959,
      "learning_rate": 1.1506849315068493e-05,
      "loss": 0.9858,
      "step": 56
    },
    {
      "epoch": 0.01171754548257786,
      "grad_norm": 0.43951645493507385,
      "learning_rate": 1.1712328767123288e-05,
      "loss": 0.9608,
      "step": 57
    },
    {
      "epoch": 0.01192311645595642,
      "grad_norm": 0.46415814757347107,
      "learning_rate": 1.1917808219178083e-05,
      "loss": 0.9831,
      "step": 58
    },
    {
      "epoch": 0.012128687429334978,
      "grad_norm": 0.35238775610923767,
      "learning_rate": 1.2123287671232878e-05,
      "loss": 0.671,
      "step": 59
    },
    {
      "epoch": 0.012334258402713537,
      "grad_norm": 0.4979459047317505,
      "learning_rate": 1.2328767123287671e-05,
      "loss": 0.9634,
      "step": 60
    },
    {
      "epoch": 0.012539829376092096,
      "grad_norm": 0.40928781032562256,
      "learning_rate": 1.2534246575342466e-05,
      "loss": 0.9618,
      "step": 61
    },
    {
      "epoch": 0.012745400349470654,
      "grad_norm": 0.35449472069740295,
      "learning_rate": 1.273972602739726e-05,
      "loss": 0.6745,
      "step": 62
    },
    {
      "epoch": 0.012950971322849213,
      "grad_norm": 0.5600117444992065,
      "learning_rate": 1.2945205479452054e-05,
      "loss": 0.9651,
      "step": 63
    },
    {
      "epoch": 0.013156542296227773,
      "grad_norm": 0.4429936110973358,
      "learning_rate": 1.3150684931506849e-05,
      "loss": 0.9478,
      "step": 64
    },
    {
      "epoch": 0.013362113269606332,
      "grad_norm": 0.47870925068855286,
      "learning_rate": 1.3356164383561644e-05,
      "loss": 0.9631,
      "step": 65
    },
    {
      "epoch": 0.013567684242984891,
      "grad_norm": 0.4984883964061737,
      "learning_rate": 1.3561643835616439e-05,
      "loss": 0.9612,
      "step": 66
    },
    {
      "epoch": 0.01377325521636345,
      "grad_norm": 0.43905192613601685,
      "learning_rate": 1.3767123287671232e-05,
      "loss": 0.9495,
      "step": 67
    },
    {
      "epoch": 0.013978826189742008,
      "grad_norm": 0.4528709650039673,
      "learning_rate": 1.3972602739726027e-05,
      "loss": 0.9597,
      "step": 68
    },
    {
      "epoch": 0.014184397163120567,
      "grad_norm": 0.2834670841693878,
      "learning_rate": 1.4178082191780822e-05,
      "loss": 0.6768,
      "step": 69
    },
    {
      "epoch": 0.014389968136499126,
      "grad_norm": 0.736508846282959,
      "learning_rate": 1.4383561643835617e-05,
      "loss": 0.9616,
      "step": 70
    },
    {
      "epoch": 0.014595539109877684,
      "grad_norm": 0.2635529935359955,
      "learning_rate": 1.458904109589041e-05,
      "loss": 0.6671,
      "step": 71
    },
    {
      "epoch": 0.014801110083256245,
      "grad_norm": 0.5397729873657227,
      "learning_rate": 1.4794520547945205e-05,
      "loss": 0.9488,
      "step": 72
    },
    {
      "epoch": 0.015006681056634804,
      "grad_norm": 0.23914408683776855,
      "learning_rate": 1.5e-05,
      "loss": 0.6537,
      "step": 73
    },
    {
      "epoch": 0.015212252030013362,
      "grad_norm": 0.6451640129089355,
      "learning_rate": 1.5205479452054795e-05,
      "loss": 0.954,
      "step": 74
    },
    {
      "epoch": 0.015417823003391921,
      "grad_norm": 0.37705564498901367,
      "learning_rate": 1.541095890410959e-05,
      "loss": 0.9367,
      "step": 75
    },
    {
      "epoch": 0.01562339397677048,
      "grad_norm": 0.5562038421630859,
      "learning_rate": 1.5616438356164384e-05,
      "loss": 0.9374,
      "step": 76
    },
    {
      "epoch": 0.01582896495014904,
      "grad_norm": 0.2332352101802826,
      "learning_rate": 1.582191780821918e-05,
      "loss": 0.6542,
      "step": 77
    },
    {
      "epoch": 0.016034535923527597,
      "grad_norm": 0.5999805331230164,
      "learning_rate": 1.6027397260273974e-05,
      "loss": 0.9342,
      "step": 78
    },
    {
      "epoch": 0.016240106896906158,
      "grad_norm": 0.3581260144710541,
      "learning_rate": 1.623287671232877e-05,
      "loss": 0.9542,
      "step": 79
    },
    {
      "epoch": 0.016445677870284715,
      "grad_norm": 0.5643858909606934,
      "learning_rate": 1.6438356164383563e-05,
      "loss": 0.9312,
      "step": 80
    },
    {
      "epoch": 0.016651248843663275,
      "grad_norm": 0.5196654200553894,
      "learning_rate": 1.6643835616438355e-05,
      "loss": 0.9256,
      "step": 81
    },
    {
      "epoch": 0.016856819817041832,
      "grad_norm": 0.37860536575317383,
      "learning_rate": 1.684931506849315e-05,
      "loss": 0.9139,
      "step": 82
    },
    {
      "epoch": 0.017062390790420393,
      "grad_norm": 0.6562532186508179,
      "learning_rate": 1.7054794520547945e-05,
      "loss": 0.8984,
      "step": 83
    },
    {
      "epoch": 0.017267961763798953,
      "grad_norm": 0.4133750796318054,
      "learning_rate": 1.726027397260274e-05,
      "loss": 0.905,
      "step": 84
    },
    {
      "epoch": 0.01747353273717751,
      "grad_norm": 0.38232654333114624,
      "learning_rate": 1.7465753424657535e-05,
      "loss": 0.9202,
      "step": 85
    },
    {
      "epoch": 0.01767910371055607,
      "grad_norm": 0.5049018859863281,
      "learning_rate": 1.767123287671233e-05,
      "loss": 0.9235,
      "step": 86
    },
    {
      "epoch": 0.017884674683934627,
      "grad_norm": 0.4014778137207031,
      "learning_rate": 1.7876712328767125e-05,
      "loss": 0.9272,
      "step": 87
    },
    {
      "epoch": 0.018090245657313188,
      "grad_norm": 0.45734459161758423,
      "learning_rate": 1.8082191780821916e-05,
      "loss": 0.9312,
      "step": 88
    },
    {
      "epoch": 0.018295816630691745,
      "grad_norm": 0.46464303135871887,
      "learning_rate": 1.828767123287671e-05,
      "loss": 0.9394,
      "step": 89
    },
    {
      "epoch": 0.018501387604070305,
      "grad_norm": 0.39655131101608276,
      "learning_rate": 1.8493150684931506e-05,
      "loss": 0.9133,
      "step": 90
    },
    {
      "epoch": 0.018706958577448866,
      "grad_norm": 0.36367830634117126,
      "learning_rate": 1.86986301369863e-05,
      "loss": 0.9085,
      "step": 91
    },
    {
      "epoch": 0.018912529550827423,
      "grad_norm": 0.4867264926433563,
      "learning_rate": 1.8904109589041096e-05,
      "loss": 0.8848,
      "step": 92
    },
    {
      "epoch": 0.019118100524205983,
      "grad_norm": 0.3669883906841278,
      "learning_rate": 1.910958904109589e-05,
      "loss": 0.8986,
      "step": 93
    },
    {
      "epoch": 0.01932367149758454,
      "grad_norm": 0.4508739411830902,
      "learning_rate": 1.9315068493150686e-05,
      "loss": 0.9478,
      "step": 94
    },
    {
      "epoch": 0.0195292424709631,
      "grad_norm": 0.4065166711807251,
      "learning_rate": 1.952054794520548e-05,
      "loss": 0.9318,
      "step": 95
    },
    {
      "epoch": 0.019734813444341658,
      "grad_norm": 0.21278417110443115,
      "learning_rate": 1.9726027397260273e-05,
      "loss": 0.6272,
      "step": 96
    },
    {
      "epoch": 0.019940384417720218,
      "grad_norm": 0.5677651762962341,
      "learning_rate": 1.9931506849315068e-05,
      "loss": 0.9209,
      "step": 97
    },
    {
      "epoch": 0.02014595539109878,
      "grad_norm": 0.4079231023788452,
      "learning_rate": 2.0136986301369863e-05,
      "loss": 0.9189,
      "step": 98
    },
    {
      "epoch": 0.020351526364477335,
      "grad_norm": 0.3942011892795563,
      "learning_rate": 2.0342465753424658e-05,
      "loss": 0.8827,
      "step": 99
    },
    {
      "epoch": 0.020557097337855896,
      "grad_norm": 0.5771577954292297,
      "learning_rate": 2.0547945205479453e-05,
      "loss": 0.885,
      "step": 100
    },
    {
      "epoch": 0.020762668311234453,
      "grad_norm": 0.35876256227493286,
      "learning_rate": 2.0753424657534248e-05,
      "loss": 0.867,
      "step": 101
    },
    {
      "epoch": 0.020968239284613013,
      "grad_norm": 0.47500577569007874,
      "learning_rate": 2.0958904109589043e-05,
      "loss": 0.8921,
      "step": 102
    },
    {
      "epoch": 0.02117381025799157,
      "grad_norm": 0.4215965270996094,
      "learning_rate": 2.1164383561643834e-05,
      "loss": 0.883,
      "step": 103
    },
    {
      "epoch": 0.02137938123137013,
      "grad_norm": 0.41377994418144226,
      "learning_rate": 2.136986301369863e-05,
      "loss": 0.9116,
      "step": 104
    },
    {
      "epoch": 0.021584952204748688,
      "grad_norm": 0.4422590434551239,
      "learning_rate": 2.1575342465753424e-05,
      "loss": 0.9215,
      "step": 105
    },
    {
      "epoch": 0.021790523178127248,
      "grad_norm": 0.39756667613983154,
      "learning_rate": 2.178082191780822e-05,
      "loss": 0.8749,
      "step": 106
    },
    {
      "epoch": 0.02199609415150581,
      "grad_norm": 0.3924627900123596,
      "learning_rate": 2.1986301369863014e-05,
      "loss": 0.9013,
      "step": 107
    },
    {
      "epoch": 0.022201665124884366,
      "grad_norm": 0.4422127306461334,
      "learning_rate": 2.219178082191781e-05,
      "loss": 0.8741,
      "step": 108
    },
    {
      "epoch": 0.022407236098262926,
      "grad_norm": 0.37621861696243286,
      "learning_rate": 2.2397260273972604e-05,
      "loss": 0.8726,
      "step": 109
    },
    {
      "epoch": 0.022612807071641483,
      "grad_norm": 0.38060134649276733,
      "learning_rate": 2.26027397260274e-05,
      "loss": 0.8584,
      "step": 110
    },
    {
      "epoch": 0.022818378045020044,
      "grad_norm": 0.2121458202600479,
      "learning_rate": 2.2808219178082194e-05,
      "loss": 0.6438,
      "step": 111
    },
    {
      "epoch": 0.0230239490183986,
      "grad_norm": 0.5301511883735657,
      "learning_rate": 2.3013698630136985e-05,
      "loss": 0.894,
      "step": 112
    },
    {
      "epoch": 0.02322951999177716,
      "grad_norm": 0.3643994629383087,
      "learning_rate": 2.3219178082191784e-05,
      "loss": 0.8608,
      "step": 113
    },
    {
      "epoch": 0.02343509096515572,
      "grad_norm": 0.4830370843410492,
      "learning_rate": 2.3424657534246575e-05,
      "loss": 0.9062,
      "step": 114
    },
    {
      "epoch": 0.02364066193853428,
      "grad_norm": 0.384884774684906,
      "learning_rate": 2.3630136986301374e-05,
      "loss": 0.8855,
      "step": 115
    },
    {
      "epoch": 0.02384623291191284,
      "grad_norm": 0.3976382315158844,
      "learning_rate": 2.3835616438356165e-05,
      "loss": 0.8806,
      "step": 116
    },
    {
      "epoch": 0.024051803885291396,
      "grad_norm": 0.1835232675075531,
      "learning_rate": 2.404109589041096e-05,
      "loss": 0.611,
      "step": 117
    },
    {
      "epoch": 0.024257374858669956,
      "grad_norm": 0.5072860717773438,
      "learning_rate": 2.4246575342465755e-05,
      "loss": 0.9086,
      "step": 118
    },
    {
      "epoch": 0.024462945832048513,
      "grad_norm": 0.3984593152999878,
      "learning_rate": 2.445205479452055e-05,
      "loss": 0.8694,
      "step": 119
    },
    {
      "epoch": 0.024668516805427074,
      "grad_norm": 0.4669335186481476,
      "learning_rate": 2.4657534246575342e-05,
      "loss": 0.8798,
      "step": 120
    },
    {
      "epoch": 0.024874087778805634,
      "grad_norm": 0.4184141159057617,
      "learning_rate": 2.486301369863014e-05,
      "loss": 0.8805,
      "step": 121
    },
    {
      "epoch": 0.02507965875218419,
      "grad_norm": 0.4648849070072174,
      "learning_rate": 2.5068493150684932e-05,
      "loss": 0.8941,
      "step": 122
    },
    {
      "epoch": 0.02528522972556275,
      "grad_norm": 0.503567636013031,
      "learning_rate": 2.527397260273973e-05,
      "loss": 0.9006,
      "step": 123
    },
    {
      "epoch": 0.02549080069894131,
      "grad_norm": 0.4252830445766449,
      "learning_rate": 2.547945205479452e-05,
      "loss": 0.8887,
      "step": 124
    },
    {
      "epoch": 0.02569637167231987,
      "grad_norm": 0.4380176067352295,
      "learning_rate": 2.5684931506849317e-05,
      "loss": 0.8662,
      "step": 125
    },
    {
      "epoch": 0.025901942645698426,
      "grad_norm": 0.3882461488246918,
      "learning_rate": 2.5890410958904108e-05,
      "loss": 0.8969,
      "step": 126
    },
    {
      "epoch": 0.026107513619076986,
      "grad_norm": 0.43722933530807495,
      "learning_rate": 2.6095890410958907e-05,
      "loss": 0.8589,
      "step": 127
    },
    {
      "epoch": 0.026313084592455547,
      "grad_norm": 0.46026188135147095,
      "learning_rate": 2.6301369863013698e-05,
      "loss": 0.8831,
      "step": 128
    },
    {
      "epoch": 0.026518655565834104,
      "grad_norm": 0.36106160283088684,
      "learning_rate": 2.6506849315068496e-05,
      "loss": 0.8433,
      "step": 129
    },
    {
      "epoch": 0.026724226539212664,
      "grad_norm": 0.19909483194351196,
      "learning_rate": 2.6712328767123288e-05,
      "loss": 0.6199,
      "step": 130
    },
    {
      "epoch": 0.02692979751259122,
      "grad_norm": 0.5032296180725098,
      "learning_rate": 2.6917808219178086e-05,
      "loss": 0.9036,
      "step": 131
    },
    {
      "epoch": 0.027135368485969782,
      "grad_norm": 0.40603938698768616,
      "learning_rate": 2.7123287671232878e-05,
      "loss": 0.8892,
      "step": 132
    },
    {
      "epoch": 0.02734093945934834,
      "grad_norm": 0.43442800641059875,
      "learning_rate": 2.7328767123287673e-05,
      "loss": 0.8975,
      "step": 133
    },
    {
      "epoch": 0.0275465104327269,
      "grad_norm": 0.442852258682251,
      "learning_rate": 2.7534246575342465e-05,
      "loss": 0.8509,
      "step": 134
    },
    {
      "epoch": 0.027752081406105456,
      "grad_norm": 0.4811699688434601,
      "learning_rate": 2.7739726027397263e-05,
      "loss": 0.8496,
      "step": 135
    },
    {
      "epoch": 0.027957652379484017,
      "grad_norm": 0.38817986845970154,
      "learning_rate": 2.7945205479452054e-05,
      "loss": 0.8383,
      "step": 136
    },
    {
      "epoch": 0.028163223352862577,
      "grad_norm": 0.41808751225471497,
      "learning_rate": 2.8150684931506853e-05,
      "loss": 0.8662,
      "step": 137
    },
    {
      "epoch": 0.028368794326241134,
      "grad_norm": 0.49768969416618347,
      "learning_rate": 2.8356164383561644e-05,
      "loss": 0.8526,
      "step": 138
    },
    {
      "epoch": 0.028574365299619695,
      "grad_norm": 0.3861895203590393,
      "learning_rate": 2.856164383561644e-05,
      "loss": 0.8454,
      "step": 139
    },
    {
      "epoch": 0.02877993627299825,
      "grad_norm": 0.4545285999774933,
      "learning_rate": 2.8767123287671234e-05,
      "loss": 0.8717,
      "step": 140
    },
    {
      "epoch": 0.028985507246376812,
      "grad_norm": 0.20150704681873322,
      "learning_rate": 2.897260273972603e-05,
      "loss": 0.6377,
      "step": 141
    },
    {
      "epoch": 0.02919107821975537,
      "grad_norm": 0.42400142550468445,
      "learning_rate": 2.917808219178082e-05,
      "loss": 0.8583,
      "step": 142
    },
    {
      "epoch": 0.02939664919313393,
      "grad_norm": 0.3788576126098633,
      "learning_rate": 2.938356164383562e-05,
      "loss": 0.8476,
      "step": 143
    },
    {
      "epoch": 0.02960222016651249,
      "grad_norm": 0.17580586671829224,
      "learning_rate": 2.958904109589041e-05,
      "loss": 0.6334,
      "step": 144
    },
    {
      "epoch": 0.029807791139891047,
      "grad_norm": 0.17598563432693481,
      "learning_rate": 2.979452054794521e-05,
      "loss": 0.6251,
      "step": 145
    },
    {
      "epoch": 0.030013362113269607,
      "grad_norm": 0.7843010425567627,
      "learning_rate": 3e-05,
      "loss": 0.8495,
      "step": 146
    },
    {
      "epoch": 0.030218933086648164,
      "grad_norm": 0.478127121925354,
      "learning_rate": 3.0205479452054796e-05,
      "loss": 0.8733,
      "step": 147
    },
    {
      "epoch": 0.030424504060026725,
      "grad_norm": 0.6210460066795349,
      "learning_rate": 3.041095890410959e-05,
      "loss": 0.8513,
      "step": 148
    },
    {
      "epoch": 0.03063007503340528,
      "grad_norm": 0.5364311337471008,
      "learning_rate": 3.061643835616439e-05,
      "loss": 0.8532,
      "step": 149
    },
    {
      "epoch": 0.030835646006783842,
      "grad_norm": 0.5108141899108887,
      "learning_rate": 3.082191780821918e-05,
      "loss": 0.852,
      "step": 150
    },
    {
      "epoch": 0.031041216980162403,
      "grad_norm": 0.4817136228084564,
      "learning_rate": 3.102739726027397e-05,
      "loss": 0.8431,
      "step": 151
    },
    {
      "epoch": 0.03124678795354096,
      "grad_norm": 0.5212568044662476,
      "learning_rate": 3.123287671232877e-05,
      "loss": 0.8591,
      "step": 152
    },
    {
      "epoch": 0.03145235892691952,
      "grad_norm": 0.4288831949234009,
      "learning_rate": 3.143835616438356e-05,
      "loss": 0.8614,
      "step": 153
    },
    {
      "epoch": 0.03165792990029808,
      "grad_norm": 0.1943136751651764,
      "learning_rate": 3.164383561643836e-05,
      "loss": 0.6347,
      "step": 154
    },
    {
      "epoch": 0.031863500873676634,
      "grad_norm": 0.7128695249557495,
      "learning_rate": 3.184931506849315e-05,
      "loss": 0.87,
      "step": 155
    },
    {
      "epoch": 0.032069071847055194,
      "grad_norm": 0.40213656425476074,
      "learning_rate": 3.205479452054795e-05,
      "loss": 0.8425,
      "step": 156
    },
    {
      "epoch": 0.032274642820433755,
      "grad_norm": 0.4853759706020355,
      "learning_rate": 3.226027397260274e-05,
      "loss": 0.8643,
      "step": 157
    },
    {
      "epoch": 0.032480213793812315,
      "grad_norm": 0.5050686001777649,
      "learning_rate": 3.246575342465754e-05,
      "loss": 0.8628,
      "step": 158
    },
    {
      "epoch": 0.032685784767190876,
      "grad_norm": 0.5028424263000488,
      "learning_rate": 3.267123287671233e-05,
      "loss": 0.8267,
      "step": 159
    },
    {
      "epoch": 0.03289135574056943,
      "grad_norm": 0.4855990707874298,
      "learning_rate": 3.287671232876713e-05,
      "loss": 0.8549,
      "step": 160
    },
    {
      "epoch": 0.03309692671394799,
      "grad_norm": 0.40553873777389526,
      "learning_rate": 3.308219178082192e-05,
      "loss": 0.8548,
      "step": 161
    },
    {
      "epoch": 0.03330249768732655,
      "grad_norm": 0.22181855142116547,
      "learning_rate": 3.328767123287671e-05,
      "loss": 0.6371,
      "step": 162
    },
    {
      "epoch": 0.03350806866070511,
      "grad_norm": 0.7873424887657166,
      "learning_rate": 3.349315068493151e-05,
      "loss": 0.8876,
      "step": 163
    },
    {
      "epoch": 0.033713639634083664,
      "grad_norm": 0.4477074444293976,
      "learning_rate": 3.36986301369863e-05,
      "loss": 0.8418,
      "step": 164
    },
    {
      "epoch": 0.033919210607462225,
      "grad_norm": 0.6497864127159119,
      "learning_rate": 3.39041095890411e-05,
      "loss": 0.8605,
      "step": 165
    },
    {
      "epoch": 0.034124781580840785,
      "grad_norm": 0.41493016481399536,
      "learning_rate": 3.410958904109589e-05,
      "loss": 0.8276,
      "step": 166
    },
    {
      "epoch": 0.034330352554219346,
      "grad_norm": 0.5347689390182495,
      "learning_rate": 3.4315068493150685e-05,
      "loss": 0.8809,
      "step": 167
    },
    {
      "epoch": 0.034535923527597906,
      "grad_norm": 0.4067676365375519,
      "learning_rate": 3.452054794520548e-05,
      "loss": 0.8329,
      "step": 168
    },
    {
      "epoch": 0.03474149450097646,
      "grad_norm": 0.4063913822174072,
      "learning_rate": 3.4726027397260275e-05,
      "loss": 0.8556,
      "step": 169
    },
    {
      "epoch": 0.03494706547435502,
      "grad_norm": 0.4246818721294403,
      "learning_rate": 3.493150684931507e-05,
      "loss": 0.8664,
      "step": 170
    },
    {
      "epoch": 0.03515263644773358,
      "grad_norm": 0.41586360335350037,
      "learning_rate": 3.5136986301369865e-05,
      "loss": 0.842,
      "step": 171
    },
    {
      "epoch": 0.03535820742111214,
      "grad_norm": 0.3807069659233093,
      "learning_rate": 3.534246575342466e-05,
      "loss": 0.824,
      "step": 172
    },
    {
      "epoch": 0.0355637783944907,
      "grad_norm": 0.7290697693824768,
      "learning_rate": 3.5547945205479455e-05,
      "loss": 0.6189,
      "step": 173
    },
    {
      "epoch": 0.035769349367869255,
      "grad_norm": 0.19204974174499512,
      "learning_rate": 3.575342465753425e-05,
      "loss": 0.6093,
      "step": 174
    },
    {
      "epoch": 0.035974920341247815,
      "grad_norm": 0.6416502594947815,
      "learning_rate": 3.5958904109589045e-05,
      "loss": 0.8379,
      "step": 175
    },
    {
      "epoch": 0.036180491314626376,
      "grad_norm": 0.3935816287994385,
      "learning_rate": 3.616438356164383e-05,
      "loss": 0.8263,
      "step": 176
    },
    {
      "epoch": 0.036386062288004936,
      "grad_norm": 0.47259315848350525,
      "learning_rate": 3.6369863013698635e-05,
      "loss": 0.8132,
      "step": 177
    },
    {
      "epoch": 0.03659163326138349,
      "grad_norm": 0.47834697365760803,
      "learning_rate": 3.657534246575342e-05,
      "loss": 0.8393,
      "step": 178
    },
    {
      "epoch": 0.03679720423476205,
      "grad_norm": 0.3470703363418579,
      "learning_rate": 3.6780821917808224e-05,
      "loss": 0.6182,
      "step": 179
    },
    {
      "epoch": 0.03700277520814061,
      "grad_norm": 0.5120542645454407,
      "learning_rate": 3.698630136986301e-05,
      "loss": 0.8336,
      "step": 180
    },
    {
      "epoch": 0.03720834618151917,
      "grad_norm": 0.42222753167152405,
      "learning_rate": 3.719178082191781e-05,
      "loss": 0.837,
      "step": 181
    },
    {
      "epoch": 0.03741391715489773,
      "grad_norm": 0.38363730907440186,
      "learning_rate": 3.73972602739726e-05,
      "loss": 0.8651,
      "step": 182
    },
    {
      "epoch": 0.037619488128276285,
      "grad_norm": 0.4108883738517761,
      "learning_rate": 3.76027397260274e-05,
      "loss": 0.8175,
      "step": 183
    },
    {
      "epoch": 0.037825059101654845,
      "grad_norm": 0.41021236777305603,
      "learning_rate": 3.780821917808219e-05,
      "loss": 0.8412,
      "step": 184
    },
    {
      "epoch": 0.038030630075033406,
      "grad_norm": 0.24833433330059052,
      "learning_rate": 3.801369863013699e-05,
      "loss": 0.6215,
      "step": 185
    },
    {
      "epoch": 0.038236201048411966,
      "grad_norm": 0.465718537569046,
      "learning_rate": 3.821917808219178e-05,
      "loss": 0.842,
      "step": 186
    },
    {
      "epoch": 0.03844177202179052,
      "grad_norm": 0.41596537828445435,
      "learning_rate": 3.842465753424658e-05,
      "loss": 0.8296,
      "step": 187
    },
    {
      "epoch": 0.03864734299516908,
      "grad_norm": 0.3815116286277771,
      "learning_rate": 3.863013698630137e-05,
      "loss": 0.8131,
      "step": 188
    },
    {
      "epoch": 0.03885291396854764,
      "grad_norm": 0.38065505027770996,
      "learning_rate": 3.883561643835617e-05,
      "loss": 0.8227,
      "step": 189
    },
    {
      "epoch": 0.0390584849419262,
      "grad_norm": 0.40238457918167114,
      "learning_rate": 3.904109589041096e-05,
      "loss": 0.829,
      "step": 190
    },
    {
      "epoch": 0.03926405591530476,
      "grad_norm": 0.39533552527427673,
      "learning_rate": 3.924657534246576e-05,
      "loss": 0.8062,
      "step": 191
    },
    {
      "epoch": 0.039469626888683315,
      "grad_norm": 0.2254960983991623,
      "learning_rate": 3.9452054794520546e-05,
      "loss": 0.6202,
      "step": 192
    },
    {
      "epoch": 0.039675197862061876,
      "grad_norm": 0.5490075945854187,
      "learning_rate": 3.965753424657535e-05,
      "loss": 0.8587,
      "step": 193
    },
    {
      "epoch": 0.039880768835440436,
      "grad_norm": 0.3820808231830597,
      "learning_rate": 3.9863013698630135e-05,
      "loss": 0.8461,
      "step": 194
    },
    {
      "epoch": 0.040086339808818996,
      "grad_norm": 0.48500680923461914,
      "learning_rate": 4.006849315068494e-05,
      "loss": 0.8319,
      "step": 195
    },
    {
      "epoch": 0.04029191078219756,
      "grad_norm": 0.20103423297405243,
      "learning_rate": 4.0273972602739725e-05,
      "loss": 0.6231,
      "step": 196
    },
    {
      "epoch": 0.04049748175557611,
      "grad_norm": 0.5550208687782288,
      "learning_rate": 4.047945205479452e-05,
      "loss": 0.8343,
      "step": 197
    },
    {
      "epoch": 0.04070305272895467,
      "grad_norm": 0.37427324056625366,
      "learning_rate": 4.0684931506849315e-05,
      "loss": 0.8292,
      "step": 198
    },
    {
      "epoch": 0.04090862370233323,
      "grad_norm": 0.2106785923242569,
      "learning_rate": 4.089041095890411e-05,
      "loss": 0.603,
      "step": 199
    },
    {
      "epoch": 0.04111419467571179,
      "grad_norm": 0.7520186305046082,
      "learning_rate": 4.1095890410958905e-05,
      "loss": 0.86,
      "step": 200
    },
    {
      "epoch": 0.041319765649090345,
      "grad_norm": 0.38897809386253357,
      "learning_rate": 4.13013698630137e-05,
      "loss": 0.82,
      "step": 201
    },
    {
      "epoch": 0.041525336622468906,
      "grad_norm": 0.5800373554229736,
      "learning_rate": 4.1506849315068495e-05,
      "loss": 0.8282,
      "step": 202
    },
    {
      "epoch": 0.041730907595847466,
      "grad_norm": 0.46717479825019836,
      "learning_rate": 4.171232876712329e-05,
      "loss": 0.8268,
      "step": 203
    },
    {
      "epoch": 0.04193647856922603,
      "grad_norm": 0.45258304476737976,
      "learning_rate": 4.1917808219178085e-05,
      "loss": 0.8178,
      "step": 204
    },
    {
      "epoch": 0.04214204954260459,
      "grad_norm": 0.44093188643455505,
      "learning_rate": 4.212328767123288e-05,
      "loss": 0.8507,
      "step": 205
    },
    {
      "epoch": 0.04234762051598314,
      "grad_norm": 0.38282710313796997,
      "learning_rate": 4.232876712328767e-05,
      "loss": 0.823,
      "step": 206
    },
    {
      "epoch": 0.0425531914893617,
      "grad_norm": 0.21601058542728424,
      "learning_rate": 4.253424657534247e-05,
      "loss": 0.6133,
      "step": 207
    },
    {
      "epoch": 0.04275876246274026,
      "grad_norm": 0.6589162945747375,
      "learning_rate": 4.273972602739726e-05,
      "loss": 0.8517,
      "step": 208
    },
    {
      "epoch": 0.04296433343611882,
      "grad_norm": 0.39537516236305237,
      "learning_rate": 4.294520547945206e-05,
      "loss": 0.8297,
      "step": 209
    },
    {
      "epoch": 0.043169904409497376,
      "grad_norm": 0.5449748039245605,
      "learning_rate": 4.315068493150685e-05,
      "loss": 0.8329,
      "step": 210
    },
    {
      "epoch": 0.043375475382875936,
      "grad_norm": 0.4801601767539978,
      "learning_rate": 4.335616438356165e-05,
      "loss": 0.8263,
      "step": 211
    },
    {
      "epoch": 0.043581046356254496,
      "grad_norm": 0.3884707987308502,
      "learning_rate": 4.356164383561644e-05,
      "loss": 0.8392,
      "step": 212
    },
    {
      "epoch": 0.04378661732963306,
      "grad_norm": 0.4665462374687195,
      "learning_rate": 4.376712328767123e-05,
      "loss": 0.8319,
      "step": 213
    },
    {
      "epoch": 0.04399218830301162,
      "grad_norm": 0.3869108557701111,
      "learning_rate": 4.397260273972603e-05,
      "loss": 0.8207,
      "step": 214
    },
    {
      "epoch": 0.04419775927639017,
      "grad_norm": 0.38586127758026123,
      "learning_rate": 4.417808219178082e-05,
      "loss": 0.8035,
      "step": 215
    },
    {
      "epoch": 0.04440333024976873,
      "grad_norm": 0.41265037655830383,
      "learning_rate": 4.438356164383562e-05,
      "loss": 0.8578,
      "step": 216
    },
    {
      "epoch": 0.04460890122314729,
      "grad_norm": 0.3726780116558075,
      "learning_rate": 4.458904109589041e-05,
      "loss": 0.8103,
      "step": 217
    },
    {
      "epoch": 0.04481447219652585,
      "grad_norm": 0.21903295814990997,
      "learning_rate": 4.479452054794521e-05,
      "loss": 0.6149,
      "step": 218
    },
    {
      "epoch": 0.04502004316990441,
      "grad_norm": 0.470803439617157,
      "learning_rate": 4.5e-05,
      "loss": 0.8187,
      "step": 219
    },
    {
      "epoch": 0.045225614143282966,
      "grad_norm": 0.3907180726528168,
      "learning_rate": 4.52054794520548e-05,
      "loss": 0.843,
      "step": 220
    },
    {
      "epoch": 0.04543118511666153,
      "grad_norm": 0.3910331726074219,
      "learning_rate": 4.54109589041096e-05,
      "loss": 0.8228,
      "step": 221
    },
    {
      "epoch": 0.04563675609004009,
      "grad_norm": 0.4238927364349365,
      "learning_rate": 4.561643835616439e-05,
      "loss": 0.8287,
      "step": 222
    },
    {
      "epoch": 0.04584232706341865,
      "grad_norm": 0.38111889362335205,
      "learning_rate": 4.582191780821918e-05,
      "loss": 0.8375,
      "step": 223
    },
    {
      "epoch": 0.0460478980367972,
      "grad_norm": 0.17004454135894775,
      "learning_rate": 4.602739726027397e-05,
      "loss": 0.6103,
      "step": 224
    },
    {
      "epoch": 0.04625346901017576,
      "grad_norm": 0.5066764950752258,
      "learning_rate": 4.623287671232877e-05,
      "loss": 0.8377,
      "step": 225
    },
    {
      "epoch": 0.04645903998355432,
      "grad_norm": 0.16975145041942596,
      "learning_rate": 4.643835616438357e-05,
      "loss": 0.6379,
      "step": 226
    },
    {
      "epoch": 0.04666461095693288,
      "grad_norm": 0.17714980244636536,
      "learning_rate": 4.6643835616438356e-05,
      "loss": 0.6246,
      "step": 227
    },
    {
      "epoch": 0.04687018193031144,
      "grad_norm": 0.44060373306274414,
      "learning_rate": 4.684931506849315e-05,
      "loss": 0.8455,
      "step": 228
    },
    {
      "epoch": 0.047075752903689996,
      "grad_norm": 0.41871070861816406,
      "learning_rate": 4.705479452054795e-05,
      "loss": 0.8438,
      "step": 229
    },
    {
      "epoch": 0.04728132387706856,
      "grad_norm": 0.20235472917556763,
      "learning_rate": 4.726027397260275e-05,
      "loss": 0.6155,
      "step": 230
    },
    {
      "epoch": 0.04748689485044712,
      "grad_norm": 0.4988607168197632,
      "learning_rate": 4.7465753424657536e-05,
      "loss": 0.8098,
      "step": 231
    },
    {
      "epoch": 0.04769246582382568,
      "grad_norm": 0.41510388255119324,
      "learning_rate": 4.767123287671233e-05,
      "loss": 0.8214,
      "step": 232
    },
    {
      "epoch": 0.04789803679720424,
      "grad_norm": 0.3907022178173065,
      "learning_rate": 4.787671232876713e-05,
      "loss": 0.8112,
      "step": 233
    },
    {
      "epoch": 0.04810360777058279,
      "grad_norm": 0.40868282318115234,
      "learning_rate": 4.808219178082192e-05,
      "loss": 0.8161,
      "step": 234
    },
    {
      "epoch": 0.04830917874396135,
      "grad_norm": 0.3888959288597107,
      "learning_rate": 4.8287671232876716e-05,
      "loss": 0.803,
      "step": 235
    },
    {
      "epoch": 0.04851474971733991,
      "grad_norm": 0.38003799319267273,
      "learning_rate": 4.849315068493151e-05,
      "loss": 0.8293,
      "step": 236
    },
    {
      "epoch": 0.04872032069071847,
      "grad_norm": 0.2189408391714096,
      "learning_rate": 4.869863013698631e-05,
      "loss": 0.601,
      "step": 237
    },
    {
      "epoch": 0.048925891664097027,
      "grad_norm": 0.44841453433036804,
      "learning_rate": 4.89041095890411e-05,
      "loss": 0.8239,
      "step": 238
    },
    {
      "epoch": 0.04913146263747559,
      "grad_norm": 0.41675901412963867,
      "learning_rate": 4.9109589041095895e-05,
      "loss": 0.8041,
      "step": 239
    },
    {
      "epoch": 0.04933703361085415,
      "grad_norm": 0.3353470265865326,
      "learning_rate": 4.9315068493150684e-05,
      "loss": 0.8233,
      "step": 240
    },
    {
      "epoch": 0.04954260458423271,
      "grad_norm": 0.38614898920059204,
      "learning_rate": 4.9520547945205485e-05,
      "loss": 0.8202,
      "step": 241
    },
    {
      "epoch": 0.04974817555761127,
      "grad_norm": 0.3578384220600128,
      "learning_rate": 4.972602739726028e-05,
      "loss": 0.8155,
      "step": 242
    },
    {
      "epoch": 0.04995374653098982,
      "grad_norm": 0.3806624114513397,
      "learning_rate": 4.993150684931507e-05,
      "loss": 0.8475,
      "step": 243
    },
    {
      "epoch": 0.05015931750436838,
      "grad_norm": 0.23930180072784424,
      "learning_rate": 5.0136986301369863e-05,
      "loss": 0.6126,
      "step": 244
    },
    {
      "epoch": 0.05036488847774694,
      "grad_norm": 0.4321422278881073,
      "learning_rate": 5.0342465753424665e-05,
      "loss": 0.8145,
      "step": 245
    },
    {
      "epoch": 0.0505704594511255,
      "grad_norm": 0.3582285940647125,
      "learning_rate": 5.054794520547946e-05,
      "loss": 0.8384,
      "step": 246
    },
    {
      "epoch": 0.05077603042450406,
      "grad_norm": 0.3378206491470337,
      "learning_rate": 5.075342465753425e-05,
      "loss": 0.8189,
      "step": 247
    },
    {
      "epoch": 0.05098160139788262,
      "grad_norm": 0.3585507571697235,
      "learning_rate": 5.095890410958904e-05,
      "loss": 0.8379,
      "step": 248
    },
    {
      "epoch": 0.05118717237126118,
      "grad_norm": 0.36620137095451355,
      "learning_rate": 5.1164383561643845e-05,
      "loss": 0.8059,
      "step": 249
    },
    {
      "epoch": 0.05139274334463974,
      "grad_norm": 0.348910391330719,
      "learning_rate": 5.136986301369863e-05,
      "loss": 0.8231,
      "step": 250
    },
    {
      "epoch": 0.0515983143180183,
      "grad_norm": 0.37466245889663696,
      "learning_rate": 5.157534246575343e-05,
      "loss": 0.8263,
      "step": 251
    },
    {
      "epoch": 0.05180388529139685,
      "grad_norm": 0.3923078775405884,
      "learning_rate": 5.1780821917808216e-05,
      "loss": 0.8142,
      "step": 252
    },
    {
      "epoch": 0.05200945626477541,
      "grad_norm": 0.3668658435344696,
      "learning_rate": 5.1986301369863025e-05,
      "loss": 0.815,
      "step": 253
    },
    {
      "epoch": 0.05221502723815397,
      "grad_norm": 0.34352773427963257,
      "learning_rate": 5.219178082191781e-05,
      "loss": 0.8103,
      "step": 254
    },
    {
      "epoch": 0.05242059821153253,
      "grad_norm": 0.35997268557548523,
      "learning_rate": 5.239726027397261e-05,
      "loss": 0.8021,
      "step": 255
    },
    {
      "epoch": 0.052626169184911094,
      "grad_norm": 0.4281958043575287,
      "learning_rate": 5.2602739726027396e-05,
      "loss": 0.613,
      "step": 256
    },
    {
      "epoch": 0.05283174015828965,
      "grad_norm": 0.40191400051116943,
      "learning_rate": 5.28082191780822e-05,
      "loss": 0.8114,
      "step": 257
    },
    {
      "epoch": 0.05303731113166821,
      "grad_norm": 0.2332005500793457,
      "learning_rate": 5.301369863013699e-05,
      "loss": 0.6145,
      "step": 258
    },
    {
      "epoch": 0.05324288210504677,
      "grad_norm": 0.3814218044281006,
      "learning_rate": 5.321917808219178e-05,
      "loss": 0.8322,
      "step": 259
    },
    {
      "epoch": 0.05344845307842533,
      "grad_norm": 0.8000903129577637,
      "learning_rate": 5.3424657534246576e-05,
      "loss": 0.8061,
      "step": 260
    },
    {
      "epoch": 0.05365402405180388,
      "grad_norm": 0.3613252341747284,
      "learning_rate": 5.363013698630138e-05,
      "loss": 0.817,
      "step": 261
    },
    {
      "epoch": 0.05385959502518244,
      "grad_norm": 0.3710997998714447,
      "learning_rate": 5.383561643835617e-05,
      "loss": 0.847,
      "step": 262
    },
    {
      "epoch": 0.054065165998561,
      "grad_norm": 0.36693164706230164,
      "learning_rate": 5.404109589041096e-05,
      "loss": 0.6174,
      "step": 263
    },
    {
      "epoch": 0.054270736971939564,
      "grad_norm": 0.4523719251155853,
      "learning_rate": 5.4246575342465756e-05,
      "loss": 0.8234,
      "step": 264
    },
    {
      "epoch": 0.054476307945318124,
      "grad_norm": 0.3696235120296478,
      "learning_rate": 5.445205479452056e-05,
      "loss": 0.7997,
      "step": 265
    },
    {
      "epoch": 0.05468187891869668,
      "grad_norm": 0.3745763599872589,
      "learning_rate": 5.4657534246575346e-05,
      "loss": 0.8098,
      "step": 266
    },
    {
      "epoch": 0.05488744989207524,
      "grad_norm": 0.36916518211364746,
      "learning_rate": 5.486301369863014e-05,
      "loss": 0.788,
      "step": 267
    },
    {
      "epoch": 0.0550930208654538,
      "grad_norm": 0.351854532957077,
      "learning_rate": 5.506849315068493e-05,
      "loss": 0.8124,
      "step": 268
    },
    {
      "epoch": 0.05529859183883236,
      "grad_norm": 0.3717731535434723,
      "learning_rate": 5.527397260273973e-05,
      "loss": 0.8166,
      "step": 269
    },
    {
      "epoch": 0.05550416281221091,
      "grad_norm": 0.3277188837528229,
      "learning_rate": 5.5479452054794526e-05,
      "loss": 0.6006,
      "step": 270
    },
    {
      "epoch": 0.05570973378558947,
      "grad_norm": 0.39217084646224976,
      "learning_rate": 5.568493150684932e-05,
      "loss": 0.8076,
      "step": 271
    },
    {
      "epoch": 0.05591530475896803,
      "grad_norm": 0.37465596199035645,
      "learning_rate": 5.589041095890411e-05,
      "loss": 0.8196,
      "step": 272
    },
    {
      "epoch": 0.056120875732346594,
      "grad_norm": 0.37113896012306213,
      "learning_rate": 5.609589041095891e-05,
      "loss": 0.8206,
      "step": 273
    },
    {
      "epoch": 0.056326446705725154,
      "grad_norm": 0.3641659915447235,
      "learning_rate": 5.6301369863013706e-05,
      "loss": 0.8372,
      "step": 274
    },
    {
      "epoch": 0.05653201767910371,
      "grad_norm": 0.3738704025745392,
      "learning_rate": 5.6506849315068494e-05,
      "loss": 0.8201,
      "step": 275
    },
    {
      "epoch": 0.05673758865248227,
      "grad_norm": 0.35747018456459045,
      "learning_rate": 5.671232876712329e-05,
      "loss": 0.8082,
      "step": 276
    },
    {
      "epoch": 0.05694315962586083,
      "grad_norm": 0.29701605439186096,
      "learning_rate": 5.691780821917809e-05,
      "loss": 0.6105,
      "step": 277
    },
    {
      "epoch": 0.05714873059923939,
      "grad_norm": 0.4180268347263336,
      "learning_rate": 5.712328767123288e-05,
      "loss": 0.8325,
      "step": 278
    },
    {
      "epoch": 0.05735430157261795,
      "grad_norm": 0.36010023951530457,
      "learning_rate": 5.7328767123287674e-05,
      "loss": 0.8403,
      "step": 279
    },
    {
      "epoch": 0.0575598725459965,
      "grad_norm": 0.35812970995903015,
      "learning_rate": 5.753424657534247e-05,
      "loss": 0.8201,
      "step": 280
    },
    {
      "epoch": 0.05776544351937506,
      "grad_norm": 0.35655659437179565,
      "learning_rate": 5.773972602739727e-05,
      "loss": 0.8104,
      "step": 281
    },
    {
      "epoch": 0.057971014492753624,
      "grad_norm": 0.3628866970539093,
      "learning_rate": 5.794520547945206e-05,
      "loss": 0.8011,
      "step": 282
    },
    {
      "epoch": 0.058176585466132184,
      "grad_norm": 0.33707040548324585,
      "learning_rate": 5.8150684931506854e-05,
      "loss": 0.7863,
      "step": 283
    },
    {
      "epoch": 0.05838215643951074,
      "grad_norm": 0.25686392188072205,
      "learning_rate": 5.835616438356164e-05,
      "loss": 0.605,
      "step": 284
    },
    {
      "epoch": 0.0585877274128893,
      "grad_norm": 0.4549000859260559,
      "learning_rate": 5.8561643835616444e-05,
      "loss": 0.7871,
      "step": 285
    },
    {
      "epoch": 0.05879329838626786,
      "grad_norm": 0.17129164934158325,
      "learning_rate": 5.876712328767124e-05,
      "loss": 0.6043,
      "step": 286
    },
    {
      "epoch": 0.05899886935964642,
      "grad_norm": 0.4582807719707489,
      "learning_rate": 5.8972602739726033e-05,
      "loss": 0.7943,
      "step": 287
    },
    {
      "epoch": 0.05920444033302498,
      "grad_norm": 0.3587150573730469,
      "learning_rate": 5.917808219178082e-05,
      "loss": 0.818,
      "step": 288
    },
    {
      "epoch": 0.05941001130640353,
      "grad_norm": 0.35766854882240295,
      "learning_rate": 5.9383561643835623e-05,
      "loss": 0.8084,
      "step": 289
    },
    {
      "epoch": 0.059615582279782094,
      "grad_norm": 0.24981027841567993,
      "learning_rate": 5.958904109589042e-05,
      "loss": 0.6123,
      "step": 290
    },
    {
      "epoch": 0.059821153253160654,
      "grad_norm": 0.4611298143863678,
      "learning_rate": 5.9794520547945207e-05,
      "loss": 0.7859,
      "step": 291
    },
    {
      "epoch": 0.060026724226539215,
      "grad_norm": 0.1829315423965454,
      "learning_rate": 6e-05,
      "loss": 0.6047,
      "step": 292
    },
    {
      "epoch": 0.060232295199917775,
      "grad_norm": 0.432064026594162,
      "learning_rate": 6.02054794520548e-05,
      "loss": 0.8252,
      "step": 293
    },
    {
      "epoch": 0.06043786617329633,
      "grad_norm": 0.3626839518547058,
      "learning_rate": 6.041095890410959e-05,
      "loss": 0.8004,
      "step": 294
    },
    {
      "epoch": 0.06064343714667489,
      "grad_norm": 0.3860291838645935,
      "learning_rate": 6.0616438356164386e-05,
      "loss": 0.8287,
      "step": 295
    },
    {
      "epoch": 0.06084900812005345,
      "grad_norm": 0.2607959806919098,
      "learning_rate": 6.082191780821918e-05,
      "loss": 0.617,
      "step": 296
    },
    {
      "epoch": 0.06105457909343201,
      "grad_norm": 0.494211882352829,
      "learning_rate": 6.102739726027398e-05,
      "loss": 0.8062,
      "step": 297
    },
    {
      "epoch": 0.06126015006681056,
      "grad_norm": 0.37032371759414673,
      "learning_rate": 6.123287671232878e-05,
      "loss": 0.7842,
      "step": 298
    },
    {
      "epoch": 0.061465721040189124,
      "grad_norm": 0.3706514835357666,
      "learning_rate": 6.143835616438357e-05,
      "loss": 0.8076,
      "step": 299
    },
    {
      "epoch": 0.061671292013567684,
      "grad_norm": 0.41590166091918945,
      "learning_rate": 6.164383561643835e-05,
      "loss": 0.8142,
      "step": 300
    },
    {
      "epoch": 0.061876862986946245,
      "grad_norm": 0.4085366129875183,
      "learning_rate": 6.184931506849316e-05,
      "loss": 0.8583,
      "step": 301
    },
    {
      "epoch": 0.062082433960324805,
      "grad_norm": 0.3671876788139343,
      "learning_rate": 6.205479452054794e-05,
      "loss": 0.7891,
      "step": 302
    },
    {
      "epoch": 0.06228800493370336,
      "grad_norm": 0.39252158999443054,
      "learning_rate": 6.226027397260275e-05,
      "loss": 0.8023,
      "step": 303
    },
    {
      "epoch": 0.06249357590708192,
      "grad_norm": 0.35324522852897644,
      "learning_rate": 6.246575342465753e-05,
      "loss": 0.7921,
      "step": 304
    },
    {
      "epoch": 0.06269914688046048,
      "grad_norm": 0.28854769468307495,
      "learning_rate": 6.267123287671234e-05,
      "loss": 0.6309,
      "step": 305
    },
    {
      "epoch": 0.06290471785383904,
      "grad_norm": 0.48670095205307007,
      "learning_rate": 6.287671232876712e-05,
      "loss": 0.7814,
      "step": 306
    },
    {
      "epoch": 0.0631102888272176,
      "grad_norm": 0.3746386170387268,
      "learning_rate": 6.308219178082193e-05,
      "loss": 0.8142,
      "step": 307
    },
    {
      "epoch": 0.06331585980059616,
      "grad_norm": 0.42179784178733826,
      "learning_rate": 6.328767123287671e-05,
      "loss": 0.8312,
      "step": 308
    },
    {
      "epoch": 0.06352143077397472,
      "grad_norm": 0.37425556778907776,
      "learning_rate": 6.349315068493152e-05,
      "loss": 0.8397,
      "step": 309
    },
    {
      "epoch": 0.06372700174735327,
      "grad_norm": 0.42048847675323486,
      "learning_rate": 6.36986301369863e-05,
      "loss": 0.7864,
      "step": 310
    },
    {
      "epoch": 0.06393257272073183,
      "grad_norm": 0.34095990657806396,
      "learning_rate": 6.390410958904109e-05,
      "loss": 0.8275,
      "step": 311
    },
    {
      "epoch": 0.06413814369411039,
      "grad_norm": 0.3992113769054413,
      "learning_rate": 6.41095890410959e-05,
      "loss": 0.8037,
      "step": 312
    },
    {
      "epoch": 0.06434371466748895,
      "grad_norm": 0.3752027451992035,
      "learning_rate": 6.43150684931507e-05,
      "loss": 0.8096,
      "step": 313
    },
    {
      "epoch": 0.06454928564086751,
      "grad_norm": 0.3788531422615051,
      "learning_rate": 6.452054794520548e-05,
      "loss": 0.8148,
      "step": 314
    },
    {
      "epoch": 0.06475485661424607,
      "grad_norm": 0.34858015179634094,
      "learning_rate": 6.472602739726027e-05,
      "loss": 0.7865,
      "step": 315
    },
    {
      "epoch": 0.06496042758762463,
      "grad_norm": 0.3562847375869751,
      "learning_rate": 6.493150684931507e-05,
      "loss": 0.7953,
      "step": 316
    },
    {
      "epoch": 0.06516599856100319,
      "grad_norm": 0.3146650493144989,
      "learning_rate": 6.513698630136988e-05,
      "loss": 0.5924,
      "step": 317
    },
    {
      "epoch": 0.06537156953438175,
      "grad_norm": 0.21578195691108704,
      "learning_rate": 6.534246575342466e-05,
      "loss": 0.6165,
      "step": 318
    },
    {
      "epoch": 0.0655771405077603,
      "grad_norm": 0.19480906426906586,
      "learning_rate": 6.554794520547945e-05,
      "loss": 0.6254,
      "step": 319
    },
    {
      "epoch": 0.06578271148113886,
      "grad_norm": 0.8668273091316223,
      "learning_rate": 6.575342465753425e-05,
      "loss": 0.8364,
      "step": 320
    },
    {
      "epoch": 0.06598828245451742,
      "grad_norm": 0.5889570116996765,
      "learning_rate": 6.595890410958906e-05,
      "loss": 0.8205,
      "step": 321
    },
    {
      "epoch": 0.06619385342789598,
      "grad_norm": 0.3477165102958679,
      "learning_rate": 6.616438356164384e-05,
      "loss": 0.6104,
      "step": 322
    },
    {
      "epoch": 0.06639942440127454,
      "grad_norm": 1.1917229890823364,
      "learning_rate": 6.636986301369863e-05,
      "loss": 0.8402,
      "step": 323
    },
    {
      "epoch": 0.0666049953746531,
      "grad_norm": 0.5916200876235962,
      "learning_rate": 6.657534246575342e-05,
      "loss": 0.8265,
      "step": 324
    },
    {
      "epoch": 0.06681056634803166,
      "grad_norm": 0.6326993107795715,
      "learning_rate": 6.678082191780822e-05,
      "loss": 0.822,
      "step": 325
    },
    {
      "epoch": 0.06701613732141022,
      "grad_norm": 0.545361340045929,
      "learning_rate": 6.698630136986302e-05,
      "loss": 0.8369,
      "step": 326
    },
    {
      "epoch": 0.06722170829478878,
      "grad_norm": 0.5392776727676392,
      "learning_rate": 6.719178082191781e-05,
      "loss": 0.8009,
      "step": 327
    },
    {
      "epoch": 0.06742727926816733,
      "grad_norm": 0.2618131637573242,
      "learning_rate": 6.73972602739726e-05,
      "loss": 0.6182,
      "step": 328
    },
    {
      "epoch": 0.06763285024154589,
      "grad_norm": 0.6088753342628479,
      "learning_rate": 6.76027397260274e-05,
      "loss": 0.8189,
      "step": 329
    },
    {
      "epoch": 0.06783842121492445,
      "grad_norm": 0.5107940435409546,
      "learning_rate": 6.78082191780822e-05,
      "loss": 0.8304,
      "step": 330
    },
    {
      "epoch": 0.06804399218830301,
      "grad_norm": 0.38624778389930725,
      "learning_rate": 6.801369863013699e-05,
      "loss": 0.8361,
      "step": 331
    },
    {
      "epoch": 0.06824956316168157,
      "grad_norm": 0.41758957505226135,
      "learning_rate": 6.821917808219178e-05,
      "loss": 0.7881,
      "step": 332
    },
    {
      "epoch": 0.06845513413506013,
      "grad_norm": 0.41675320267677307,
      "learning_rate": 6.842465753424658e-05,
      "loss": 0.8297,
      "step": 333
    },
    {
      "epoch": 0.06866070510843869,
      "grad_norm": 0.3944019079208374,
      "learning_rate": 6.863013698630137e-05,
      "loss": 0.8154,
      "step": 334
    },
    {
      "epoch": 0.06886627608181725,
      "grad_norm": 0.3403918743133545,
      "learning_rate": 6.883561643835617e-05,
      "loss": 0.6183,
      "step": 335
    },
    {
      "epoch": 0.06907184705519581,
      "grad_norm": 0.5603693127632141,
      "learning_rate": 6.904109589041096e-05,
      "loss": 0.8398,
      "step": 336
    },
    {
      "epoch": 0.06927741802857436,
      "grad_norm": 0.3981553912162781,
      "learning_rate": 6.924657534246576e-05,
      "loss": 0.8122,
      "step": 337
    },
    {
      "epoch": 0.06948298900195292,
      "grad_norm": 0.4603327214717865,
      "learning_rate": 6.945205479452055e-05,
      "loss": 0.8305,
      "step": 338
    },
    {
      "epoch": 0.06968855997533148,
      "grad_norm": 0.43689751625061035,
      "learning_rate": 6.965753424657535e-05,
      "loss": 0.828,
      "step": 339
    },
    {
      "epoch": 0.06989413094871004,
      "grad_norm": 0.41511690616607666,
      "learning_rate": 6.986301369863014e-05,
      "loss": 0.7844,
      "step": 340
    },
    {
      "epoch": 0.0700997019220886,
      "grad_norm": 0.3534780740737915,
      "learning_rate": 7.006849315068494e-05,
      "loss": 0.7882,
      "step": 341
    },
    {
      "epoch": 0.07030527289546716,
      "grad_norm": 0.33764714002609253,
      "learning_rate": 7.027397260273973e-05,
      "loss": 0.6009,
      "step": 342
    },
    {
      "epoch": 0.07051084386884572,
      "grad_norm": 0.4741517901420593,
      "learning_rate": 7.047945205479452e-05,
      "loss": 0.7903,
      "step": 343
    },
    {
      "epoch": 0.07071641484222428,
      "grad_norm": 0.19411741197109222,
      "learning_rate": 7.068493150684932e-05,
      "loss": 0.6019,
      "step": 344
    },
    {
      "epoch": 0.07092198581560284,
      "grad_norm": 0.2023278921842575,
      "learning_rate": 7.089041095890412e-05,
      "loss": 0.6041,
      "step": 345
    },
    {
      "epoch": 0.0711275567889814,
      "grad_norm": 0.18110667169094086,
      "learning_rate": 7.109589041095891e-05,
      "loss": 0.6082,
      "step": 346
    },
    {
      "epoch": 0.07133312776235995,
      "grad_norm": 0.6595879197120667,
      "learning_rate": 7.13013698630137e-05,
      "loss": 0.8487,
      "step": 347
    },
    {
      "epoch": 0.07153869873573851,
      "grad_norm": 0.3792790472507477,
      "learning_rate": 7.15068493150685e-05,
      "loss": 0.8155,
      "step": 348
    },
    {
      "epoch": 0.07174426970911707,
      "grad_norm": 0.553161084651947,
      "learning_rate": 7.17123287671233e-05,
      "loss": 0.8172,
      "step": 349
    },
    {
      "epoch": 0.07194984068249563,
      "grad_norm": 0.3672430217266083,
      "learning_rate": 7.191780821917809e-05,
      "loss": 0.7855,
      "step": 350
    },
    {
      "epoch": 0.07215541165587419,
      "grad_norm": 0.5036430358886719,
      "learning_rate": 7.212328767123288e-05,
      "loss": 0.8164,
      "step": 351
    },
    {
      "epoch": 0.07236098262925275,
      "grad_norm": 0.3772536814212799,
      "learning_rate": 7.232876712328767e-05,
      "loss": 0.7894,
      "step": 352
    },
    {
      "epoch": 0.07256655360263131,
      "grad_norm": 0.37201905250549316,
      "learning_rate": 7.253424657534247e-05,
      "loss": 0.8306,
      "step": 353
    },
    {
      "epoch": 0.07277212457600987,
      "grad_norm": 0.4128398597240448,
      "learning_rate": 7.273972602739727e-05,
      "loss": 0.8272,
      "step": 354
    },
    {
      "epoch": 0.07297769554938843,
      "grad_norm": 0.3522986173629761,
      "learning_rate": 7.294520547945206e-05,
      "loss": 0.8075,
      "step": 355
    },
    {
      "epoch": 0.07318326652276698,
      "grad_norm": 0.3743478059768677,
      "learning_rate": 7.315068493150685e-05,
      "loss": 0.8188,
      "step": 356
    },
    {
      "epoch": 0.07338883749614554,
      "grad_norm": 0.4586912989616394,
      "learning_rate": 7.335616438356165e-05,
      "loss": 0.6061,
      "step": 357
    },
    {
      "epoch": 0.0735944084695241,
      "grad_norm": 0.21246209740638733,
      "learning_rate": 7.356164383561645e-05,
      "loss": 0.6243,
      "step": 358
    },
    {
      "epoch": 0.07379997944290266,
      "grad_norm": 0.5889565944671631,
      "learning_rate": 7.376712328767124e-05,
      "loss": 0.8188,
      "step": 359
    },
    {
      "epoch": 0.07400555041628122,
      "grad_norm": 0.37973251938819885,
      "learning_rate": 7.397260273972603e-05,
      "loss": 0.8092,
      "step": 360
    },
    {
      "epoch": 0.07421112138965978,
      "grad_norm": 0.45936939120292664,
      "learning_rate": 7.417808219178083e-05,
      "loss": 0.6085,
      "step": 361
    },
    {
      "epoch": 0.07441669236303834,
      "grad_norm": 0.33185017108917236,
      "learning_rate": 7.438356164383562e-05,
      "loss": 0.5758,
      "step": 362
    },
    {
      "epoch": 0.0746222633364169,
      "grad_norm": 0.7869192361831665,
      "learning_rate": 7.458904109589042e-05,
      "loss": 0.8316,
      "step": 363
    },
    {
      "epoch": 0.07482783430979546,
      "grad_norm": 0.5039427876472473,
      "learning_rate": 7.47945205479452e-05,
      "loss": 0.8197,
      "step": 364
    },
    {
      "epoch": 0.07503340528317401,
      "grad_norm": 0.4809415340423584,
      "learning_rate": 7.500000000000001e-05,
      "loss": 0.8023,
      "step": 365
    },
    {
      "epoch": 0.07523897625655257,
      "grad_norm": 0.5067195296287537,
      "learning_rate": 7.52054794520548e-05,
      "loss": 0.8258,
      "step": 366
    },
    {
      "epoch": 0.07544454722993113,
      "grad_norm": 0.44106048345565796,
      "learning_rate": 7.54109589041096e-05,
      "loss": 0.8063,
      "step": 367
    },
    {
      "epoch": 0.07565011820330969,
      "grad_norm": 0.40639805793762207,
      "learning_rate": 7.561643835616439e-05,
      "loss": 0.8315,
      "step": 368
    },
    {
      "epoch": 0.07585568917668825,
      "grad_norm": 0.44400423765182495,
      "learning_rate": 7.582191780821919e-05,
      "loss": 0.8053,
      "step": 369
    },
    {
      "epoch": 0.07606126015006681,
      "grad_norm": 0.3997926414012909,
      "learning_rate": 7.602739726027398e-05,
      "loss": 0.8118,
      "step": 370
    },
    {
      "epoch": 0.07626683112344537,
      "grad_norm": 0.36897820234298706,
      "learning_rate": 7.623287671232878e-05,
      "loss": 0.8377,
      "step": 371
    },
    {
      "epoch": 0.07647240209682393,
      "grad_norm": 0.40449821949005127,
      "learning_rate": 7.643835616438356e-05,
      "loss": 0.8115,
      "step": 372
    },
    {
      "epoch": 0.0766779730702025,
      "grad_norm": 0.39014002680778503,
      "learning_rate": 7.664383561643837e-05,
      "loss": 0.8149,
      "step": 373
    },
    {
      "epoch": 0.07688354404358104,
      "grad_norm": 0.3730955421924591,
      "learning_rate": 7.684931506849315e-05,
      "loss": 0.8019,
      "step": 374
    },
    {
      "epoch": 0.0770891150169596,
      "grad_norm": 0.36292803287506104,
      "learning_rate": 7.705479452054794e-05,
      "loss": 0.8305,
      "step": 375
    },
    {
      "epoch": 0.07729468599033816,
      "grad_norm": 0.8635247349739075,
      "learning_rate": 7.726027397260274e-05,
      "loss": 0.6601,
      "step": 376
    },
    {
      "epoch": 0.07750025696371672,
      "grad_norm": 0.4957028925418854,
      "learning_rate": 7.746575342465755e-05,
      "loss": 0.8365,
      "step": 377
    },
    {
      "epoch": 0.07770582793709528,
      "grad_norm": 0.400206983089447,
      "learning_rate": 7.767123287671233e-05,
      "loss": 0.8128,
      "step": 378
    },
    {
      "epoch": 0.07791139891047384,
      "grad_norm": 0.3647255301475525,
      "learning_rate": 7.787671232876712e-05,
      "loss": 0.7968,
      "step": 379
    },
    {
      "epoch": 0.0781169698838524,
      "grad_norm": 0.39965569972991943,
      "learning_rate": 7.808219178082192e-05,
      "loss": 0.8015,
      "step": 380
    },
    {
      "epoch": 0.07832254085723096,
      "grad_norm": 0.3467910885810852,
      "learning_rate": 7.828767123287673e-05,
      "loss": 0.7904,
      "step": 381
    },
    {
      "epoch": 0.07852811183060952,
      "grad_norm": 0.33436062932014465,
      "learning_rate": 7.849315068493151e-05,
      "loss": 0.7647,
      "step": 382
    },
    {
      "epoch": 0.07873368280398808,
      "grad_norm": 0.3548223376274109,
      "learning_rate": 7.86986301369863e-05,
      "loss": 0.7939,
      "step": 383
    },
    {
      "epoch": 0.07893925377736663,
      "grad_norm": 0.7502946853637695,
      "learning_rate": 7.890410958904109e-05,
      "loss": 0.6747,
      "step": 384
    },
    {
      "epoch": 0.07914482475074519,
      "grad_norm": 0.3931428790092468,
      "learning_rate": 7.910958904109589e-05,
      "loss": 0.8237,
      "step": 385
    },
    {
      "epoch": 0.07935039572412375,
      "grad_norm": 0.30833980441093445,
      "learning_rate": 7.93150684931507e-05,
      "loss": 0.64,
      "step": 386
    },
    {
      "epoch": 0.07955596669750231,
      "grad_norm": 0.43092408776283264,
      "learning_rate": 7.952054794520548e-05,
      "loss": 0.8138,
      "step": 387
    },
    {
      "epoch": 0.07976153767088087,
      "grad_norm": 0.26460933685302734,
      "learning_rate": 7.972602739726027e-05,
      "loss": 0.6153,
      "step": 388
    },
    {
      "epoch": 0.07996710864425943,
      "grad_norm": 0.4149387776851654,
      "learning_rate": 7.993150684931507e-05,
      "loss": 0.7809,
      "step": 389
    },
    {
      "epoch": 0.08017267961763799,
      "grad_norm": 0.35397103428840637,
      "learning_rate": 8.013698630136987e-05,
      "loss": 0.8249,
      "step": 390
    },
    {
      "epoch": 0.08037825059101655,
      "grad_norm": 0.34258702397346497,
      "learning_rate": 8.034246575342466e-05,
      "loss": 0.8259,
      "step": 391
    },
    {
      "epoch": 0.08058382156439511,
      "grad_norm": 0.3488398790359497,
      "learning_rate": 8.054794520547945e-05,
      "loss": 0.7772,
      "step": 392
    },
    {
      "epoch": 0.08078939253777366,
      "grad_norm": 0.3264416456222534,
      "learning_rate": 8.075342465753425e-05,
      "loss": 0.7751,
      "step": 393
    },
    {
      "epoch": 0.08099496351115222,
      "grad_norm": 0.3270927965641022,
      "learning_rate": 8.095890410958904e-05,
      "loss": 0.7992,
      "step": 394
    },
    {
      "epoch": 0.08120053448453078,
      "grad_norm": 0.2641488313674927,
      "learning_rate": 8.116438356164384e-05,
      "loss": 0.6224,
      "step": 395
    },
    {
      "epoch": 0.08140610545790934,
      "grad_norm": 0.3740901052951813,
      "learning_rate": 8.136986301369863e-05,
      "loss": 0.8118,
      "step": 396
    },
    {
      "epoch": 0.0816116764312879,
      "grad_norm": 0.328571081161499,
      "learning_rate": 8.157534246575343e-05,
      "loss": 0.7969,
      "step": 397
    },
    {
      "epoch": 0.08181724740466646,
      "grad_norm": 0.2278534322977066,
      "learning_rate": 8.178082191780822e-05,
      "loss": 0.6215,
      "step": 398
    },
    {
      "epoch": 0.08202281837804502,
      "grad_norm": 0.3593691885471344,
      "learning_rate": 8.198630136986302e-05,
      "loss": 0.7949,
      "step": 399
    },
    {
      "epoch": 0.08222838935142358,
      "grad_norm": 0.3530971109867096,
      "learning_rate": 8.219178082191781e-05,
      "loss": 0.8042,
      "step": 400
    },
    {
      "epoch": 0.08243396032480214,
      "grad_norm": 0.17606891691684723,
      "learning_rate": 8.239726027397261e-05,
      "loss": 0.638,
      "step": 401
    },
    {
      "epoch": 0.08263953129818069,
      "grad_norm": 0.1690833419561386,
      "learning_rate": 8.26027397260274e-05,
      "loss": 0.5895,
      "step": 402
    },
    {
      "epoch": 0.08284510227155925,
      "grad_norm": 0.17045153677463531,
      "learning_rate": 8.280821917808219e-05,
      "loss": 0.5924,
      "step": 403
    },
    {
      "epoch": 0.08305067324493781,
      "grad_norm": 0.5894138813018799,
      "learning_rate": 8.301369863013699e-05,
      "loss": 0.8156,
      "step": 404
    },
    {
      "epoch": 0.08325624421831637,
      "grad_norm": 0.3428020477294922,
      "learning_rate": 8.321917808219179e-05,
      "loss": 0.8131,
      "step": 405
    },
    {
      "epoch": 0.08346181519169493,
      "grad_norm": 0.4333934783935547,
      "learning_rate": 8.342465753424658e-05,
      "loss": 0.8106,
      "step": 406
    },
    {
      "epoch": 0.08366738616507349,
      "grad_norm": 0.4093782901763916,
      "learning_rate": 8.363013698630137e-05,
      "loss": 0.8158,
      "step": 407
    },
    {
      "epoch": 0.08387295713845205,
      "grad_norm": 0.3554767668247223,
      "learning_rate": 8.383561643835617e-05,
      "loss": 0.805,
      "step": 408
    },
    {
      "epoch": 0.08407852811183061,
      "grad_norm": 0.35396429896354675,
      "learning_rate": 8.404109589041097e-05,
      "loss": 0.787,
      "step": 409
    },
    {
      "epoch": 0.08428409908520917,
      "grad_norm": 0.36389169096946716,
      "learning_rate": 8.424657534246576e-05,
      "loss": 0.8378,
      "step": 410
    },
    {
      "epoch": 0.08448967005858772,
      "grad_norm": 0.3563280999660492,
      "learning_rate": 8.445205479452055e-05,
      "loss": 0.7844,
      "step": 411
    },
    {
      "epoch": 0.08469524103196628,
      "grad_norm": 0.340190589427948,
      "learning_rate": 8.465753424657534e-05,
      "loss": 0.8288,
      "step": 412
    },
    {
      "epoch": 0.08490081200534484,
      "grad_norm": 0.2419368475675583,
      "learning_rate": 8.486301369863015e-05,
      "loss": 0.6281,
      "step": 413
    },
    {
      "epoch": 0.0851063829787234,
      "grad_norm": 0.37181293964385986,
      "learning_rate": 8.506849315068494e-05,
      "loss": 0.8016,
      "step": 414
    },
    {
      "epoch": 0.08531195395210196,
      "grad_norm": 0.34155288338661194,
      "learning_rate": 8.527397260273973e-05,
      "loss": 0.7963,
      "step": 415
    },
    {
      "epoch": 0.08551752492548052,
      "grad_norm": 0.3259139358997345,
      "learning_rate": 8.547945205479452e-05,
      "loss": 0.8013,
      "step": 416
    },
    {
      "epoch": 0.08572309589885908,
      "grad_norm": 0.3541535437107086,
      "learning_rate": 8.568493150684932e-05,
      "loss": 0.7988,
      "step": 417
    },
    {
      "epoch": 0.08592866687223764,
      "grad_norm": 0.20659230649471283,
      "learning_rate": 8.589041095890412e-05,
      "loss": 0.6026,
      "step": 418
    },
    {
      "epoch": 0.0861342378456162,
      "grad_norm": 0.1695416420698166,
      "learning_rate": 8.609589041095891e-05,
      "loss": 0.5905,
      "step": 419
    },
    {
      "epoch": 0.08633980881899475,
      "grad_norm": 0.48443859815597534,
      "learning_rate": 8.63013698630137e-05,
      "loss": 0.8179,
      "step": 420
    },
    {
      "epoch": 0.08654537979237331,
      "grad_norm": 0.33505165576934814,
      "learning_rate": 8.65068493150685e-05,
      "loss": 0.7979,
      "step": 421
    },
    {
      "epoch": 0.08675095076575187,
      "grad_norm": 0.19388127326965332,
      "learning_rate": 8.67123287671233e-05,
      "loss": 0.6141,
      "step": 422
    },
    {
      "epoch": 0.08695652173913043,
      "grad_norm": 0.19659045338630676,
      "learning_rate": 8.691780821917809e-05,
      "loss": 0.5968,
      "step": 423
    },
    {
      "epoch": 0.08716209271250899,
      "grad_norm": 0.5674632787704468,
      "learning_rate": 8.712328767123288e-05,
      "loss": 0.8258,
      "step": 424
    },
    {
      "epoch": 0.08736766368588755,
      "grad_norm": 0.17561140656471252,
      "learning_rate": 8.732876712328768e-05,
      "loss": 0.5972,
      "step": 425
    },
    {
      "epoch": 0.08757323465926611,
      "grad_norm": 0.48669886589050293,
      "learning_rate": 8.753424657534247e-05,
      "loss": 0.7975,
      "step": 426
    },
    {
      "epoch": 0.08777880563264467,
      "grad_norm": 0.3487796187400818,
      "learning_rate": 8.773972602739727e-05,
      "loss": 0.7713,
      "step": 427
    },
    {
      "epoch": 0.08798437660602323,
      "grad_norm": 0.3712750971317291,
      "learning_rate": 8.794520547945206e-05,
      "loss": 0.7665,
      "step": 428
    },
    {
      "epoch": 0.0881899475794018,
      "grad_norm": 0.23141850531101227,
      "learning_rate": 8.815068493150686e-05,
      "loss": 0.6171,
      "step": 429
    },
    {
      "epoch": 0.08839551855278034,
      "grad_norm": 0.43884536623954773,
      "learning_rate": 8.835616438356165e-05,
      "loss": 0.7922,
      "step": 430
    },
    {
      "epoch": 0.0886010895261589,
      "grad_norm": 0.17824266850948334,
      "learning_rate": 8.856164383561645e-05,
      "loss": 0.616,
      "step": 431
    },
    {
      "epoch": 0.08880666049953746,
      "grad_norm": 0.4101521670818329,
      "learning_rate": 8.876712328767124e-05,
      "loss": 0.8083,
      "step": 432
    },
    {
      "epoch": 0.08901223147291602,
      "grad_norm": 0.3446323275566101,
      "learning_rate": 8.897260273972604e-05,
      "loss": 0.813,
      "step": 433
    },
    {
      "epoch": 0.08921780244629458,
      "grad_norm": 0.17695310711860657,
      "learning_rate": 8.917808219178083e-05,
      "loss": 0.5855,
      "step": 434
    },
    {
      "epoch": 0.08942337341967314,
      "grad_norm": 0.41505882143974304,
      "learning_rate": 8.938356164383561e-05,
      "loss": 0.7966,
      "step": 435
    },
    {
      "epoch": 0.0896289443930517,
      "grad_norm": 0.3373473286628723,
      "learning_rate": 8.958904109589042e-05,
      "loss": 0.7981,
      "step": 436
    },
    {
      "epoch": 0.08983451536643026,
      "grad_norm": 0.1881159394979477,
      "learning_rate": 8.979452054794522e-05,
      "loss": 0.5907,
      "step": 437
    },
    {
      "epoch": 0.09004008633980883,
      "grad_norm": 0.570391058921814,
      "learning_rate": 9e-05,
      "loss": 0.8141,
      "step": 438
    },
    {
      "epoch": 0.09024565731318737,
      "grad_norm": 0.34099552035331726,
      "learning_rate": 8.999999889153016e-05,
      "loss": 0.7716,
      "step": 439
    },
    {
      "epoch": 0.09045122828656593,
      "grad_norm": 0.4682377576828003,
      "learning_rate": 8.999999556612072e-05,
      "loss": 0.8084,
      "step": 440
    },
    {
      "epoch": 0.09065679925994449,
      "grad_norm": 0.36160755157470703,
      "learning_rate": 8.999999002377183e-05,
      "loss": 0.7883,
      "step": 441
    },
    {
      "epoch": 0.09086237023332305,
      "grad_norm": 0.42005038261413574,
      "learning_rate": 8.999998226448373e-05,
      "loss": 0.794,
      "step": 442
    },
    {
      "epoch": 0.09106794120670161,
      "grad_norm": 0.32100972533226013,
      "learning_rate": 8.999997228825685e-05,
      "loss": 0.7767,
      "step": 443
    },
    {
      "epoch": 0.09127351218008017,
      "grad_norm": 0.35609909892082214,
      "learning_rate": 8.999996009509166e-05,
      "loss": 0.7735,
      "step": 444
    },
    {
      "epoch": 0.09147908315345873,
      "grad_norm": 0.3225650191307068,
      "learning_rate": 8.999994568498878e-05,
      "loss": 0.7805,
      "step": 445
    },
    {
      "epoch": 0.0916846541268373,
      "grad_norm": 0.5321671962738037,
      "learning_rate": 8.999992905794889e-05,
      "loss": 0.8085,
      "step": 446
    },
    {
      "epoch": 0.09189022510021586,
      "grad_norm": 0.22884899377822876,
      "learning_rate": 8.999991021397283e-05,
      "loss": 0.6043,
      "step": 447
    },
    {
      "epoch": 0.0920957960735944,
      "grad_norm": 0.4308418333530426,
      "learning_rate": 8.999988915306154e-05,
      "loss": 0.7922,
      "step": 448
    },
    {
      "epoch": 0.09230136704697296,
      "grad_norm": 0.33842045068740845,
      "learning_rate": 8.999986587521601e-05,
      "loss": 0.8081,
      "step": 449
    },
    {
      "epoch": 0.09250693802035152,
      "grad_norm": 0.18722039461135864,
      "learning_rate": 8.999984038043744e-05,
      "loss": 0.5795,
      "step": 450
    },
    {
      "epoch": 0.09271250899373008,
      "grad_norm": 0.4215300679206848,
      "learning_rate": 8.999981266872705e-05,
      "loss": 0.7982,
      "step": 451
    },
    {
      "epoch": 0.09291807996710864,
      "grad_norm": 0.16856899857521057,
      "learning_rate": 8.999978274008622e-05,
      "loss": 0.5915,
      "step": 452
    },
    {
      "epoch": 0.0931236509404872,
      "grad_norm": 0.40007540583610535,
      "learning_rate": 8.999975059451644e-05,
      "loss": 0.7934,
      "step": 453
    },
    {
      "epoch": 0.09332922191386576,
      "grad_norm": 0.3234069049358368,
      "learning_rate": 8.999971623201925e-05,
      "loss": 0.7963,
      "step": 454
    },
    {
      "epoch": 0.09353479288724433,
      "grad_norm": 0.33642691373825073,
      "learning_rate": 8.999967965259639e-05,
      "loss": 0.7909,
      "step": 455
    },
    {
      "epoch": 0.09374036386062289,
      "grad_norm": 0.33508196473121643,
      "learning_rate": 8.999964085624962e-05,
      "loss": 0.7777,
      "step": 456
    },
    {
      "epoch": 0.09394593483400143,
      "grad_norm": 0.2953488826751709,
      "learning_rate": 8.999959984298089e-05,
      "loss": 0.7596,
      "step": 457
    },
    {
      "epoch": 0.09415150580737999,
      "grad_norm": 0.32082295417785645,
      "learning_rate": 8.99995566127922e-05,
      "loss": 0.7774,
      "step": 458
    },
    {
      "epoch": 0.09435707678075855,
      "grad_norm": 0.31374961137771606,
      "learning_rate": 8.999951116568568e-05,
      "loss": 0.7898,
      "step": 459
    },
    {
      "epoch": 0.09456264775413711,
      "grad_norm": 0.29701462388038635,
      "learning_rate": 8.999946350166357e-05,
      "loss": 0.7725,
      "step": 460
    },
    {
      "epoch": 0.09476821872751567,
      "grad_norm": 0.3302834630012512,
      "learning_rate": 8.999941362072822e-05,
      "loss": 0.7727,
      "step": 461
    },
    {
      "epoch": 0.09497378970089423,
      "grad_norm": 0.28933510184288025,
      "learning_rate": 8.99993615228821e-05,
      "loss": 0.8082,
      "step": 462
    },
    {
      "epoch": 0.0951793606742728,
      "grad_norm": 0.28469645977020264,
      "learning_rate": 8.999930720812776e-05,
      "loss": 0.78,
      "step": 463
    },
    {
      "epoch": 0.09538493164765136,
      "grad_norm": 0.30801114439964294,
      "learning_rate": 8.999925067646787e-05,
      "loss": 0.8154,
      "step": 464
    },
    {
      "epoch": 0.09559050262102992,
      "grad_norm": 0.28879374265670776,
      "learning_rate": 8.999919192790524e-05,
      "loss": 0.6174,
      "step": 465
    },
    {
      "epoch": 0.09579607359440848,
      "grad_norm": 0.35134953260421753,
      "learning_rate": 8.999913096244273e-05,
      "loss": 0.7819,
      "step": 466
    },
    {
      "epoch": 0.09600164456778702,
      "grad_norm": 0.31098031997680664,
      "learning_rate": 8.999906778008339e-05,
      "loss": 0.7876,
      "step": 467
    },
    {
      "epoch": 0.09620721554116558,
      "grad_norm": 0.31209641695022583,
      "learning_rate": 8.999900238083028e-05,
      "loss": 0.823,
      "step": 468
    },
    {
      "epoch": 0.09641278651454414,
      "grad_norm": 0.3438270688056946,
      "learning_rate": 8.999893476468666e-05,
      "loss": 0.7994,
      "step": 469
    },
    {
      "epoch": 0.0966183574879227,
      "grad_norm": 0.303815096616745,
      "learning_rate": 8.999886493165584e-05,
      "loss": 0.8183,
      "step": 470
    },
    {
      "epoch": 0.09682392846130126,
      "grad_norm": 0.31640782952308655,
      "learning_rate": 8.999879288174128e-05,
      "loss": 0.7947,
      "step": 471
    },
    {
      "epoch": 0.09702949943467983,
      "grad_norm": 0.31044483184814453,
      "learning_rate": 8.999871861494651e-05,
      "loss": 0.7867,
      "step": 472
    },
    {
      "epoch": 0.09723507040805839,
      "grad_norm": 0.3066295385360718,
      "learning_rate": 8.999864213127521e-05,
      "loss": 0.782,
      "step": 473
    },
    {
      "epoch": 0.09744064138143695,
      "grad_norm": 0.32025477290153503,
      "learning_rate": 8.999856343073111e-05,
      "loss": 0.7756,
      "step": 474
    },
    {
      "epoch": 0.0976462123548155,
      "grad_norm": 0.3043205440044403,
      "learning_rate": 8.999848251331813e-05,
      "loss": 0.8049,
      "step": 475
    },
    {
      "epoch": 0.09785178332819405,
      "grad_norm": 0.3142707943916321,
      "learning_rate": 8.999839937904024e-05,
      "loss": 0.7967,
      "step": 476
    },
    {
      "epoch": 0.09805735430157261,
      "grad_norm": 0.2932131886482239,
      "learning_rate": 8.999831402790153e-05,
      "loss": 0.8031,
      "step": 477
    },
    {
      "epoch": 0.09826292527495117,
      "grad_norm": 0.30467313528060913,
      "learning_rate": 8.999822645990621e-05,
      "loss": 0.7804,
      "step": 478
    },
    {
      "epoch": 0.09846849624832973,
      "grad_norm": 0.2950557768344879,
      "learning_rate": 8.99981366750586e-05,
      "loss": 0.8004,
      "step": 479
    },
    {
      "epoch": 0.0986740672217083,
      "grad_norm": 0.2995617091655731,
      "learning_rate": 8.99980446733631e-05,
      "loss": 0.8044,
      "step": 480
    },
    {
      "epoch": 0.09887963819508686,
      "grad_norm": 0.29080766439437866,
      "learning_rate": 8.999795045482429e-05,
      "loss": 0.7603,
      "step": 481
    },
    {
      "epoch": 0.09908520916846542,
      "grad_norm": 0.29487237334251404,
      "learning_rate": 8.999785401944675e-05,
      "loss": 0.8036,
      "step": 482
    },
    {
      "epoch": 0.09929078014184398,
      "grad_norm": 0.30198103189468384,
      "learning_rate": 8.999775536723527e-05,
      "loss": 0.7993,
      "step": 483
    },
    {
      "epoch": 0.09949635111522254,
      "grad_norm": 0.30626240372657776,
      "learning_rate": 8.999765449819471e-05,
      "loss": 0.7928,
      "step": 484
    },
    {
      "epoch": 0.09970192208860108,
      "grad_norm": 0.3268794119358063,
      "learning_rate": 8.999755141233002e-05,
      "loss": 0.7797,
      "step": 485
    },
    {
      "epoch": 0.09990749306197964,
      "grad_norm": 0.41261476278305054,
      "learning_rate": 8.99974461096463e-05,
      "loss": 0.628,
      "step": 486
    },
    {
      "epoch": 0.1001130640353582,
      "grad_norm": 0.2068365067243576,
      "learning_rate": 8.999733859014873e-05,
      "loss": 0.6014,
      "step": 487
    },
    {
      "epoch": 0.10031863500873676,
      "grad_norm": 0.6694285869598389,
      "learning_rate": 8.99972288538426e-05,
      "loss": 0.8168,
      "step": 488
    },
    {
      "epoch": 0.10052420598211532,
      "grad_norm": 0.3849710524082184,
      "learning_rate": 8.999711690073331e-05,
      "loss": 0.7958,
      "step": 489
    },
    {
      "epoch": 0.10072977695549389,
      "grad_norm": 0.4657621383666992,
      "learning_rate": 8.99970027308264e-05,
      "loss": 0.7877,
      "step": 490
    },
    {
      "epoch": 0.10093534792887245,
      "grad_norm": 0.3709288537502289,
      "learning_rate": 8.999688634412747e-05,
      "loss": 0.781,
      "step": 491
    },
    {
      "epoch": 0.101140918902251,
      "grad_norm": 0.3850356340408325,
      "learning_rate": 8.999676774064228e-05,
      "loss": 0.7822,
      "step": 492
    },
    {
      "epoch": 0.10134648987562957,
      "grad_norm": 0.32711490988731384,
      "learning_rate": 8.999664692037665e-05,
      "loss": 0.7903,
      "step": 493
    },
    {
      "epoch": 0.10155206084900811,
      "grad_norm": 0.35332190990448,
      "learning_rate": 8.999652388333654e-05,
      "loss": 0.7746,
      "step": 494
    },
    {
      "epoch": 0.10175763182238667,
      "grad_norm": 0.6354550719261169,
      "learning_rate": 8.999639862952801e-05,
      "loss": 0.6377,
      "step": 495
    },
    {
      "epoch": 0.10196320279576523,
      "grad_norm": 0.4530143737792969,
      "learning_rate": 8.999627115895724e-05,
      "loss": 0.8012,
      "step": 496
    },
    {
      "epoch": 0.1021687737691438,
      "grad_norm": 0.38917437195777893,
      "learning_rate": 8.99961414716305e-05,
      "loss": 0.7772,
      "step": 497
    },
    {
      "epoch": 0.10237434474252236,
      "grad_norm": 0.3817954361438751,
      "learning_rate": 8.999600956755417e-05,
      "loss": 0.769,
      "step": 498
    },
    {
      "epoch": 0.10257991571590092,
      "grad_norm": 0.3404269814491272,
      "learning_rate": 8.999587544673475e-05,
      "loss": 0.7832,
      "step": 499
    },
    {
      "epoch": 0.10278548668927948,
      "grad_norm": 0.29421180486679077,
      "learning_rate": 8.99957391091789e-05,
      "loss": 0.6173,
      "step": 500
    },
    {
      "epoch": 0.10299105766265804,
      "grad_norm": 0.4653105139732361,
      "learning_rate": 8.999560055489324e-05,
      "loss": 0.7835,
      "step": 501
    },
    {
      "epoch": 0.1031966286360366,
      "grad_norm": 0.3839401304721832,
      "learning_rate": 8.99954597838847e-05,
      "loss": 0.7978,
      "step": 502
    },
    {
      "epoch": 0.10340219960941516,
      "grad_norm": 0.3156857192516327,
      "learning_rate": 8.999531679616013e-05,
      "loss": 0.7589,
      "step": 503
    },
    {
      "epoch": 0.1036077705827937,
      "grad_norm": 0.3422304391860962,
      "learning_rate": 8.999517159172662e-05,
      "loss": 0.7809,
      "step": 504
    },
    {
      "epoch": 0.10381334155617226,
      "grad_norm": 0.340270072221756,
      "learning_rate": 8.999502417059132e-05,
      "loss": 0.7981,
      "step": 505
    },
    {
      "epoch": 0.10401891252955082,
      "grad_norm": 0.30371013283729553,
      "learning_rate": 8.999487453276148e-05,
      "loss": 0.7967,
      "step": 506
    },
    {
      "epoch": 0.10422448350292939,
      "grad_norm": 0.2999022901058197,
      "learning_rate": 8.999472267824447e-05,
      "loss": 0.7964,
      "step": 507
    },
    {
      "epoch": 0.10443005447630795,
      "grad_norm": 0.3306732475757599,
      "learning_rate": 8.999456860704778e-05,
      "loss": 0.7903,
      "step": 508
    },
    {
      "epoch": 0.1046356254496865,
      "grad_norm": 0.3183232843875885,
      "learning_rate": 8.999441231917901e-05,
      "loss": 0.7773,
      "step": 509
    },
    {
      "epoch": 0.10484119642306507,
      "grad_norm": 0.29510068893432617,
      "learning_rate": 8.999425381464582e-05,
      "loss": 0.7812,
      "step": 510
    },
    {
      "epoch": 0.10504676739644363,
      "grad_norm": 0.30512964725494385,
      "learning_rate": 8.999409309345609e-05,
      "loss": 0.8054,
      "step": 511
    },
    {
      "epoch": 0.10525233836982219,
      "grad_norm": 0.30337393283843994,
      "learning_rate": 8.999393015561767e-05,
      "loss": 0.767,
      "step": 512
    },
    {
      "epoch": 0.10545790934320073,
      "grad_norm": 0.32128670811653137,
      "learning_rate": 8.999376500113861e-05,
      "loss": 0.7576,
      "step": 513
    },
    {
      "epoch": 0.1056634803165793,
      "grad_norm": 0.22419625520706177,
      "learning_rate": 8.999359763002704e-05,
      "loss": 0.6232,
      "step": 514
    },
    {
      "epoch": 0.10586905128995786,
      "grad_norm": 0.35744601488113403,
      "learning_rate": 8.999342804229125e-05,
      "loss": 0.7999,
      "step": 515
    },
    {
      "epoch": 0.10607462226333642,
      "grad_norm": 0.31676504015922546,
      "learning_rate": 8.999325623793952e-05,
      "loss": 0.7892,
      "step": 516
    },
    {
      "epoch": 0.10628019323671498,
      "grad_norm": 0.3098521828651428,
      "learning_rate": 8.999308221698038e-05,
      "loss": 0.7892,
      "step": 517
    },
    {
      "epoch": 0.10648576421009354,
      "grad_norm": 0.32372260093688965,
      "learning_rate": 8.999290597942237e-05,
      "loss": 0.7697,
      "step": 518
    },
    {
      "epoch": 0.1066913351834721,
      "grad_norm": 0.3482767343521118,
      "learning_rate": 8.999272752527417e-05,
      "loss": 0.8299,
      "step": 519
    },
    {
      "epoch": 0.10689690615685066,
      "grad_norm": 0.17404678463935852,
      "learning_rate": 8.999254685454459e-05,
      "loss": 0.5814,
      "step": 520
    },
    {
      "epoch": 0.10710247713022922,
      "grad_norm": 0.36048364639282227,
      "learning_rate": 8.999236396724252e-05,
      "loss": 0.7881,
      "step": 521
    },
    {
      "epoch": 0.10730804810360776,
      "grad_norm": 0.30838942527770996,
      "learning_rate": 8.999217886337696e-05,
      "loss": 0.7818,
      "step": 522
    },
    {
      "epoch": 0.10751361907698632,
      "grad_norm": 0.3079747259616852,
      "learning_rate": 8.999199154295705e-05,
      "loss": 0.7732,
      "step": 523
    },
    {
      "epoch": 0.10771919005036489,
      "grad_norm": 0.3467218577861786,
      "learning_rate": 8.9991802005992e-05,
      "loss": 0.7969,
      "step": 524
    },
    {
      "epoch": 0.10792476102374345,
      "grad_norm": 0.29866865277290344,
      "learning_rate": 8.999161025249117e-05,
      "loss": 0.7996,
      "step": 525
    },
    {
      "epoch": 0.108130331997122,
      "grad_norm": 0.17642079293727875,
      "learning_rate": 8.999141628246398e-05,
      "loss": 0.5753,
      "step": 526
    },
    {
      "epoch": 0.10833590297050057,
      "grad_norm": 0.3251280188560486,
      "learning_rate": 8.999122009592002e-05,
      "loss": 0.7962,
      "step": 527
    },
    {
      "epoch": 0.10854147394387913,
      "grad_norm": 0.316807359457016,
      "learning_rate": 8.999102169286891e-05,
      "loss": 0.7592,
      "step": 528
    },
    {
      "epoch": 0.10874704491725769,
      "grad_norm": 0.16698336601257324,
      "learning_rate": 8.999082107332046e-05,
      "loss": 0.5955,
      "step": 529
    },
    {
      "epoch": 0.10895261589063625,
      "grad_norm": 0.30919867753982544,
      "learning_rate": 8.999061823728455e-05,
      "loss": 0.7481,
      "step": 530
    },
    {
      "epoch": 0.1091581868640148,
      "grad_norm": 0.2959042489528656,
      "learning_rate": 8.999041318477114e-05,
      "loss": 0.7795,
      "step": 531
    },
    {
      "epoch": 0.10936375783739335,
      "grad_norm": 0.15893301367759705,
      "learning_rate": 8.999020591579038e-05,
      "loss": 0.5953,
      "step": 532
    },
    {
      "epoch": 0.10956932881077192,
      "grad_norm": 0.16407330334186554,
      "learning_rate": 8.998999643035244e-05,
      "loss": 0.5873,
      "step": 533
    },
    {
      "epoch": 0.10977489978415048,
      "grad_norm": 0.3498159348964691,
      "learning_rate": 8.998978472846768e-05,
      "loss": 0.7825,
      "step": 534
    },
    {
      "epoch": 0.10998047075752904,
      "grad_norm": 0.3068999946117401,
      "learning_rate": 8.99895708101465e-05,
      "loss": 0.8112,
      "step": 535
    },
    {
      "epoch": 0.1101860417309076,
      "grad_norm": 0.28588443994522095,
      "learning_rate": 8.998935467539944e-05,
      "loss": 0.7778,
      "step": 536
    },
    {
      "epoch": 0.11039161270428616,
      "grad_norm": 0.31996187567710876,
      "learning_rate": 8.998913632423716e-05,
      "loss": 0.7736,
      "step": 537
    },
    {
      "epoch": 0.11059718367766472,
      "grad_norm": 0.3105761408805847,
      "learning_rate": 8.998891575667041e-05,
      "loss": 0.7683,
      "step": 538
    },
    {
      "epoch": 0.11080275465104328,
      "grad_norm": 0.3134320378303528,
      "learning_rate": 8.998869297271006e-05,
      "loss": 0.7877,
      "step": 539
    },
    {
      "epoch": 0.11100832562442182,
      "grad_norm": 0.2837049067020416,
      "learning_rate": 8.998846797236708e-05,
      "loss": 0.7664,
      "step": 540
    },
    {
      "epoch": 0.11121389659780039,
      "grad_norm": 0.2891695499420166,
      "learning_rate": 8.998824075565258e-05,
      "loss": 0.7862,
      "step": 541
    },
    {
      "epoch": 0.11141946757117895,
      "grad_norm": 0.2949972450733185,
      "learning_rate": 8.99880113225777e-05,
      "loss": 0.7551,
      "step": 542
    },
    {
      "epoch": 0.1116250385445575,
      "grad_norm": 0.2788076400756836,
      "learning_rate": 8.99877796731538e-05,
      "loss": 0.7657,
      "step": 543
    },
    {
      "epoch": 0.11183060951793607,
      "grad_norm": 0.237320676445961,
      "learning_rate": 8.998754580739225e-05,
      "loss": 0.6081,
      "step": 544
    },
    {
      "epoch": 0.11203618049131463,
      "grad_norm": 0.3368750810623169,
      "learning_rate": 8.99873097253046e-05,
      "loss": 0.7962,
      "step": 545
    },
    {
      "epoch": 0.11224175146469319,
      "grad_norm": 0.16897863149642944,
      "learning_rate": 8.998707142690247e-05,
      "loss": 0.5933,
      "step": 546
    },
    {
      "epoch": 0.11244732243807175,
      "grad_norm": 0.31463444232940674,
      "learning_rate": 8.99868309121976e-05,
      "loss": 0.778,
      "step": 547
    },
    {
      "epoch": 0.11265289341145031,
      "grad_norm": 0.28116437792778015,
      "learning_rate": 8.998658818120184e-05,
      "loss": 0.7677,
      "step": 548
    },
    {
      "epoch": 0.11285846438482887,
      "grad_norm": 0.2780570685863495,
      "learning_rate": 8.998634323392714e-05,
      "loss": 0.7736,
      "step": 549
    },
    {
      "epoch": 0.11306403535820742,
      "grad_norm": 0.18777993321418762,
      "learning_rate": 8.998609607038558e-05,
      "loss": 0.5928,
      "step": 550
    },
    {
      "epoch": 0.11326960633158598,
      "grad_norm": 0.3512813150882721,
      "learning_rate": 8.998584669058933e-05,
      "loss": 0.7971,
      "step": 551
    },
    {
      "epoch": 0.11347517730496454,
      "grad_norm": 0.1571076214313507,
      "learning_rate": 8.998559509455066e-05,
      "loss": 0.6026,
      "step": 552
    },
    {
      "epoch": 0.1136807482783431,
      "grad_norm": 0.1699524074792862,
      "learning_rate": 8.9985341282282e-05,
      "loss": 0.5835,
      "step": 553
    },
    {
      "epoch": 0.11388631925172166,
      "grad_norm": 0.38411441445350647,
      "learning_rate": 8.998508525379584e-05,
      "loss": 0.7829,
      "step": 554
    },
    {
      "epoch": 0.11409189022510022,
      "grad_norm": 0.2952065169811249,
      "learning_rate": 8.998482700910478e-05,
      "loss": 0.7878,
      "step": 555
    },
    {
      "epoch": 0.11429746119847878,
      "grad_norm": 0.3076973557472229,
      "learning_rate": 8.998456654822156e-05,
      "loss": 0.7988,
      "step": 556
    },
    {
      "epoch": 0.11450303217185734,
      "grad_norm": 0.30433389544487,
      "learning_rate": 8.9984303871159e-05,
      "loss": 0.78,
      "step": 557
    },
    {
      "epoch": 0.1147086031452359,
      "grad_norm": 0.30562445521354675,
      "learning_rate": 8.998403897793004e-05,
      "loss": 0.7832,
      "step": 558
    },
    {
      "epoch": 0.11491417411861445,
      "grad_norm": 0.3120015561580658,
      "learning_rate": 8.998377186854774e-05,
      "loss": 0.7989,
      "step": 559
    },
    {
      "epoch": 0.115119745091993,
      "grad_norm": 0.26990431547164917,
      "learning_rate": 8.998350254302524e-05,
      "loss": 0.7471,
      "step": 560
    },
    {
      "epoch": 0.11532531606537157,
      "grad_norm": 0.2938286364078522,
      "learning_rate": 8.998323100137585e-05,
      "loss": 0.7667,
      "step": 561
    },
    {
      "epoch": 0.11553088703875013,
      "grad_norm": 0.32502278685569763,
      "learning_rate": 8.998295724361289e-05,
      "loss": 0.7618,
      "step": 562
    },
    {
      "epoch": 0.11573645801212869,
      "grad_norm": 0.296321839094162,
      "learning_rate": 8.998268126974988e-05,
      "loss": 0.7828,
      "step": 563
    },
    {
      "epoch": 0.11594202898550725,
      "grad_norm": 0.30217137932777405,
      "learning_rate": 8.998240307980042e-05,
      "loss": 0.765,
      "step": 564
    },
    {
      "epoch": 0.11614759995888581,
      "grad_norm": 0.2876279950141907,
      "learning_rate": 8.998212267377822e-05,
      "loss": 0.7687,
      "step": 565
    },
    {
      "epoch": 0.11635317093226437,
      "grad_norm": 0.2792581021785736,
      "learning_rate": 8.998184005169706e-05,
      "loss": 0.785,
      "step": 566
    },
    {
      "epoch": 0.11655874190564293,
      "grad_norm": 0.28941112756729126,
      "learning_rate": 8.99815552135709e-05,
      "loss": 0.7732,
      "step": 567
    },
    {
      "epoch": 0.11676431287902148,
      "grad_norm": 0.28016045689582825,
      "learning_rate": 8.998126815941376e-05,
      "loss": 0.8033,
      "step": 568
    },
    {
      "epoch": 0.11696988385240004,
      "grad_norm": 0.27612999081611633,
      "learning_rate": 8.998097888923977e-05,
      "loss": 0.7811,
      "step": 569
    },
    {
      "epoch": 0.1171754548257786,
      "grad_norm": 0.2725747525691986,
      "learning_rate": 8.99806874030632e-05,
      "loss": 0.7426,
      "step": 570
    },
    {
      "epoch": 0.11738102579915716,
      "grad_norm": 0.23188281059265137,
      "learning_rate": 8.998039370089838e-05,
      "loss": 0.6119,
      "step": 571
    },
    {
      "epoch": 0.11758659677253572,
      "grad_norm": 0.329795777797699,
      "learning_rate": 8.998009778275982e-05,
      "loss": 0.7774,
      "step": 572
    },
    {
      "epoch": 0.11779216774591428,
      "grad_norm": 0.292244017124176,
      "learning_rate": 8.997979964866208e-05,
      "loss": 0.7684,
      "step": 573
    },
    {
      "epoch": 0.11799773871929284,
      "grad_norm": 0.2874715030193329,
      "learning_rate": 8.997949929861984e-05,
      "loss": 0.7606,
      "step": 574
    },
    {
      "epoch": 0.1182033096926714,
      "grad_norm": 0.3013349175453186,
      "learning_rate": 8.99791967326479e-05,
      "loss": 0.7686,
      "step": 575
    },
    {
      "epoch": 0.11840888066604996,
      "grad_norm": 0.2986513674259186,
      "learning_rate": 8.997889195076117e-05,
      "loss": 0.7651,
      "step": 576
    },
    {
      "epoch": 0.1186144516394285,
      "grad_norm": 0.2857048809528351,
      "learning_rate": 8.997858495297467e-05,
      "loss": 0.7875,
      "step": 577
    },
    {
      "epoch": 0.11882002261280707,
      "grad_norm": 0.27221107482910156,
      "learning_rate": 8.997827573930351e-05,
      "loss": 0.785,
      "step": 578
    },
    {
      "epoch": 0.11902559358618563,
      "grad_norm": 0.29440751671791077,
      "learning_rate": 8.997796430976294e-05,
      "loss": 0.7703,
      "step": 579
    },
    {
      "epoch": 0.11923116455956419,
      "grad_norm": 0.28240329027175903,
      "learning_rate": 8.99776506643683e-05,
      "loss": 0.7901,
      "step": 580
    },
    {
      "epoch": 0.11943673553294275,
      "grad_norm": 0.27463993430137634,
      "learning_rate": 8.997733480313503e-05,
      "loss": 0.7616,
      "step": 581
    },
    {
      "epoch": 0.11964230650632131,
      "grad_norm": 0.2833562195301056,
      "learning_rate": 8.99770167260787e-05,
      "loss": 0.7512,
      "step": 582
    },
    {
      "epoch": 0.11984787747969987,
      "grad_norm": 0.22366029024124146,
      "learning_rate": 8.997669643321496e-05,
      "loss": 0.6235,
      "step": 583
    },
    {
      "epoch": 0.12005344845307843,
      "grad_norm": 0.17241071164608002,
      "learning_rate": 8.997637392455963e-05,
      "loss": 0.5989,
      "step": 584
    },
    {
      "epoch": 0.12025901942645699,
      "grad_norm": 0.15749235451221466,
      "learning_rate": 8.997604920012856e-05,
      "loss": 0.5973,
      "step": 585
    },
    {
      "epoch": 0.12046459039983555,
      "grad_norm": 0.42778778076171875,
      "learning_rate": 8.997572225993778e-05,
      "loss": 0.7722,
      "step": 586
    },
    {
      "epoch": 0.1206701613732141,
      "grad_norm": 0.3165600597858429,
      "learning_rate": 8.997539310400337e-05,
      "loss": 0.7524,
      "step": 587
    },
    {
      "epoch": 0.12087573234659266,
      "grad_norm": 0.3048163950443268,
      "learning_rate": 8.997506173234156e-05,
      "loss": 0.7699,
      "step": 588
    },
    {
      "epoch": 0.12108130331997122,
      "grad_norm": 0.3166545331478119,
      "learning_rate": 8.997472814496867e-05,
      "loss": 0.7819,
      "step": 589
    },
    {
      "epoch": 0.12128687429334978,
      "grad_norm": 0.3150469958782196,
      "learning_rate": 8.997439234190113e-05,
      "loss": 0.7419,
      "step": 590
    },
    {
      "epoch": 0.12149244526672834,
      "grad_norm": 0.3222194015979767,
      "learning_rate": 8.99740543231555e-05,
      "loss": 0.7808,
      "step": 591
    },
    {
      "epoch": 0.1216980162401069,
      "grad_norm": 0.3114274740219116,
      "learning_rate": 8.99737140887484e-05,
      "loss": 0.7859,
      "step": 592
    },
    {
      "epoch": 0.12190358721348546,
      "grad_norm": 0.2929398715496063,
      "learning_rate": 8.997337163869665e-05,
      "loss": 0.8025,
      "step": 593
    },
    {
      "epoch": 0.12210915818686402,
      "grad_norm": 0.2900030016899109,
      "learning_rate": 8.997302697301706e-05,
      "loss": 0.7914,
      "step": 594
    },
    {
      "epoch": 0.12231472916024258,
      "grad_norm": 0.2980877459049225,
      "learning_rate": 8.997268009172664e-05,
      "loss": 0.7548,
      "step": 595
    },
    {
      "epoch": 0.12252030013362113,
      "grad_norm": 0.280519962310791,
      "learning_rate": 8.997233099484247e-05,
      "loss": 0.7923,
      "step": 596
    },
    {
      "epoch": 0.12272587110699969,
      "grad_norm": 0.27224200963974,
      "learning_rate": 8.997197968238175e-05,
      "loss": 0.7935,
      "step": 597
    },
    {
      "epoch": 0.12293144208037825,
      "grad_norm": 0.2736833691596985,
      "learning_rate": 8.99716261543618e-05,
      "loss": 0.7409,
      "step": 598
    },
    {
      "epoch": 0.12313701305375681,
      "grad_norm": 0.28164225816726685,
      "learning_rate": 8.99712704108e-05,
      "loss": 0.7855,
      "step": 599
    },
    {
      "epoch": 0.12334258402713537,
      "grad_norm": 0.27927008271217346,
      "learning_rate": 8.997091245171394e-05,
      "loss": 0.7768,
      "step": 600
    },
    {
      "epoch": 0.12354815500051393,
      "grad_norm": 0.2606373429298401,
      "learning_rate": 8.997055227712119e-05,
      "loss": 0.764,
      "step": 601
    },
    {
      "epoch": 0.12375372597389249,
      "grad_norm": 0.32072070240974426,
      "learning_rate": 8.997018988703953e-05,
      "loss": 0.8124,
      "step": 602
    },
    {
      "epoch": 0.12395929694727105,
      "grad_norm": 0.4943363666534424,
      "learning_rate": 8.996982528148682e-05,
      "loss": 0.6366,
      "step": 603
    },
    {
      "epoch": 0.12416486792064961,
      "grad_norm": 0.3180435299873352,
      "learning_rate": 8.996945846048098e-05,
      "loss": 0.7723,
      "step": 604
    },
    {
      "epoch": 0.12437043889402816,
      "grad_norm": 0.29927217960357666,
      "learning_rate": 8.996908942404012e-05,
      "loss": 0.7608,
      "step": 605
    },
    {
      "epoch": 0.12457600986740672,
      "grad_norm": 0.2776423990726471,
      "learning_rate": 8.99687181721824e-05,
      "loss": 0.775,
      "step": 606
    },
    {
      "epoch": 0.12478158084078528,
      "grad_norm": 0.3051820397377014,
      "learning_rate": 8.996834470492613e-05,
      "loss": 0.7923,
      "step": 607
    },
    {
      "epoch": 0.12498715181416384,
      "grad_norm": 0.2759751081466675,
      "learning_rate": 8.99679690222897e-05,
      "loss": 0.7486,
      "step": 608
    },
    {
      "epoch": 0.1251927227875424,
      "grad_norm": 0.2878243923187256,
      "learning_rate": 8.99675911242916e-05,
      "loss": 0.7774,
      "step": 609
    },
    {
      "epoch": 0.12539829376092096,
      "grad_norm": 0.2739849090576172,
      "learning_rate": 8.996721101095048e-05,
      "loss": 0.771,
      "step": 610
    },
    {
      "epoch": 0.12560386473429952,
      "grad_norm": 0.2817218601703644,
      "learning_rate": 8.996682868228505e-05,
      "loss": 0.761,
      "step": 611
    },
    {
      "epoch": 0.12580943570767808,
      "grad_norm": 0.2750679552555084,
      "learning_rate": 8.996644413831412e-05,
      "loss": 0.7739,
      "step": 612
    },
    {
      "epoch": 0.12601500668105664,
      "grad_norm": 0.26886436343193054,
      "learning_rate": 8.996605737905669e-05,
      "loss": 0.7585,
      "step": 613
    },
    {
      "epoch": 0.1262205776544352,
      "grad_norm": 0.2675554156303406,
      "learning_rate": 8.996566840453178e-05,
      "loss": 0.7639,
      "step": 614
    },
    {
      "epoch": 0.12642614862781376,
      "grad_norm": 0.2672448456287384,
      "learning_rate": 8.996527721475855e-05,
      "loss": 0.7687,
      "step": 615
    },
    {
      "epoch": 0.12663171960119232,
      "grad_norm": 0.27541592717170715,
      "learning_rate": 8.996488380975626e-05,
      "loss": 0.7702,
      "step": 616
    },
    {
      "epoch": 0.12683729057457088,
      "grad_norm": 1.3074686527252197,
      "learning_rate": 8.996448818954434e-05,
      "loss": 0.6375,
      "step": 617
    },
    {
      "epoch": 0.12704286154794944,
      "grad_norm": 0.2855135202407837,
      "learning_rate": 8.996409035414224e-05,
      "loss": 0.7633,
      "step": 618
    },
    {
      "epoch": 0.12724843252132798,
      "grad_norm": 0.6012619137763977,
      "learning_rate": 8.996369030356957e-05,
      "loss": 0.6213,
      "step": 619
    },
    {
      "epoch": 0.12745400349470654,
      "grad_norm": 0.30922386050224304,
      "learning_rate": 8.996328803784604e-05,
      "loss": 0.7827,
      "step": 620
    },
    {
      "epoch": 0.1276595744680851,
      "grad_norm": 0.29752808809280396,
      "learning_rate": 8.996288355699146e-05,
      "loss": 0.773,
      "step": 621
    },
    {
      "epoch": 0.12786514544146366,
      "grad_norm": 0.31884685158729553,
      "learning_rate": 8.996247686102577e-05,
      "loss": 0.7656,
      "step": 622
    },
    {
      "epoch": 0.12807071641484222,
      "grad_norm": 0.2772408425807953,
      "learning_rate": 8.996206794996899e-05,
      "loss": 0.7898,
      "step": 623
    },
    {
      "epoch": 0.12827628738822078,
      "grad_norm": 0.2835623323917389,
      "learning_rate": 8.996165682384129e-05,
      "loss": 0.7631,
      "step": 624
    },
    {
      "epoch": 0.12848185836159934,
      "grad_norm": 0.3379913568496704,
      "learning_rate": 8.996124348266291e-05,
      "loss": 0.7805,
      "step": 625
    },
    {
      "epoch": 0.1286874293349779,
      "grad_norm": 0.26578038930892944,
      "learning_rate": 8.996082792645419e-05,
      "loss": 0.608,
      "step": 626
    },
    {
      "epoch": 0.12889300030835646,
      "grad_norm": 0.29912567138671875,
      "learning_rate": 8.996041015523563e-05,
      "loss": 0.7565,
      "step": 627
    },
    {
      "epoch": 0.12909857128173502,
      "grad_norm": 0.3043285608291626,
      "learning_rate": 8.995999016902781e-05,
      "loss": 0.7787,
      "step": 628
    },
    {
      "epoch": 0.12930414225511358,
      "grad_norm": 0.1923503428697586,
      "learning_rate": 8.995956796785143e-05,
      "loss": 0.6051,
      "step": 629
    },
    {
      "epoch": 0.12950971322849214,
      "grad_norm": 0.29241567850112915,
      "learning_rate": 8.995914355172726e-05,
      "loss": 0.7742,
      "step": 630
    },
    {
      "epoch": 0.1297152842018707,
      "grad_norm": 0.1634470671415329,
      "learning_rate": 8.995871692067622e-05,
      "loss": 0.6009,
      "step": 631
    },
    {
      "epoch": 0.12992085517524926,
      "grad_norm": 0.1948513388633728,
      "learning_rate": 8.995828807471935e-05,
      "loss": 0.6038,
      "step": 632
    },
    {
      "epoch": 0.13012642614862782,
      "grad_norm": 0.34593167901039124,
      "learning_rate": 8.995785701387774e-05,
      "loss": 0.7712,
      "step": 633
    },
    {
      "epoch": 0.13033199712200638,
      "grad_norm": 0.2905696630477905,
      "learning_rate": 8.995742373817268e-05,
      "loss": 0.7745,
      "step": 634
    },
    {
      "epoch": 0.13053756809538494,
      "grad_norm": 0.28553932905197144,
      "learning_rate": 8.995698824762547e-05,
      "loss": 0.779,
      "step": 635
    },
    {
      "epoch": 0.1307431390687635,
      "grad_norm": 0.18538178503513336,
      "learning_rate": 8.995655054225757e-05,
      "loss": 0.623,
      "step": 636
    },
    {
      "epoch": 0.13094871004214204,
      "grad_norm": 0.32950466871261597,
      "learning_rate": 8.995611062209054e-05,
      "loss": 0.7682,
      "step": 637
    },
    {
      "epoch": 0.1311542810155206,
      "grad_norm": 0.28783705830574036,
      "learning_rate": 8.995566848714609e-05,
      "loss": 0.7534,
      "step": 638
    },
    {
      "epoch": 0.13135985198889916,
      "grad_norm": 0.2871015667915344,
      "learning_rate": 8.995522413744596e-05,
      "loss": 0.7315,
      "step": 639
    },
    {
      "epoch": 0.13156542296227772,
      "grad_norm": 0.18547143042087555,
      "learning_rate": 8.995477757301207e-05,
      "loss": 0.5805,
      "step": 640
    },
    {
      "epoch": 0.13177099393565628,
      "grad_norm": 0.34090474247932434,
      "learning_rate": 8.99543287938664e-05,
      "loss": 0.7783,
      "step": 641
    },
    {
      "epoch": 0.13197656490903484,
      "grad_norm": 0.2930915355682373,
      "learning_rate": 8.995387780003107e-05,
      "loss": 0.768,
      "step": 642
    },
    {
      "epoch": 0.1321821358824134,
      "grad_norm": 0.28531643748283386,
      "learning_rate": 8.995342459152827e-05,
      "loss": 0.7627,
      "step": 643
    },
    {
      "epoch": 0.13238770685579196,
      "grad_norm": 0.2844246029853821,
      "learning_rate": 8.995296916838038e-05,
      "loss": 0.7588,
      "step": 644
    },
    {
      "epoch": 0.13259327782917052,
      "grad_norm": 0.2866900861263275,
      "learning_rate": 8.99525115306098e-05,
      "loss": 0.7569,
      "step": 645
    },
    {
      "epoch": 0.13279884880254908,
      "grad_norm": 0.2860448360443115,
      "learning_rate": 8.995205167823908e-05,
      "loss": 0.7614,
      "step": 646
    },
    {
      "epoch": 0.13300441977592764,
      "grad_norm": 0.2673685848712921,
      "learning_rate": 8.995158961129088e-05,
      "loss": 0.7753,
      "step": 647
    },
    {
      "epoch": 0.1332099907493062,
      "grad_norm": 0.2862294316291809,
      "learning_rate": 8.995112532978798e-05,
      "loss": 0.7682,
      "step": 648
    },
    {
      "epoch": 0.13341556172268476,
      "grad_norm": 0.27633753418922424,
      "learning_rate": 8.995065883375321e-05,
      "loss": 0.7726,
      "step": 649
    },
    {
      "epoch": 0.13362113269606332,
      "grad_norm": 0.26780807971954346,
      "learning_rate": 8.995019012320959e-05,
      "loss": 0.8017,
      "step": 650
    },
    {
      "epoch": 0.13382670366944188,
      "grad_norm": 0.27239716053009033,
      "learning_rate": 8.99497191981802e-05,
      "loss": 0.7479,
      "step": 651
    },
    {
      "epoch": 0.13403227464282044,
      "grad_norm": 0.2104814648628235,
      "learning_rate": 8.994924605868824e-05,
      "loss": 0.5866,
      "step": 652
    },
    {
      "epoch": 0.134237845616199,
      "grad_norm": 0.30780890583992004,
      "learning_rate": 8.994877070475701e-05,
      "loss": 0.7577,
      "step": 653
    },
    {
      "epoch": 0.13444341658957756,
      "grad_norm": 0.2910194993019104,
      "learning_rate": 8.994829313640995e-05,
      "loss": 0.779,
      "step": 654
    },
    {
      "epoch": 0.13464898756295612,
      "grad_norm": 0.277893602848053,
      "learning_rate": 8.994781335367057e-05,
      "loss": 0.77,
      "step": 655
    },
    {
      "epoch": 0.13485455853633466,
      "grad_norm": 0.28844013810157776,
      "learning_rate": 8.994733135656252e-05,
      "loss": 0.7746,
      "step": 656
    },
    {
      "epoch": 0.13506012950971322,
      "grad_norm": 0.28865233063697815,
      "learning_rate": 8.994684714510954e-05,
      "loss": 0.7825,
      "step": 657
    },
    {
      "epoch": 0.13526570048309178,
      "grad_norm": 0.3075569272041321,
      "learning_rate": 8.994636071933546e-05,
      "loss": 0.753,
      "step": 658
    },
    {
      "epoch": 0.13547127145647034,
      "grad_norm": 0.2790246903896332,
      "learning_rate": 8.994587207926429e-05,
      "loss": 0.7341,
      "step": 659
    },
    {
      "epoch": 0.1356768424298489,
      "grad_norm": 0.27742037177085876,
      "learning_rate": 8.994538122492006e-05,
      "loss": 0.7631,
      "step": 660
    },
    {
      "epoch": 0.13588241340322746,
      "grad_norm": 0.266181617975235,
      "learning_rate": 8.994488815632699e-05,
      "loss": 0.7381,
      "step": 661
    },
    {
      "epoch": 0.13608798437660602,
      "grad_norm": 0.2639121413230896,
      "learning_rate": 8.994439287350932e-05,
      "loss": 0.7634,
      "step": 662
    },
    {
      "epoch": 0.13629355534998458,
      "grad_norm": 0.271953821182251,
      "learning_rate": 8.994389537649151e-05,
      "loss": 0.7902,
      "step": 663
    },
    {
      "epoch": 0.13649912632336314,
      "grad_norm": 0.2754836082458496,
      "learning_rate": 8.994339566529804e-05,
      "loss": 0.7708,
      "step": 664
    },
    {
      "epoch": 0.1367046972967417,
      "grad_norm": 0.30965548753738403,
      "learning_rate": 8.994289373995352e-05,
      "loss": 0.7607,
      "step": 665
    },
    {
      "epoch": 0.13691026827012026,
      "grad_norm": 0.28129950165748596,
      "learning_rate": 8.99423896004827e-05,
      "loss": 0.7701,
      "step": 666
    },
    {
      "epoch": 0.13711583924349882,
      "grad_norm": 0.23147864639759064,
      "learning_rate": 8.99418832469104e-05,
      "loss": 0.6085,
      "step": 667
    },
    {
      "epoch": 0.13732141021687738,
      "grad_norm": 0.3050214648246765,
      "learning_rate": 8.994137467926156e-05,
      "loss": 0.7704,
      "step": 668
    },
    {
      "epoch": 0.13752698119025594,
      "grad_norm": 0.15223456919193268,
      "learning_rate": 8.994086389756126e-05,
      "loss": 0.6074,
      "step": 669
    },
    {
      "epoch": 0.1377325521636345,
      "grad_norm": 0.2975500226020813,
      "learning_rate": 8.994035090183464e-05,
      "loss": 0.7422,
      "step": 670
    },
    {
      "epoch": 0.13793812313701306,
      "grad_norm": 0.28416451811790466,
      "learning_rate": 8.993983569210698e-05,
      "loss": 0.7575,
      "step": 671
    },
    {
      "epoch": 0.13814369411039162,
      "grad_norm": 0.25423794984817505,
      "learning_rate": 8.993931826840368e-05,
      "loss": 0.7617,
      "step": 672
    },
    {
      "epoch": 0.13834926508377018,
      "grad_norm": 0.2733759582042694,
      "learning_rate": 8.993879863075019e-05,
      "loss": 0.7478,
      "step": 673
    },
    {
      "epoch": 0.13855483605714872,
      "grad_norm": 0.2590562105178833,
      "learning_rate": 8.993827677917215e-05,
      "loss": 0.7578,
      "step": 674
    },
    {
      "epoch": 0.13876040703052728,
      "grad_norm": 0.26819926500320435,
      "learning_rate": 8.993775271369525e-05,
      "loss": 0.7485,
      "step": 675
    },
    {
      "epoch": 0.13896597800390584,
      "grad_norm": 0.261787474155426,
      "learning_rate": 8.993722643434532e-05,
      "loss": 0.7623,
      "step": 676
    },
    {
      "epoch": 0.1391715489772844,
      "grad_norm": 0.27696770429611206,
      "learning_rate": 8.993669794114828e-05,
      "loss": 0.5995,
      "step": 677
    },
    {
      "epoch": 0.13937711995066296,
      "grad_norm": 0.1687610000371933,
      "learning_rate": 8.993616723413015e-05,
      "loss": 0.5993,
      "step": 678
    },
    {
      "epoch": 0.13958269092404152,
      "grad_norm": 0.34388282895088196,
      "learning_rate": 8.993563431331711e-05,
      "loss": 0.7844,
      "step": 679
    },
    {
      "epoch": 0.13978826189742008,
      "grad_norm": 0.3012101948261261,
      "learning_rate": 8.993509917873539e-05,
      "loss": 0.806,
      "step": 680
    },
    {
      "epoch": 0.13999383287079864,
      "grad_norm": 0.27226656675338745,
      "learning_rate": 8.993456183041135e-05,
      "loss": 0.7302,
      "step": 681
    },
    {
      "epoch": 0.1401994038441772,
      "grad_norm": 0.2889186143875122,
      "learning_rate": 8.993402226837148e-05,
      "loss": 0.7609,
      "step": 682
    },
    {
      "epoch": 0.14040497481755576,
      "grad_norm": 0.33441823720932007,
      "learning_rate": 8.993348049264235e-05,
      "loss": 0.6023,
      "step": 683
    },
    {
      "epoch": 0.14061054579093432,
      "grad_norm": 0.21067148447036743,
      "learning_rate": 8.993293650325066e-05,
      "loss": 0.6154,
      "step": 684
    },
    {
      "epoch": 0.14081611676431288,
      "grad_norm": 0.4340059459209442,
      "learning_rate": 8.99323903002232e-05,
      "loss": 0.7965,
      "step": 685
    },
    {
      "epoch": 0.14102168773769144,
      "grad_norm": 0.3370809853076935,
      "learning_rate": 8.993184188358688e-05,
      "loss": 0.7557,
      "step": 686
    },
    {
      "epoch": 0.14122725871107,
      "grad_norm": 0.31289970874786377,
      "learning_rate": 8.993129125336873e-05,
      "loss": 0.7804,
      "step": 687
    },
    {
      "epoch": 0.14143282968444856,
      "grad_norm": 0.31972143054008484,
      "learning_rate": 8.993073840959587e-05,
      "loss": 0.7438,
      "step": 688
    },
    {
      "epoch": 0.14163840065782712,
      "grad_norm": 0.31906935572624207,
      "learning_rate": 8.993018335229552e-05,
      "loss": 0.7564,
      "step": 689
    },
    {
      "epoch": 0.14184397163120568,
      "grad_norm": 0.3015035390853882,
      "learning_rate": 8.992962608149505e-05,
      "loss": 0.7668,
      "step": 690
    },
    {
      "epoch": 0.14204954260458424,
      "grad_norm": 0.3022618591785431,
      "learning_rate": 8.99290665972219e-05,
      "loss": 0.775,
      "step": 691
    },
    {
      "epoch": 0.1422551135779628,
      "grad_norm": 0.3151668906211853,
      "learning_rate": 8.992850489950365e-05,
      "loss": 0.7715,
      "step": 692
    },
    {
      "epoch": 0.14246068455134134,
      "grad_norm": 0.29301926493644714,
      "learning_rate": 8.992794098836794e-05,
      "loss": 0.7472,
      "step": 693
    },
    {
      "epoch": 0.1426662555247199,
      "grad_norm": 0.2793315649032593,
      "learning_rate": 8.992737486384257e-05,
      "loss": 0.7795,
      "step": 694
    },
    {
      "epoch": 0.14287182649809846,
      "grad_norm": 0.28239625692367554,
      "learning_rate": 8.992680652595544e-05,
      "loss": 0.7649,
      "step": 695
    },
    {
      "epoch": 0.14307739747147702,
      "grad_norm": 0.2796134352684021,
      "learning_rate": 8.992623597473455e-05,
      "loss": 0.7207,
      "step": 696
    },
    {
      "epoch": 0.14328296844485558,
      "grad_norm": 0.2902660369873047,
      "learning_rate": 8.992566321020799e-05,
      "loss": 0.767,
      "step": 697
    },
    {
      "epoch": 0.14348853941823414,
      "grad_norm": 0.28000608086586,
      "learning_rate": 8.992508823240397e-05,
      "loss": 0.7655,
      "step": 698
    },
    {
      "epoch": 0.1436941103916127,
      "grad_norm": 0.28330516815185547,
      "learning_rate": 8.992451104135084e-05,
      "loss": 0.787,
      "step": 699
    },
    {
      "epoch": 0.14389968136499126,
      "grad_norm": 0.28026729822158813,
      "learning_rate": 8.992393163707704e-05,
      "loss": 0.774,
      "step": 700
    },
    {
      "epoch": 0.14410525233836982,
      "grad_norm": 0.5302313566207886,
      "learning_rate": 8.99233500196111e-05,
      "loss": 0.6421,
      "step": 701
    },
    {
      "epoch": 0.14431082331174838,
      "grad_norm": 0.3879426419734955,
      "learning_rate": 8.992276618898167e-05,
      "loss": 0.7804,
      "step": 702
    },
    {
      "epoch": 0.14451639428512694,
      "grad_norm": 0.34966281056404114,
      "learning_rate": 8.992218014521752e-05,
      "loss": 0.7597,
      "step": 703
    },
    {
      "epoch": 0.1447219652585055,
      "grad_norm": 0.31454893946647644,
      "learning_rate": 8.99215918883475e-05,
      "loss": 0.7709,
      "step": 704
    },
    {
      "epoch": 0.14492753623188406,
      "grad_norm": 0.3002963066101074,
      "learning_rate": 8.992100141840064e-05,
      "loss": 0.7689,
      "step": 705
    },
    {
      "epoch": 0.14513310720526262,
      "grad_norm": 0.2704041600227356,
      "learning_rate": 8.992040873540599e-05,
      "loss": 0.5956,
      "step": 706
    },
    {
      "epoch": 0.14533867817864118,
      "grad_norm": 0.37959620356559753,
      "learning_rate": 8.991981383939275e-05,
      "loss": 0.7709,
      "step": 707
    },
    {
      "epoch": 0.14554424915201974,
      "grad_norm": 0.21092139184474945,
      "learning_rate": 8.991921673039024e-05,
      "loss": 0.6133,
      "step": 708
    },
    {
      "epoch": 0.1457498201253983,
      "grad_norm": 0.3205825686454773,
      "learning_rate": 8.991861740842789e-05,
      "loss": 0.7759,
      "step": 709
    },
    {
      "epoch": 0.14595539109877687,
      "grad_norm": 0.3055117428302765,
      "learning_rate": 8.99180158735352e-05,
      "loss": 0.7601,
      "step": 710
    },
    {
      "epoch": 0.1461609620721554,
      "grad_norm": 0.2790381908416748,
      "learning_rate": 8.991741212574182e-05,
      "loss": 0.7473,
      "step": 711
    },
    {
      "epoch": 0.14636653304553396,
      "grad_norm": 0.22031188011169434,
      "learning_rate": 8.991680616507747e-05,
      "loss": 0.6042,
      "step": 712
    },
    {
      "epoch": 0.14657210401891252,
      "grad_norm": 0.18893392384052277,
      "learning_rate": 8.991619799157203e-05,
      "loss": 0.579,
      "step": 713
    },
    {
      "epoch": 0.14677767499229108,
      "grad_norm": 0.409572571516037,
      "learning_rate": 8.991558760525546e-05,
      "loss": 0.7456,
      "step": 714
    },
    {
      "epoch": 0.14698324596566964,
      "grad_norm": 0.30903562903404236,
      "learning_rate": 8.991497500615781e-05,
      "loss": 0.7597,
      "step": 715
    },
    {
      "epoch": 0.1471888169390482,
      "grad_norm": 0.3029564917087555,
      "learning_rate": 8.991436019430928e-05,
      "loss": 0.7574,
      "step": 716
    },
    {
      "epoch": 0.14739438791242676,
      "grad_norm": 0.40293097496032715,
      "learning_rate": 8.991374316974016e-05,
      "loss": 0.7726,
      "step": 717
    },
    {
      "epoch": 0.14759995888580532,
      "grad_norm": 0.2837783992290497,
      "learning_rate": 8.991312393248083e-05,
      "loss": 0.7345,
      "step": 718
    },
    {
      "epoch": 0.14780552985918388,
      "grad_norm": 0.31906503438949585,
      "learning_rate": 8.991250248256181e-05,
      "loss": 0.7493,
      "step": 719
    },
    {
      "epoch": 0.14801110083256244,
      "grad_norm": 0.28739094734191895,
      "learning_rate": 8.991187882001371e-05,
      "loss": 0.7527,
      "step": 720
    },
    {
      "epoch": 0.148216671805941,
      "grad_norm": 0.28792694211006165,
      "learning_rate": 8.991125294486727e-05,
      "loss": 0.7758,
      "step": 721
    },
    {
      "epoch": 0.14842224277931956,
      "grad_norm": 0.30004221200942993,
      "learning_rate": 8.99106248571533e-05,
      "loss": 0.774,
      "step": 722
    },
    {
      "epoch": 0.14862781375269812,
      "grad_norm": 0.2681220471858978,
      "learning_rate": 8.990999455690276e-05,
      "loss": 0.7636,
      "step": 723
    },
    {
      "epoch": 0.14883338472607668,
      "grad_norm": 0.2687060534954071,
      "learning_rate": 8.990936204414669e-05,
      "loss": 0.7763,
      "step": 724
    },
    {
      "epoch": 0.14903895569945524,
      "grad_norm": 0.3481808602809906,
      "learning_rate": 8.990872731891628e-05,
      "loss": 0.6129,
      "step": 725
    },
    {
      "epoch": 0.1492445266728338,
      "grad_norm": 0.31415244936943054,
      "learning_rate": 8.990809038124275e-05,
      "loss": 0.7789,
      "step": 726
    },
    {
      "epoch": 0.14945009764621237,
      "grad_norm": 0.2992306649684906,
      "learning_rate": 8.990745123115752e-05,
      "loss": 0.7361,
      "step": 727
    },
    {
      "epoch": 0.14965566861959093,
      "grad_norm": 0.2780331075191498,
      "learning_rate": 8.990680986869206e-05,
      "loss": 0.7657,
      "step": 728
    },
    {
      "epoch": 0.1498612395929695,
      "grad_norm": 0.20312556624412537,
      "learning_rate": 8.990616629387798e-05,
      "loss": 0.5755,
      "step": 729
    },
    {
      "epoch": 0.15006681056634802,
      "grad_norm": 0.32418328523635864,
      "learning_rate": 8.990552050674697e-05,
      "loss": 0.7537,
      "step": 730
    },
    {
      "epoch": 0.15027238153972658,
      "grad_norm": 0.30750200152397156,
      "learning_rate": 8.990487250733086e-05,
      "loss": 0.7585,
      "step": 731
    },
    {
      "epoch": 0.15047795251310514,
      "grad_norm": 0.2661309242248535,
      "learning_rate": 8.990422229566156e-05,
      "loss": 0.7454,
      "step": 732
    },
    {
      "epoch": 0.1506835234864837,
      "grad_norm": 0.2825012803077698,
      "learning_rate": 8.99035698717711e-05,
      "loss": 0.7466,
      "step": 733
    },
    {
      "epoch": 0.15088909445986226,
      "grad_norm": 0.27984434366226196,
      "learning_rate": 8.990291523569166e-05,
      "loss": 0.7558,
      "step": 734
    },
    {
      "epoch": 0.15109466543324082,
      "grad_norm": 0.20815995335578918,
      "learning_rate": 8.990225838745544e-05,
      "loss": 0.6112,
      "step": 735
    },
    {
      "epoch": 0.15130023640661938,
      "grad_norm": 0.3687712848186493,
      "learning_rate": 8.990159932709483e-05,
      "loss": 0.7705,
      "step": 736
    },
    {
      "epoch": 0.15150580737999794,
      "grad_norm": 0.28203409910202026,
      "learning_rate": 8.990093805464227e-05,
      "loss": 0.7658,
      "step": 737
    },
    {
      "epoch": 0.1517113783533765,
      "grad_norm": 0.26725029945373535,
      "learning_rate": 8.990027457013039e-05,
      "loss": 0.7545,
      "step": 738
    },
    {
      "epoch": 0.15191694932675506,
      "grad_norm": 0.27890896797180176,
      "learning_rate": 8.989960887359183e-05,
      "loss": 0.7713,
      "step": 739
    },
    {
      "epoch": 0.15212252030013362,
      "grad_norm": 0.2642592191696167,
      "learning_rate": 8.98989409650594e-05,
      "loss": 0.7418,
      "step": 740
    },
    {
      "epoch": 0.15232809127351218,
      "grad_norm": 0.28167617321014404,
      "learning_rate": 8.9898270844566e-05,
      "loss": 0.7641,
      "step": 741
    },
    {
      "epoch": 0.15253366224689074,
      "grad_norm": 0.2627207338809967,
      "learning_rate": 8.989759851214465e-05,
      "loss": 0.7453,
      "step": 742
    },
    {
      "epoch": 0.1527392332202693,
      "grad_norm": 0.28408879041671753,
      "learning_rate": 8.98969239678285e-05,
      "loss": 0.7596,
      "step": 743
    },
    {
      "epoch": 0.15294480419364787,
      "grad_norm": 0.2735441327095032,
      "learning_rate": 8.989624721165072e-05,
      "loss": 0.7715,
      "step": 744
    },
    {
      "epoch": 0.15315037516702643,
      "grad_norm": 0.18697437644004822,
      "learning_rate": 8.989556824364469e-05,
      "loss": 0.5824,
      "step": 745
    },
    {
      "epoch": 0.153355946140405,
      "grad_norm": 0.2745780646800995,
      "learning_rate": 8.989488706384386e-05,
      "loss": 0.7615,
      "step": 746
    },
    {
      "epoch": 0.15356151711378355,
      "grad_norm": 0.14835397899150848,
      "learning_rate": 8.989420367228179e-05,
      "loss": 0.5817,
      "step": 747
    },
    {
      "epoch": 0.15376708808716208,
      "grad_norm": 0.272223562002182,
      "learning_rate": 8.989351806899213e-05,
      "loss": 0.7756,
      "step": 748
    },
    {
      "epoch": 0.15397265906054064,
      "grad_norm": 0.1476040929555893,
      "learning_rate": 8.989283025400868e-05,
      "loss": 0.5714,
      "step": 749
    },
    {
      "epoch": 0.1541782300339192,
      "grad_norm": 0.29153406620025635,
      "learning_rate": 8.98921402273653e-05,
      "loss": 0.766,
      "step": 750
    },
    {
      "epoch": 0.15438380100729776,
      "grad_norm": 0.1418268382549286,
      "learning_rate": 8.989144798909598e-05,
      "loss": 0.6128,
      "step": 751
    },
    {
      "epoch": 0.15458937198067632,
      "grad_norm": 0.2692977786064148,
      "learning_rate": 8.989075353923487e-05,
      "loss": 0.7622,
      "step": 752
    },
    {
      "epoch": 0.15479494295405488,
      "grad_norm": 0.26004138588905334,
      "learning_rate": 8.989005687781615e-05,
      "loss": 0.7816,
      "step": 753
    },
    {
      "epoch": 0.15500051392743344,
      "grad_norm": 0.2757778465747833,
      "learning_rate": 8.988935800487412e-05,
      "loss": 0.7434,
      "step": 754
    },
    {
      "epoch": 0.155206084900812,
      "grad_norm": 0.255287766456604,
      "learning_rate": 8.988865692044326e-05,
      "loss": 0.7624,
      "step": 755
    },
    {
      "epoch": 0.15541165587419056,
      "grad_norm": 0.25884950160980225,
      "learning_rate": 8.988795362455807e-05,
      "loss": 0.7563,
      "step": 756
    },
    {
      "epoch": 0.15561722684756912,
      "grad_norm": 0.2563144266605377,
      "learning_rate": 8.988724811725321e-05,
      "loss": 0.7714,
      "step": 757
    },
    {
      "epoch": 0.15582279782094768,
      "grad_norm": 0.2678104639053345,
      "learning_rate": 8.988654039856344e-05,
      "loss": 0.7474,
      "step": 758
    },
    {
      "epoch": 0.15602836879432624,
      "grad_norm": 0.24936316907405853,
      "learning_rate": 8.98858304685236e-05,
      "loss": 0.7673,
      "step": 759
    },
    {
      "epoch": 0.1562339397677048,
      "grad_norm": 0.26165440678596497,
      "learning_rate": 8.988511832716873e-05,
      "loss": 0.7601,
      "step": 760
    },
    {
      "epoch": 0.15643951074108337,
      "grad_norm": 0.26390373706817627,
      "learning_rate": 8.988440397453385e-05,
      "loss": 0.771,
      "step": 761
    },
    {
      "epoch": 0.15664508171446193,
      "grad_norm": 0.2585375905036926,
      "learning_rate": 8.988368741065418e-05,
      "loss": 0.7544,
      "step": 762
    },
    {
      "epoch": 0.15685065268784049,
      "grad_norm": 0.2905960977077484,
      "learning_rate": 8.9882968635565e-05,
      "loss": 0.7778,
      "step": 763
    },
    {
      "epoch": 0.15705622366121905,
      "grad_norm": 0.25519707798957825,
      "learning_rate": 8.988224764930176e-05,
      "loss": 0.7575,
      "step": 764
    },
    {
      "epoch": 0.1572617946345976,
      "grad_norm": 0.19228395819664001,
      "learning_rate": 8.988152445189995e-05,
      "loss": 0.5991,
      "step": 765
    },
    {
      "epoch": 0.15746736560797617,
      "grad_norm": 0.3007056713104248,
      "learning_rate": 8.988079904339521e-05,
      "loss": 0.7521,
      "step": 766
    },
    {
      "epoch": 0.1576729365813547,
      "grad_norm": 0.2646825611591339,
      "learning_rate": 8.988007142382328e-05,
      "loss": 0.7681,
      "step": 767
    },
    {
      "epoch": 0.15787850755473326,
      "grad_norm": 0.25301775336265564,
      "learning_rate": 8.987934159321998e-05,
      "loss": 0.7559,
      "step": 768
    },
    {
      "epoch": 0.15808407852811182,
      "grad_norm": 0.2603342533111572,
      "learning_rate": 8.987860955162129e-05,
      "loss": 0.7328,
      "step": 769
    },
    {
      "epoch": 0.15828964950149038,
      "grad_norm": 0.2716013491153717,
      "learning_rate": 8.987787529906327e-05,
      "loss": 0.7904,
      "step": 770
    },
    {
      "epoch": 0.15849522047486894,
      "grad_norm": 0.2763035297393799,
      "learning_rate": 8.98771388355821e-05,
      "loss": 0.7466,
      "step": 771
    },
    {
      "epoch": 0.1587007914482475,
      "grad_norm": 0.20483554899692535,
      "learning_rate": 8.987640016121405e-05,
      "loss": 0.6064,
      "step": 772
    },
    {
      "epoch": 0.15890636242162606,
      "grad_norm": 0.2952456474304199,
      "learning_rate": 8.987565927599552e-05,
      "loss": 0.7767,
      "step": 773
    },
    {
      "epoch": 0.15911193339500462,
      "grad_norm": 0.262829452753067,
      "learning_rate": 8.9874916179963e-05,
      "loss": 0.7453,
      "step": 774
    },
    {
      "epoch": 0.15931750436838318,
      "grad_norm": 0.27599036693573,
      "learning_rate": 8.987417087315311e-05,
      "loss": 0.7633,
      "step": 775
    },
    {
      "epoch": 0.15952307534176174,
      "grad_norm": 0.2878960371017456,
      "learning_rate": 8.987342335560257e-05,
      "loss": 0.7264,
      "step": 776
    },
    {
      "epoch": 0.1597286463151403,
      "grad_norm": 0.27682632207870483,
      "learning_rate": 8.98726736273482e-05,
      "loss": 0.7599,
      "step": 777
    },
    {
      "epoch": 0.15993421728851887,
      "grad_norm": 0.28773486614227295,
      "learning_rate": 8.98719216884269e-05,
      "loss": 0.749,
      "step": 778
    },
    {
      "epoch": 0.16013978826189743,
      "grad_norm": 0.18678279221057892,
      "learning_rate": 8.987116753887578e-05,
      "loss": 0.5898,
      "step": 779
    },
    {
      "epoch": 0.16034535923527599,
      "grad_norm": 0.2946769595146179,
      "learning_rate": 8.987041117873195e-05,
      "loss": 0.7631,
      "step": 780
    },
    {
      "epoch": 0.16055093020865455,
      "grad_norm": 0.2669578492641449,
      "learning_rate": 8.98696526080327e-05,
      "loss": 0.7401,
      "step": 781
    },
    {
      "epoch": 0.1607565011820331,
      "grad_norm": 0.2495296746492386,
      "learning_rate": 8.986889182681537e-05,
      "loss": 0.7548,
      "step": 782
    },
    {
      "epoch": 0.16096207215541167,
      "grad_norm": 0.1537548452615738,
      "learning_rate": 8.986812883511746e-05,
      "loss": 0.5952,
      "step": 783
    },
    {
      "epoch": 0.16116764312879023,
      "grad_norm": 0.3242528736591339,
      "learning_rate": 8.986736363297657e-05,
      "loss": 0.7621,
      "step": 784
    },
    {
      "epoch": 0.16137321410216876,
      "grad_norm": 0.2763916254043579,
      "learning_rate": 8.986659622043038e-05,
      "loss": 0.7518,
      "step": 785
    },
    {
      "epoch": 0.16157878507554732,
      "grad_norm": 0.27918627858161926,
      "learning_rate": 8.986582659751668e-05,
      "loss": 0.759,
      "step": 786
    },
    {
      "epoch": 0.16178435604892588,
      "grad_norm": 0.1745089888572693,
      "learning_rate": 8.986505476427342e-05,
      "loss": 0.6015,
      "step": 787
    },
    {
      "epoch": 0.16198992702230444,
      "grad_norm": 0.2984016239643097,
      "learning_rate": 8.986428072073861e-05,
      "loss": 0.7422,
      "step": 788
    },
    {
      "epoch": 0.162195497995683,
      "grad_norm": 0.27629682421684265,
      "learning_rate": 8.986350446695038e-05,
      "loss": 0.7691,
      "step": 789
    },
    {
      "epoch": 0.16240106896906156,
      "grad_norm": 0.15922513604164124,
      "learning_rate": 8.986272600294698e-05,
      "loss": 0.594,
      "step": 790
    },
    {
      "epoch": 0.16260663994244012,
      "grad_norm": 0.14948177337646484,
      "learning_rate": 8.986194532876676e-05,
      "loss": 0.5879,
      "step": 791
    },
    {
      "epoch": 0.16281221091581868,
      "grad_norm": 0.33852294087409973,
      "learning_rate": 8.986116244444816e-05,
      "loss": 0.788,
      "step": 792
    },
    {
      "epoch": 0.16301778188919724,
      "grad_norm": 0.29658934473991394,
      "learning_rate": 8.986037735002979e-05,
      "loss": 0.7502,
      "step": 793
    },
    {
      "epoch": 0.1632233528625758,
      "grad_norm": 0.27061983942985535,
      "learning_rate": 8.98595900455503e-05,
      "loss": 0.7444,
      "step": 794
    },
    {
      "epoch": 0.16342892383595437,
      "grad_norm": 0.28159090876579285,
      "learning_rate": 8.985880053104848e-05,
      "loss": 0.7497,
      "step": 795
    },
    {
      "epoch": 0.16363449480933293,
      "grad_norm": 0.27150630950927734,
      "learning_rate": 8.985800880656322e-05,
      "loss": 0.7283,
      "step": 796
    },
    {
      "epoch": 0.16384006578271149,
      "grad_norm": 0.26862168312072754,
      "learning_rate": 8.985721487213353e-05,
      "loss": 0.7492,
      "step": 797
    },
    {
      "epoch": 0.16404563675609005,
      "grad_norm": 0.284452885389328,
      "learning_rate": 8.985641872779853e-05,
      "loss": 0.7864,
      "step": 798
    },
    {
      "epoch": 0.1642512077294686,
      "grad_norm": 0.19958379864692688,
      "learning_rate": 8.985562037359745e-05,
      "loss": 0.585,
      "step": 799
    },
    {
      "epoch": 0.16445677870284717,
      "grad_norm": 0.1591620147228241,
      "learning_rate": 8.985481980956959e-05,
      "loss": 0.5937,
      "step": 800
    },
    {
      "epoch": 0.16466234967622573,
      "grad_norm": 0.15034611523151398,
      "learning_rate": 8.985401703575444e-05,
      "loss": 0.6034,
      "step": 801
    },
    {
      "epoch": 0.1648679206496043,
      "grad_norm": 0.4189755618572235,
      "learning_rate": 8.985321205219149e-05,
      "loss": 0.7696,
      "step": 802
    },
    {
      "epoch": 0.16507349162298285,
      "grad_norm": 0.17588938772678375,
      "learning_rate": 8.985240485892043e-05,
      "loss": 0.5819,
      "step": 803
    },
    {
      "epoch": 0.16527906259636138,
      "grad_norm": 0.17400261759757996,
      "learning_rate": 8.985159545598102e-05,
      "loss": 0.5878,
      "step": 804
    },
    {
      "epoch": 0.16548463356973994,
      "grad_norm": 0.5819520354270935,
      "learning_rate": 8.985078384341314e-05,
      "loss": 0.7724,
      "step": 805
    },
    {
      "epoch": 0.1656902045431185,
      "grad_norm": 0.3000738322734833,
      "learning_rate": 8.984997002125677e-05,
      "loss": 0.7544,
      "step": 806
    },
    {
      "epoch": 0.16589577551649706,
      "grad_norm": 0.5194309949874878,
      "learning_rate": 8.984915398955201e-05,
      "loss": 0.7717,
      "step": 807
    },
    {
      "epoch": 0.16610134648987562,
      "grad_norm": 0.24588865041732788,
      "learning_rate": 8.984833574833905e-05,
      "loss": 0.5959,
      "step": 808
    },
    {
      "epoch": 0.16630691746325418,
      "grad_norm": 0.3617485761642456,
      "learning_rate": 8.984751529765823e-05,
      "loss": 0.7641,
      "step": 809
    },
    {
      "epoch": 0.16651248843663274,
      "grad_norm": 0.1757216602563858,
      "learning_rate": 8.984669263754993e-05,
      "loss": 0.5963,
      "step": 810
    },
    {
      "epoch": 0.1667180594100113,
      "grad_norm": 0.37562620639801025,
      "learning_rate": 8.98458677680547e-05,
      "loss": 0.7765,
      "step": 811
    },
    {
      "epoch": 0.16692363038338986,
      "grad_norm": 0.19446802139282227,
      "learning_rate": 8.984504068921317e-05,
      "loss": 0.5991,
      "step": 812
    },
    {
      "epoch": 0.16712920135676843,
      "grad_norm": 0.2953244149684906,
      "learning_rate": 8.98442114010661e-05,
      "loss": 0.7816,
      "step": 813
    },
    {
      "epoch": 0.16733477233014699,
      "grad_norm": 0.3022470772266388,
      "learning_rate": 8.984337990365433e-05,
      "loss": 0.7426,
      "step": 814
    },
    {
      "epoch": 0.16754034330352555,
      "grad_norm": 0.268697053194046,
      "learning_rate": 8.984254619701882e-05,
      "loss": 0.7798,
      "step": 815
    },
    {
      "epoch": 0.1677459142769041,
      "grad_norm": 0.2634507119655609,
      "learning_rate": 8.984171028120066e-05,
      "loss": 0.7499,
      "step": 816
    },
    {
      "epoch": 0.16795148525028267,
      "grad_norm": 0.2637363374233246,
      "learning_rate": 8.984087215624102e-05,
      "loss": 0.7244,
      "step": 817
    },
    {
      "epoch": 0.16815705622366123,
      "grad_norm": 0.25045761466026306,
      "learning_rate": 8.984003182218121e-05,
      "loss": 0.7206,
      "step": 818
    },
    {
      "epoch": 0.1683626271970398,
      "grad_norm": 0.24836835265159607,
      "learning_rate": 8.983918927906259e-05,
      "loss": 0.7381,
      "step": 819
    },
    {
      "epoch": 0.16856819817041835,
      "grad_norm": 0.26156720519065857,
      "learning_rate": 8.983834452692671e-05,
      "loss": 0.748,
      "step": 820
    },
    {
      "epoch": 0.1687737691437969,
      "grad_norm": 0.2660123407840729,
      "learning_rate": 8.983749756581517e-05,
      "loss": 0.7349,
      "step": 821
    },
    {
      "epoch": 0.16897934011717544,
      "grad_norm": 0.20181813836097717,
      "learning_rate": 8.983664839576969e-05,
      "loss": 0.6089,
      "step": 822
    },
    {
      "epoch": 0.169184911090554,
      "grad_norm": 0.16823935508728027,
      "learning_rate": 8.98357970168321e-05,
      "loss": 0.6203,
      "step": 823
    },
    {
      "epoch": 0.16939048206393256,
      "grad_norm": 0.36333969235420227,
      "learning_rate": 8.983494342904437e-05,
      "loss": 0.7704,
      "step": 824
    },
    {
      "epoch": 0.16959605303731112,
      "grad_norm": 0.2901283800601959,
      "learning_rate": 8.983408763244853e-05,
      "loss": 0.7484,
      "step": 825
    },
    {
      "epoch": 0.16980162401068968,
      "grad_norm": 0.2594255805015564,
      "learning_rate": 8.983322962708673e-05,
      "loss": 0.7726,
      "step": 826
    },
    {
      "epoch": 0.17000719498406824,
      "grad_norm": 0.2951291799545288,
      "learning_rate": 8.983236941300128e-05,
      "loss": 0.743,
      "step": 827
    },
    {
      "epoch": 0.1702127659574468,
      "grad_norm": 0.23186159133911133,
      "learning_rate": 8.983150699023453e-05,
      "loss": 0.6015,
      "step": 828
    },
    {
      "epoch": 0.17041833693082536,
      "grad_norm": 0.2974048852920532,
      "learning_rate": 8.983064235882896e-05,
      "loss": 0.7689,
      "step": 829
    },
    {
      "epoch": 0.17062390790420393,
      "grad_norm": 0.2741788327693939,
      "learning_rate": 8.982977551882719e-05,
      "loss": 0.7825,
      "step": 830
    },
    {
      "epoch": 0.17082947887758249,
      "grad_norm": 0.2528201639652252,
      "learning_rate": 8.982890647027191e-05,
      "loss": 0.7549,
      "step": 831
    },
    {
      "epoch": 0.17103504985096105,
      "grad_norm": 0.27328386902809143,
      "learning_rate": 8.982803521320593e-05,
      "loss": 0.7433,
      "step": 832
    },
    {
      "epoch": 0.1712406208243396,
      "grad_norm": 0.18332356214523315,
      "learning_rate": 8.98271617476722e-05,
      "loss": 0.6056,
      "step": 833
    },
    {
      "epoch": 0.17144619179771817,
      "grad_norm": 0.2897491753101349,
      "learning_rate": 8.982628607371373e-05,
      "loss": 0.7229,
      "step": 834
    },
    {
      "epoch": 0.17165176277109673,
      "grad_norm": 0.27189579606056213,
      "learning_rate": 8.982540819137363e-05,
      "loss": 0.7409,
      "step": 835
    },
    {
      "epoch": 0.1718573337444753,
      "grad_norm": 0.2686000168323517,
      "learning_rate": 8.982452810069521e-05,
      "loss": 0.7622,
      "step": 836
    },
    {
      "epoch": 0.17206290471785385,
      "grad_norm": 0.2843405306339264,
      "learning_rate": 8.98236458017218e-05,
      "loss": 0.7774,
      "step": 837
    },
    {
      "epoch": 0.1722684756912324,
      "grad_norm": 0.249932661652565,
      "learning_rate": 8.982276129449687e-05,
      "loss": 0.758,
      "step": 838
    },
    {
      "epoch": 0.17247404666461097,
      "grad_norm": 0.1650909036397934,
      "learning_rate": 8.982187457906399e-05,
      "loss": 0.6026,
      "step": 839
    },
    {
      "epoch": 0.1726796176379895,
      "grad_norm": 0.2688060700893402,
      "learning_rate": 8.982098565546684e-05,
      "loss": 0.74,
      "step": 840
    },
    {
      "epoch": 0.17288518861136806,
      "grad_norm": 0.2702515423297882,
      "learning_rate": 8.982009452374921e-05,
      "loss": 0.7454,
      "step": 841
    },
    {
      "epoch": 0.17309075958474662,
      "grad_norm": 0.2621611952781677,
      "learning_rate": 8.981920118395502e-05,
      "loss": 0.741,
      "step": 842
    },
    {
      "epoch": 0.17329633055812518,
      "grad_norm": 0.26395297050476074,
      "learning_rate": 8.981830563612828e-05,
      "loss": 0.7634,
      "step": 843
    },
    {
      "epoch": 0.17350190153150374,
      "grad_norm": 0.1796771138906479,
      "learning_rate": 8.981740788031309e-05,
      "loss": 0.5774,
      "step": 844
    },
    {
      "epoch": 0.1737074725048823,
      "grad_norm": 0.28493568301200867,
      "learning_rate": 8.98165079165537e-05,
      "loss": 0.744,
      "step": 845
    },
    {
      "epoch": 0.17391304347826086,
      "grad_norm": 0.14998356997966766,
      "learning_rate": 8.981560574489442e-05,
      "loss": 0.583,
      "step": 846
    },
    {
      "epoch": 0.17411861445163943,
      "grad_norm": 0.28660815954208374,
      "learning_rate": 8.981470136537973e-05,
      "loss": 0.7648,
      "step": 847
    },
    {
      "epoch": 0.17432418542501799,
      "grad_norm": 0.26909613609313965,
      "learning_rate": 8.981379477805416e-05,
      "loss": 0.7621,
      "step": 848
    },
    {
      "epoch": 0.17452975639839655,
      "grad_norm": 0.2543969750404358,
      "learning_rate": 8.981288598296238e-05,
      "loss": 0.7383,
      "step": 849
    },
    {
      "epoch": 0.1747353273717751,
      "grad_norm": 0.27695950865745544,
      "learning_rate": 8.981197498014916e-05,
      "loss": 0.7567,
      "step": 850
    },
    {
      "epoch": 0.17494089834515367,
      "grad_norm": 0.2635768949985504,
      "learning_rate": 8.98110617696594e-05,
      "loss": 0.7627,
      "step": 851
    },
    {
      "epoch": 0.17514646931853223,
      "grad_norm": 0.31927260756492615,
      "learning_rate": 8.981014635153806e-05,
      "loss": 0.7376,
      "step": 852
    },
    {
      "epoch": 0.1753520402919108,
      "grad_norm": 0.25446754693984985,
      "learning_rate": 8.980922872583025e-05,
      "loss": 0.7415,
      "step": 853
    },
    {
      "epoch": 0.17555761126528935,
      "grad_norm": 0.2923116683959961,
      "learning_rate": 8.980830889258118e-05,
      "loss": 0.7375,
      "step": 854
    },
    {
      "epoch": 0.1757631822386679,
      "grad_norm": 0.17673562467098236,
      "learning_rate": 8.980738685183617e-05,
      "loss": 0.5944,
      "step": 855
    },
    {
      "epoch": 0.17596875321204647,
      "grad_norm": 0.2569844424724579,
      "learning_rate": 8.980646260364063e-05,
      "loss": 0.7681,
      "step": 856
    },
    {
      "epoch": 0.17617432418542503,
      "grad_norm": 0.2668174207210541,
      "learning_rate": 8.98055361480401e-05,
      "loss": 0.753,
      "step": 857
    },
    {
      "epoch": 0.1763798951588036,
      "grad_norm": 0.15782947838306427,
      "learning_rate": 8.980460748508023e-05,
      "loss": 0.5973,
      "step": 858
    },
    {
      "epoch": 0.17658546613218212,
      "grad_norm": 0.27562811970710754,
      "learning_rate": 8.980367661480678e-05,
      "loss": 0.7613,
      "step": 859
    },
    {
      "epoch": 0.17679103710556068,
      "grad_norm": 0.2562348544597626,
      "learning_rate": 8.980274353726556e-05,
      "loss": 0.7451,
      "step": 860
    },
    {
      "epoch": 0.17699660807893924,
      "grad_norm": 0.25293174386024475,
      "learning_rate": 8.980180825250261e-05,
      "loss": 0.7285,
      "step": 861
    },
    {
      "epoch": 0.1772021790523178,
      "grad_norm": 0.2638672888278961,
      "learning_rate": 8.980087076056394e-05,
      "loss": 0.7539,
      "step": 862
    },
    {
      "epoch": 0.17740775002569636,
      "grad_norm": 0.1891278624534607,
      "learning_rate": 8.979993106149579e-05,
      "loss": 0.58,
      "step": 863
    },
    {
      "epoch": 0.17761332099907493,
      "grad_norm": 0.27774450182914734,
      "learning_rate": 8.979898915534442e-05,
      "loss": 0.7754,
      "step": 864
    },
    {
      "epoch": 0.17781889197245349,
      "grad_norm": 0.26496121287345886,
      "learning_rate": 8.979804504215624e-05,
      "loss": 0.7595,
      "step": 865
    },
    {
      "epoch": 0.17802446294583205,
      "grad_norm": 0.26245352625846863,
      "learning_rate": 8.979709872197778e-05,
      "loss": 0.7565,
      "step": 866
    },
    {
      "epoch": 0.1782300339192106,
      "grad_norm": 0.2624642252922058,
      "learning_rate": 8.979615019485564e-05,
      "loss": 0.7556,
      "step": 867
    },
    {
      "epoch": 0.17843560489258917,
      "grad_norm": 0.16684101521968842,
      "learning_rate": 8.979519946083656e-05,
      "loss": 0.6104,
      "step": 868
    },
    {
      "epoch": 0.17864117586596773,
      "grad_norm": 0.26087847352027893,
      "learning_rate": 8.979424651996738e-05,
      "loss": 0.7496,
      "step": 869
    },
    {
      "epoch": 0.1788467468393463,
      "grad_norm": 0.2627946436405182,
      "learning_rate": 8.979329137229502e-05,
      "loss": 0.7471,
      "step": 870
    },
    {
      "epoch": 0.17905231781272485,
      "grad_norm": 0.2528480887413025,
      "learning_rate": 8.979233401786657e-05,
      "loss": 0.7645,
      "step": 871
    },
    {
      "epoch": 0.1792578887861034,
      "grad_norm": 0.26880887150764465,
      "learning_rate": 8.97913744567292e-05,
      "loss": 0.7492,
      "step": 872
    },
    {
      "epoch": 0.17946345975948197,
      "grad_norm": 0.25951650738716125,
      "learning_rate": 8.979041268893014e-05,
      "loss": 0.7428,
      "step": 873
    },
    {
      "epoch": 0.17966903073286053,
      "grad_norm": 0.15437857806682587,
      "learning_rate": 8.97894487145168e-05,
      "loss": 0.5812,
      "step": 874
    },
    {
      "epoch": 0.1798746017062391,
      "grad_norm": 0.28139808773994446,
      "learning_rate": 8.978848253353668e-05,
      "loss": 0.7438,
      "step": 875
    },
    {
      "epoch": 0.18008017267961765,
      "grad_norm": 0.14730799198150635,
      "learning_rate": 8.978751414603735e-05,
      "loss": 0.5816,
      "step": 876
    },
    {
      "epoch": 0.18028574365299618,
      "grad_norm": 0.2632145285606384,
      "learning_rate": 8.978654355206654e-05,
      "loss": 0.7467,
      "step": 877
    },
    {
      "epoch": 0.18049131462637474,
      "grad_norm": 0.2908996045589447,
      "learning_rate": 8.978557075167206e-05,
      "loss": 0.74,
      "step": 878
    },
    {
      "epoch": 0.1806968855997533,
      "grad_norm": 0.24691736698150635,
      "learning_rate": 8.978459574490184e-05,
      "loss": 0.7718,
      "step": 879
    },
    {
      "epoch": 0.18090245657313186,
      "grad_norm": 0.25215819478034973,
      "learning_rate": 8.978361853180392e-05,
      "loss": 0.7481,
      "step": 880
    },
    {
      "epoch": 0.18110802754651043,
      "grad_norm": 0.2547704577445984,
      "learning_rate": 8.978263911242642e-05,
      "loss": 0.7508,
      "step": 881
    },
    {
      "epoch": 0.18131359851988899,
      "grad_norm": 0.184767946600914,
      "learning_rate": 8.97816574868176e-05,
      "loss": 0.5983,
      "step": 882
    },
    {
      "epoch": 0.18151916949326755,
      "grad_norm": 0.1742323487997055,
      "learning_rate": 8.978067365502583e-05,
      "loss": 0.6079,
      "step": 883
    },
    {
      "epoch": 0.1817247404666461,
      "grad_norm": 0.15977798402309418,
      "learning_rate": 8.977968761709958e-05,
      "loss": 0.5984,
      "step": 884
    },
    {
      "epoch": 0.18193031144002467,
      "grad_norm": 0.36065980792045593,
      "learning_rate": 8.977869937308742e-05,
      "loss": 0.7727,
      "step": 885
    },
    {
      "epoch": 0.18213588241340323,
      "grad_norm": 0.28331291675567627,
      "learning_rate": 8.977770892303802e-05,
      "loss": 0.753,
      "step": 886
    },
    {
      "epoch": 0.1823414533867818,
      "grad_norm": 0.2905336022377014,
      "learning_rate": 8.977671626700021e-05,
      "loss": 0.7554,
      "step": 887
    },
    {
      "epoch": 0.18254702436016035,
      "grad_norm": 0.2962552309036255,
      "learning_rate": 8.977572140502286e-05,
      "loss": 0.7432,
      "step": 888
    },
    {
      "epoch": 0.1827525953335389,
      "grad_norm": 0.2991376519203186,
      "learning_rate": 8.977472433715502e-05,
      "loss": 0.7562,
      "step": 889
    },
    {
      "epoch": 0.18295816630691747,
      "grad_norm": 0.22425773739814758,
      "learning_rate": 8.977372506344578e-05,
      "loss": 0.5851,
      "step": 890
    },
    {
      "epoch": 0.18316373728029603,
      "grad_norm": 0.32990381121635437,
      "learning_rate": 8.977272358394437e-05,
      "loss": 0.7482,
      "step": 891
    },
    {
      "epoch": 0.1833693082536746,
      "grad_norm": 0.17806373536586761,
      "learning_rate": 8.977171989870013e-05,
      "loss": 0.6074,
      "step": 892
    },
    {
      "epoch": 0.18357487922705315,
      "grad_norm": 0.318367063999176,
      "learning_rate": 8.977071400776253e-05,
      "loss": 0.7526,
      "step": 893
    },
    {
      "epoch": 0.1837804502004317,
      "grad_norm": 0.17434534430503845,
      "learning_rate": 8.97697059111811e-05,
      "loss": 0.5821,
      "step": 894
    },
    {
      "epoch": 0.18398602117381027,
      "grad_norm": 0.29355406761169434,
      "learning_rate": 8.976869560900552e-05,
      "loss": 0.7531,
      "step": 895
    },
    {
      "epoch": 0.1841915921471888,
      "grad_norm": 0.2709575593471527,
      "learning_rate": 8.976768310128555e-05,
      "loss": 0.7768,
      "step": 896
    },
    {
      "epoch": 0.18439716312056736,
      "grad_norm": 0.252112478017807,
      "learning_rate": 8.976666838807107e-05,
      "loss": 0.7173,
      "step": 897
    },
    {
      "epoch": 0.18460273409394592,
      "grad_norm": 0.2750721573829651,
      "learning_rate": 8.976565146941209e-05,
      "loss": 0.7365,
      "step": 898
    },
    {
      "epoch": 0.18480830506732449,
      "grad_norm": 0.2349645495414734,
      "learning_rate": 8.97646323453587e-05,
      "loss": 0.5986,
      "step": 899
    },
    {
      "epoch": 0.18501387604070305,
      "grad_norm": 0.268477201461792,
      "learning_rate": 8.976361101596108e-05,
      "loss": 0.7779,
      "step": 900
    },
    {
      "epoch": 0.1852194470140816,
      "grad_norm": 0.2666422426700592,
      "learning_rate": 8.976258748126959e-05,
      "loss": 0.7536,
      "step": 901
    },
    {
      "epoch": 0.18542501798746017,
      "grad_norm": 0.2692512571811676,
      "learning_rate": 8.976156174133462e-05,
      "loss": 0.7737,
      "step": 902
    },
    {
      "epoch": 0.18563058896083873,
      "grad_norm": 0.25315481424331665,
      "learning_rate": 8.976053379620673e-05,
      "loss": 0.7359,
      "step": 903
    },
    {
      "epoch": 0.1858361599342173,
      "grad_norm": 0.2516801953315735,
      "learning_rate": 8.975950364593655e-05,
      "loss": 0.7381,
      "step": 904
    },
    {
      "epoch": 0.18604173090759585,
      "grad_norm": 0.2789689600467682,
      "learning_rate": 8.975847129057482e-05,
      "loss": 0.7466,
      "step": 905
    },
    {
      "epoch": 0.1862473018809744,
      "grad_norm": 0.1855190098285675,
      "learning_rate": 8.975743673017243e-05,
      "loss": 0.5948,
      "step": 906
    },
    {
      "epoch": 0.18645287285435297,
      "grad_norm": 0.27560868859291077,
      "learning_rate": 8.975639996478032e-05,
      "loss": 0.737,
      "step": 907
    },
    {
      "epoch": 0.18665844382773153,
      "grad_norm": 0.26743271946907043,
      "learning_rate": 8.975536099444957e-05,
      "loss": 0.7585,
      "step": 908
    },
    {
      "epoch": 0.1868640148011101,
      "grad_norm": 0.2512650191783905,
      "learning_rate": 8.975431981923137e-05,
      "loss": 0.7318,
      "step": 909
    },
    {
      "epoch": 0.18706958577448865,
      "grad_norm": 0.2596076726913452,
      "learning_rate": 8.9753276439177e-05,
      "loss": 0.7641,
      "step": 910
    },
    {
      "epoch": 0.1872751567478672,
      "grad_norm": 0.20333601534366608,
      "learning_rate": 8.97522308543379e-05,
      "loss": 0.6016,
      "step": 911
    },
    {
      "epoch": 0.18748072772124577,
      "grad_norm": 0.2744527757167816,
      "learning_rate": 8.975118306476554e-05,
      "loss": 0.7522,
      "step": 912
    },
    {
      "epoch": 0.18768629869462433,
      "grad_norm": 0.2788070738315582,
      "learning_rate": 8.975013307051157e-05,
      "loss": 0.7487,
      "step": 913
    },
    {
      "epoch": 0.18789186966800286,
      "grad_norm": 0.25242358446121216,
      "learning_rate": 8.97490808716277e-05,
      "loss": 0.7345,
      "step": 914
    },
    {
      "epoch": 0.18809744064138142,
      "grad_norm": 0.2651404142379761,
      "learning_rate": 8.974802646816578e-05,
      "loss": 0.7281,
      "step": 915
    },
    {
      "epoch": 0.18830301161475999,
      "grad_norm": 0.2696022689342499,
      "learning_rate": 8.974696986017773e-05,
      "loss": 0.7516,
      "step": 916
    },
    {
      "epoch": 0.18850858258813855,
      "grad_norm": 0.24874137341976166,
      "learning_rate": 8.974591104771564e-05,
      "loss": 0.7413,
      "step": 917
    },
    {
      "epoch": 0.1887141535615171,
      "grad_norm": 0.2631874084472656,
      "learning_rate": 8.974485003083164e-05,
      "loss": 0.7562,
      "step": 918
    },
    {
      "epoch": 0.18891972453489567,
      "grad_norm": 0.26414451003074646,
      "learning_rate": 8.974378680957802e-05,
      "loss": 0.5997,
      "step": 919
    },
    {
      "epoch": 0.18912529550827423,
      "grad_norm": 0.28355100750923157,
      "learning_rate": 8.974272138400716e-05,
      "loss": 0.756,
      "step": 920
    },
    {
      "epoch": 0.1893308664816528,
      "grad_norm": 0.26617303490638733,
      "learning_rate": 8.974165375417155e-05,
      "loss": 0.7841,
      "step": 921
    },
    {
      "epoch": 0.18953643745503135,
      "grad_norm": 0.2054712474346161,
      "learning_rate": 8.974058392012375e-05,
      "loss": 0.575,
      "step": 922
    },
    {
      "epoch": 0.1897420084284099,
      "grad_norm": 0.27742794156074524,
      "learning_rate": 8.973951188191652e-05,
      "loss": 0.7585,
      "step": 923
    },
    {
      "epoch": 0.18994757940178847,
      "grad_norm": 0.1530211716890335,
      "learning_rate": 8.973843763960267e-05,
      "loss": 0.5826,
      "step": 924
    },
    {
      "epoch": 0.19015315037516703,
      "grad_norm": 0.2896377444267273,
      "learning_rate": 8.973736119323508e-05,
      "loss": 0.7741,
      "step": 925
    },
    {
      "epoch": 0.1903587213485456,
      "grad_norm": 0.16760393977165222,
      "learning_rate": 8.973628254286681e-05,
      "loss": 0.5857,
      "step": 926
    },
    {
      "epoch": 0.19056429232192415,
      "grad_norm": 0.26283350586891174,
      "learning_rate": 8.9735201688551e-05,
      "loss": 0.7505,
      "step": 927
    },
    {
      "epoch": 0.1907698632953027,
      "grad_norm": 0.24747183918952942,
      "learning_rate": 8.97341186303409e-05,
      "loss": 0.7227,
      "step": 928
    },
    {
      "epoch": 0.19097543426868127,
      "grad_norm": 0.27605384588241577,
      "learning_rate": 8.973303336828985e-05,
      "loss": 0.7628,
      "step": 929
    },
    {
      "epoch": 0.19118100524205983,
      "grad_norm": 0.2601989507675171,
      "learning_rate": 8.973194590245132e-05,
      "loss": 0.7559,
      "step": 930
    },
    {
      "epoch": 0.1913865762154384,
      "grad_norm": 0.18584440648555756,
      "learning_rate": 8.973085623287892e-05,
      "loss": 0.5884,
      "step": 931
    },
    {
      "epoch": 0.19159214718881695,
      "grad_norm": 0.17022742331027985,
      "learning_rate": 8.972976435962629e-05,
      "loss": 0.5944,
      "step": 932
    },
    {
      "epoch": 0.19179771816219549,
      "grad_norm": 0.34249716997146606,
      "learning_rate": 8.972867028274723e-05,
      "loss": 0.767,
      "step": 933
    },
    {
      "epoch": 0.19200328913557405,
      "grad_norm": 0.26959505677223206,
      "learning_rate": 8.972757400229565e-05,
      "loss": 0.7707,
      "step": 934
    },
    {
      "epoch": 0.1922088601089526,
      "grad_norm": 0.2650569975376129,
      "learning_rate": 8.972647551832556e-05,
      "loss": 0.7181,
      "step": 935
    },
    {
      "epoch": 0.19241443108233117,
      "grad_norm": 0.20763760805130005,
      "learning_rate": 8.972537483089107e-05,
      "loss": 0.5857,
      "step": 936
    },
    {
      "epoch": 0.19262000205570973,
      "grad_norm": 0.1736496388912201,
      "learning_rate": 8.97242719400464e-05,
      "loss": 0.5943,
      "step": 937
    },
    {
      "epoch": 0.1928255730290883,
      "grad_norm": 0.3711773157119751,
      "learning_rate": 8.97231668458459e-05,
      "loss": 0.7748,
      "step": 938
    },
    {
      "epoch": 0.19303114400246685,
      "grad_norm": 0.2923683226108551,
      "learning_rate": 8.9722059548344e-05,
      "loss": 0.756,
      "step": 939
    },
    {
      "epoch": 0.1932367149758454,
      "grad_norm": 0.2692539393901825,
      "learning_rate": 8.972095004759527e-05,
      "loss": 0.7795,
      "step": 940
    },
    {
      "epoch": 0.19344228594922397,
      "grad_norm": 0.2933458387851715,
      "learning_rate": 8.971983834365434e-05,
      "loss": 0.7411,
      "step": 941
    },
    {
      "epoch": 0.19364785692260253,
      "grad_norm": 0.25706520676612854,
      "learning_rate": 8.9718724436576e-05,
      "loss": 0.6123,
      "step": 942
    },
    {
      "epoch": 0.1938534278959811,
      "grad_norm": 0.2033473253250122,
      "learning_rate": 8.971760832641513e-05,
      "loss": 0.5855,
      "step": 943
    },
    {
      "epoch": 0.19405899886935965,
      "grad_norm": 0.3263876140117645,
      "learning_rate": 8.97164900132267e-05,
      "loss": 0.7315,
      "step": 944
    },
    {
      "epoch": 0.1942645698427382,
      "grad_norm": 0.3143511414527893,
      "learning_rate": 8.971536949706582e-05,
      "loss": 0.761,
      "step": 945
    },
    {
      "epoch": 0.19447014081611677,
      "grad_norm": 0.26773688197135925,
      "learning_rate": 8.971424677798768e-05,
      "loss": 0.7457,
      "step": 946
    },
    {
      "epoch": 0.19467571178949533,
      "grad_norm": 0.29603666067123413,
      "learning_rate": 8.971312185604759e-05,
      "loss": 0.7635,
      "step": 947
    },
    {
      "epoch": 0.1948812827628739,
      "grad_norm": 0.27570641040802,
      "learning_rate": 8.971199473130097e-05,
      "loss": 0.7524,
      "step": 948
    },
    {
      "epoch": 0.19508685373625245,
      "grad_norm": 0.2680298984050751,
      "learning_rate": 8.971086540380337e-05,
      "loss": 0.723,
      "step": 949
    },
    {
      "epoch": 0.195292424709631,
      "grad_norm": 0.2859373390674591,
      "learning_rate": 8.970973387361039e-05,
      "loss": 0.7422,
      "step": 950
    },
    {
      "epoch": 0.19549799568300955,
      "grad_norm": 0.28261512517929077,
      "learning_rate": 8.97086001407778e-05,
      "loss": 0.7666,
      "step": 951
    },
    {
      "epoch": 0.1957035666563881,
      "grad_norm": 0.2570000886917114,
      "learning_rate": 8.970746420536146e-05,
      "loss": 0.7278,
      "step": 952
    },
    {
      "epoch": 0.19590913762976667,
      "grad_norm": 0.25783413648605347,
      "learning_rate": 8.97063260674173e-05,
      "loss": 0.7684,
      "step": 953
    },
    {
      "epoch": 0.19611470860314523,
      "grad_norm": 0.36918801069259644,
      "learning_rate": 8.970518572700143e-05,
      "loss": 0.6265,
      "step": 954
    },
    {
      "epoch": 0.1963202795765238,
      "grad_norm": 0.32823050022125244,
      "learning_rate": 8.970404318417e-05,
      "loss": 0.7552,
      "step": 955
    },
    {
      "epoch": 0.19652585054990235,
      "grad_norm": 0.31253358721733093,
      "learning_rate": 8.970289843897933e-05,
      "loss": 0.7588,
      "step": 956
    },
    {
      "epoch": 0.1967314215232809,
      "grad_norm": 0.25706982612609863,
      "learning_rate": 8.970175149148577e-05,
      "loss": 0.7432,
      "step": 957
    },
    {
      "epoch": 0.19693699249665947,
      "grad_norm": 0.2800324559211731,
      "learning_rate": 8.970060234174586e-05,
      "loss": 0.7389,
      "step": 958
    },
    {
      "epoch": 0.19714256347003803,
      "grad_norm": 0.29499801993370056,
      "learning_rate": 8.969945098981621e-05,
      "loss": 0.7663,
      "step": 959
    },
    {
      "epoch": 0.1973481344434166,
      "grad_norm": 0.2643605172634125,
      "learning_rate": 8.969829743575351e-05,
      "loss": 0.7446,
      "step": 960
    },
    {
      "epoch": 0.19755370541679515,
      "grad_norm": 0.2712821662425995,
      "learning_rate": 8.969714167961463e-05,
      "loss": 0.7657,
      "step": 961
    },
    {
      "epoch": 0.1977592763901737,
      "grad_norm": 0.31495070457458496,
      "learning_rate": 8.96959837214565e-05,
      "loss": 0.6014,
      "step": 962
    },
    {
      "epoch": 0.19796484736355227,
      "grad_norm": 0.2913089394569397,
      "learning_rate": 8.969482356133615e-05,
      "loss": 0.7527,
      "step": 963
    },
    {
      "epoch": 0.19817041833693083,
      "grad_norm": 0.172258198261261,
      "learning_rate": 8.969366119931075e-05,
      "loss": 0.6048,
      "step": 964
    },
    {
      "epoch": 0.1983759893103094,
      "grad_norm": 0.29237228631973267,
      "learning_rate": 8.969249663543756e-05,
      "loss": 0.7519,
      "step": 965
    },
    {
      "epoch": 0.19858156028368795,
      "grad_norm": 0.27603963017463684,
      "learning_rate": 8.969132986977396e-05,
      "loss": 0.731,
      "step": 966
    },
    {
      "epoch": 0.1987871312570665,
      "grad_norm": 0.2580612003803253,
      "learning_rate": 8.969016090237742e-05,
      "loss": 0.723,
      "step": 967
    },
    {
      "epoch": 0.19899270223044507,
      "grad_norm": 0.27025994658470154,
      "learning_rate": 8.968898973330552e-05,
      "loss": 0.7453,
      "step": 968
    },
    {
      "epoch": 0.19919827320382363,
      "grad_norm": 0.27253222465515137,
      "learning_rate": 8.968781636261599e-05,
      "loss": 0.7455,
      "step": 969
    },
    {
      "epoch": 0.19940384417720217,
      "grad_norm": 0.25386548042297363,
      "learning_rate": 8.96866407903666e-05,
      "loss": 0.753,
      "step": 970
    },
    {
      "epoch": 0.19960941515058073,
      "grad_norm": 0.2759700417518616,
      "learning_rate": 8.96854630166153e-05,
      "loss": 0.5741,
      "step": 971
    },
    {
      "epoch": 0.1998149861239593,
      "grad_norm": 0.28211307525634766,
      "learning_rate": 8.96842830414201e-05,
      "loss": 0.7339,
      "step": 972
    },
    {
      "epoch": 0.20002055709733785,
      "grad_norm": 0.27216947078704834,
      "learning_rate": 8.96831008648391e-05,
      "loss": 0.7405,
      "step": 973
    },
    {
      "epoch": 0.2002261280707164,
      "grad_norm": 0.24992568790912628,
      "learning_rate": 8.96819164869306e-05,
      "loss": 0.7186,
      "step": 974
    },
    {
      "epoch": 0.20043169904409497,
      "grad_norm": 0.181453675031662,
      "learning_rate": 8.96807299077529e-05,
      "loss": 0.5892,
      "step": 975
    },
    {
      "epoch": 0.20063727001747353,
      "grad_norm": 0.2908715307712555,
      "learning_rate": 8.967954112736448e-05,
      "loss": 0.7462,
      "step": 976
    },
    {
      "epoch": 0.2008428409908521,
      "grad_norm": 0.2695624828338623,
      "learning_rate": 8.96783501458239e-05,
      "loss": 0.7669,
      "step": 977
    },
    {
      "epoch": 0.20104841196423065,
      "grad_norm": 0.2560322880744934,
      "learning_rate": 8.967715696318983e-05,
      "loss": 0.7682,
      "step": 978
    },
    {
      "epoch": 0.2012539829376092,
      "grad_norm": 0.25563281774520874,
      "learning_rate": 8.967596157952106e-05,
      "loss": 0.7246,
      "step": 979
    },
    {
      "epoch": 0.20145955391098777,
      "grad_norm": 0.24063649773597717,
      "learning_rate": 8.967476399487649e-05,
      "loss": 0.7328,
      "step": 980
    },
    {
      "epoch": 0.20166512488436633,
      "grad_norm": 0.2495402842760086,
      "learning_rate": 8.967356420931509e-05,
      "loss": 0.722,
      "step": 981
    },
    {
      "epoch": 0.2018706958577449,
      "grad_norm": 0.25746145844459534,
      "learning_rate": 8.9672362222896e-05,
      "loss": 0.7357,
      "step": 982
    },
    {
      "epoch": 0.20207626683112345,
      "grad_norm": 0.2592317461967468,
      "learning_rate": 8.96711580356784e-05,
      "loss": 0.746,
      "step": 983
    },
    {
      "epoch": 0.202281837804502,
      "grad_norm": 0.25513893365859985,
      "learning_rate": 8.966995164772166e-05,
      "loss": 0.7486,
      "step": 984
    },
    {
      "epoch": 0.20248740877788057,
      "grad_norm": 0.40953561663627625,
      "learning_rate": 8.966874305908516e-05,
      "loss": 0.5957,
      "step": 985
    },
    {
      "epoch": 0.20269297975125913,
      "grad_norm": 0.255729079246521,
      "learning_rate": 8.96675322698285e-05,
      "loss": 0.748,
      "step": 986
    },
    {
      "epoch": 0.2028985507246377,
      "grad_norm": 0.26324090361595154,
      "learning_rate": 8.966631928001129e-05,
      "loss": 0.7387,
      "step": 987
    },
    {
      "epoch": 0.20310412169801623,
      "grad_norm": 0.24772094190120697,
      "learning_rate": 8.966510408969329e-05,
      "loss": 0.7252,
      "step": 988
    },
    {
      "epoch": 0.2033096926713948,
      "grad_norm": 0.27024003863334656,
      "learning_rate": 8.96638866989344e-05,
      "loss": 0.7716,
      "step": 989
    },
    {
      "epoch": 0.20351526364477335,
      "grad_norm": 0.2622278928756714,
      "learning_rate": 8.966266710779454e-05,
      "loss": 0.7678,
      "step": 990
    },
    {
      "epoch": 0.2037208346181519,
      "grad_norm": 0.252861350774765,
      "learning_rate": 8.966144531633384e-05,
      "loss": 0.7769,
      "step": 991
    },
    {
      "epoch": 0.20392640559153047,
      "grad_norm": 0.3397926390171051,
      "learning_rate": 8.966022132461248e-05,
      "loss": 0.742,
      "step": 992
    },
    {
      "epoch": 0.20413197656490903,
      "grad_norm": 0.2550930380821228,
      "learning_rate": 8.965899513269076e-05,
      "loss": 0.7205,
      "step": 993
    },
    {
      "epoch": 0.2043375475382876,
      "grad_norm": 0.2502458393573761,
      "learning_rate": 8.965776674062906e-05,
      "loss": 0.7368,
      "step": 994
    },
    {
      "epoch": 0.20454311851166615,
      "grad_norm": 0.25033867359161377,
      "learning_rate": 8.965653614848793e-05,
      "loss": 0.758,
      "step": 995
    },
    {
      "epoch": 0.2047486894850447,
      "grad_norm": 0.24429009854793549,
      "learning_rate": 8.965530335632801e-05,
      "loss": 0.7466,
      "step": 996
    },
    {
      "epoch": 0.20495426045842327,
      "grad_norm": 0.24865779280662537,
      "learning_rate": 8.965406836421e-05,
      "loss": 0.7741,
      "step": 997
    },
    {
      "epoch": 0.20515983143180183,
      "grad_norm": 0.2573890686035156,
      "learning_rate": 8.965283117219475e-05,
      "loss": 0.7486,
      "step": 998
    },
    {
      "epoch": 0.2053654024051804,
      "grad_norm": 0.2486078292131424,
      "learning_rate": 8.965159178034322e-05,
      "loss": 0.7277,
      "step": 999
    },
    {
      "epoch": 0.20557097337855895,
      "grad_norm": 0.2717074155807495,
      "learning_rate": 8.965035018871647e-05,
      "loss": 0.6086,
      "step": 1000
    },
    {
      "epoch": 0.2057765443519375,
      "grad_norm": 0.2679359018802643,
      "learning_rate": 8.964910639737566e-05,
      "loss": 0.7664,
      "step": 1001
    },
    {
      "epoch": 0.20598211532531607,
      "grad_norm": 0.16115225851535797,
      "learning_rate": 8.964786040638205e-05,
      "loss": 0.5977,
      "step": 1002
    },
    {
      "epoch": 0.20618768629869463,
      "grad_norm": 0.2807529866695404,
      "learning_rate": 8.964661221579706e-05,
      "loss": 0.7348,
      "step": 1003
    },
    {
      "epoch": 0.2063932572720732,
      "grad_norm": 0.25754019618034363,
      "learning_rate": 8.964536182568215e-05,
      "loss": 0.7283,
      "step": 1004
    },
    {
      "epoch": 0.20659882824545175,
      "grad_norm": 0.2526054382324219,
      "learning_rate": 8.964410923609894e-05,
      "loss": 0.7144,
      "step": 1005
    },
    {
      "epoch": 0.20680439921883031,
      "grad_norm": 0.2148108184337616,
      "learning_rate": 8.964285444710914e-05,
      "loss": 0.5871,
      "step": 1006
    },
    {
      "epoch": 0.20700997019220885,
      "grad_norm": 0.18252213299274445,
      "learning_rate": 8.964159745877456e-05,
      "loss": 0.5956,
      "step": 1007
    },
    {
      "epoch": 0.2072155411655874,
      "grad_norm": 0.3090805113315582,
      "learning_rate": 8.964033827115713e-05,
      "loss": 0.7496,
      "step": 1008
    },
    {
      "epoch": 0.20742111213896597,
      "grad_norm": 0.2703743278980255,
      "learning_rate": 8.963907688431887e-05,
      "loss": 0.7492,
      "step": 1009
    },
    {
      "epoch": 0.20762668311234453,
      "grad_norm": 0.26899415254592896,
      "learning_rate": 8.963781329832194e-05,
      "loss": 0.7468,
      "step": 1010
    },
    {
      "epoch": 0.2078322540857231,
      "grad_norm": 0.2887749969959259,
      "learning_rate": 8.963654751322858e-05,
      "loss": 0.765,
      "step": 1011
    },
    {
      "epoch": 0.20803782505910165,
      "grad_norm": 0.2602989077568054,
      "learning_rate": 8.963527952910116e-05,
      "loss": 0.7749,
      "step": 1012
    },
    {
      "epoch": 0.2082433960324802,
      "grad_norm": 0.22857093811035156,
      "learning_rate": 8.963400934600215e-05,
      "loss": 0.5989,
      "step": 1013
    },
    {
      "epoch": 0.20844896700585877,
      "grad_norm": 0.29049423336982727,
      "learning_rate": 8.963273696399411e-05,
      "loss": 0.7406,
      "step": 1014
    },
    {
      "epoch": 0.20865453797923733,
      "grad_norm": 0.27531930804252625,
      "learning_rate": 8.963146238313975e-05,
      "loss": 0.7575,
      "step": 1015
    },
    {
      "epoch": 0.2088601089526159,
      "grad_norm": 0.2683233320713043,
      "learning_rate": 8.963018560350181e-05,
      "loss": 0.7572,
      "step": 1016
    },
    {
      "epoch": 0.20906567992599445,
      "grad_norm": 0.26720771193504333,
      "learning_rate": 8.962890662514325e-05,
      "loss": 0.7537,
      "step": 1017
    },
    {
      "epoch": 0.209271250899373,
      "grad_norm": 0.26178407669067383,
      "learning_rate": 8.962762544812705e-05,
      "loss": 0.7226,
      "step": 1018
    },
    {
      "epoch": 0.20947682187275157,
      "grad_norm": 0.25852060317993164,
      "learning_rate": 8.962634207251633e-05,
      "loss": 0.7401,
      "step": 1019
    },
    {
      "epoch": 0.20968239284613013,
      "grad_norm": 0.25970616936683655,
      "learning_rate": 8.962505649837432e-05,
      "loss": 0.7277,
      "step": 1020
    },
    {
      "epoch": 0.2098879638195087,
      "grad_norm": 0.2682318687438965,
      "learning_rate": 8.962376872576436e-05,
      "loss": 0.7638,
      "step": 1021
    },
    {
      "epoch": 0.21009353479288725,
      "grad_norm": 0.24570779502391815,
      "learning_rate": 8.962247875474989e-05,
      "loss": 0.7256,
      "step": 1022
    },
    {
      "epoch": 0.21029910576626581,
      "grad_norm": 0.2523082196712494,
      "learning_rate": 8.962118658539446e-05,
      "loss": 0.7288,
      "step": 1023
    },
    {
      "epoch": 0.21050467673964438,
      "grad_norm": 0.24562524259090424,
      "learning_rate": 8.96198922177617e-05,
      "loss": 0.7292,
      "step": 1024
    },
    {
      "epoch": 0.2107102477130229,
      "grad_norm": 0.23262366652488708,
      "learning_rate": 8.961859565191543e-05,
      "loss": 0.7401,
      "step": 1025
    },
    {
      "epoch": 0.21091581868640147,
      "grad_norm": 0.21075837314128876,
      "learning_rate": 8.961729688791949e-05,
      "loss": 0.5854,
      "step": 1026
    },
    {
      "epoch": 0.21112138965978003,
      "grad_norm": 0.2659233808517456,
      "learning_rate": 8.961599592583785e-05,
      "loss": 0.742,
      "step": 1027
    },
    {
      "epoch": 0.2113269606331586,
      "grad_norm": 0.2612632215023041,
      "learning_rate": 8.961469276573466e-05,
      "loss": 0.7212,
      "step": 1028
    },
    {
      "epoch": 0.21153253160653715,
      "grad_norm": 0.24459590017795563,
      "learning_rate": 8.961338740767407e-05,
      "loss": 0.7445,
      "step": 1029
    },
    {
      "epoch": 0.2117381025799157,
      "grad_norm": 0.2455456703901291,
      "learning_rate": 8.96120798517204e-05,
      "loss": 0.7469,
      "step": 1030
    },
    {
      "epoch": 0.21194367355329427,
      "grad_norm": 0.25947311520576477,
      "learning_rate": 8.961077009793809e-05,
      "loss": 0.7578,
      "step": 1031
    },
    {
      "epoch": 0.21214924452667283,
      "grad_norm": 0.26415055990219116,
      "learning_rate": 8.960945814639162e-05,
      "loss": 0.7453,
      "step": 1032
    },
    {
      "epoch": 0.2123548155000514,
      "grad_norm": 0.2478688508272171,
      "learning_rate": 8.960814399714568e-05,
      "loss": 0.7246,
      "step": 1033
    },
    {
      "epoch": 0.21256038647342995,
      "grad_norm": 0.21988952159881592,
      "learning_rate": 8.960682765026497e-05,
      "loss": 0.6062,
      "step": 1034
    },
    {
      "epoch": 0.2127659574468085,
      "grad_norm": 0.16625165939331055,
      "learning_rate": 8.960550910581436e-05,
      "loss": 0.5704,
      "step": 1035
    },
    {
      "epoch": 0.21297152842018707,
      "grad_norm": 0.2860580086708069,
      "learning_rate": 8.960418836385879e-05,
      "loss": 0.747,
      "step": 1036
    },
    {
      "epoch": 0.21317709939356563,
      "grad_norm": 0.2644577920436859,
      "learning_rate": 8.960286542446335e-05,
      "loss": 0.7268,
      "step": 1037
    },
    {
      "epoch": 0.2133826703669442,
      "grad_norm": 0.2598789930343628,
      "learning_rate": 8.960154028769319e-05,
      "loss": 0.7645,
      "step": 1038
    },
    {
      "epoch": 0.21358824134032275,
      "grad_norm": 0.2992006540298462,
      "learning_rate": 8.960021295361363e-05,
      "loss": 0.5999,
      "step": 1039
    },
    {
      "epoch": 0.21379381231370131,
      "grad_norm": 0.27868691086769104,
      "learning_rate": 8.959888342229001e-05,
      "loss": 0.7472,
      "step": 1040
    },
    {
      "epoch": 0.21399938328707988,
      "grad_norm": 0.2707647979259491,
      "learning_rate": 8.959755169378788e-05,
      "loss": 0.7158,
      "step": 1041
    },
    {
      "epoch": 0.21420495426045844,
      "grad_norm": 0.2671177089214325,
      "learning_rate": 8.959621776817281e-05,
      "loss": 0.7573,
      "step": 1042
    },
    {
      "epoch": 0.214410525233837,
      "grad_norm": 0.24762409925460815,
      "learning_rate": 8.959488164551055e-05,
      "loss": 0.7353,
      "step": 1043
    },
    {
      "epoch": 0.21461609620721553,
      "grad_norm": 0.24137498438358307,
      "learning_rate": 8.959354332586689e-05,
      "loss": 0.7476,
      "step": 1044
    },
    {
      "epoch": 0.2148216671805941,
      "grad_norm": 0.2598249614238739,
      "learning_rate": 8.959220280930779e-05,
      "loss": 0.7397,
      "step": 1045
    },
    {
      "epoch": 0.21502723815397265,
      "grad_norm": 0.2500339448451996,
      "learning_rate": 8.959086009589929e-05,
      "loss": 0.7525,
      "step": 1046
    },
    {
      "epoch": 0.2152328091273512,
      "grad_norm": 0.25262802839279175,
      "learning_rate": 8.958951518570753e-05,
      "loss": 0.759,
      "step": 1047
    },
    {
      "epoch": 0.21543838010072977,
      "grad_norm": 0.2515556216239929,
      "learning_rate": 8.958816807879875e-05,
      "loss": 0.7321,
      "step": 1048
    },
    {
      "epoch": 0.21564395107410833,
      "grad_norm": 0.24297581613063812,
      "learning_rate": 8.958681877523935e-05,
      "loss": 0.7444,
      "step": 1049
    },
    {
      "epoch": 0.2158495220474869,
      "grad_norm": 0.2649231255054474,
      "learning_rate": 8.958546727509578e-05,
      "loss": 0.7458,
      "step": 1050
    },
    {
      "epoch": 0.21605509302086545,
      "grad_norm": 0.2701459527015686,
      "learning_rate": 8.958411357843461e-05,
      "loss": 0.595,
      "step": 1051
    },
    {
      "epoch": 0.216260663994244,
      "grad_norm": 0.2653101682662964,
      "learning_rate": 8.958275768532258e-05,
      "loss": 0.7544,
      "step": 1052
    },
    {
      "epoch": 0.21646623496762257,
      "grad_norm": 0.2633649408817291,
      "learning_rate": 8.958139959582645e-05,
      "loss": 0.7403,
      "step": 1053
    },
    {
      "epoch": 0.21667180594100113,
      "grad_norm": 0.25117960572242737,
      "learning_rate": 8.958003931001312e-05,
      "loss": 0.7427,
      "step": 1054
    },
    {
      "epoch": 0.2168773769143797,
      "grad_norm": 0.24553567171096802,
      "learning_rate": 8.957867682794963e-05,
      "loss": 0.7264,
      "step": 1055
    },
    {
      "epoch": 0.21708294788775825,
      "grad_norm": 0.23510022461414337,
      "learning_rate": 8.95773121497031e-05,
      "loss": 0.7413,
      "step": 1056
    },
    {
      "epoch": 0.21728851886113681,
      "grad_norm": 0.2532014846801758,
      "learning_rate": 8.957594527534075e-05,
      "loss": 0.735,
      "step": 1057
    },
    {
      "epoch": 0.21749408983451538,
      "grad_norm": 0.25079968571662903,
      "learning_rate": 8.957457620492993e-05,
      "loss": 0.7478,
      "step": 1058
    },
    {
      "epoch": 0.21769966080789394,
      "grad_norm": 0.23813451826572418,
      "learning_rate": 8.957320493853805e-05,
      "loss": 0.7238,
      "step": 1059
    },
    {
      "epoch": 0.2179052317812725,
      "grad_norm": 0.24865779280662537,
      "learning_rate": 8.957183147623273e-05,
      "loss": 0.7369,
      "step": 1060
    },
    {
      "epoch": 0.21811080275465106,
      "grad_norm": 0.24684272706508636,
      "learning_rate": 8.957045581808159e-05,
      "loss": 0.7008,
      "step": 1061
    },
    {
      "epoch": 0.2183163737280296,
      "grad_norm": 0.24000217020511627,
      "learning_rate": 8.956907796415241e-05,
      "loss": 0.5949,
      "step": 1062
    },
    {
      "epoch": 0.21852194470140815,
      "grad_norm": 0.266008198261261,
      "learning_rate": 8.956769791451309e-05,
      "loss": 0.7161,
      "step": 1063
    },
    {
      "epoch": 0.2187275156747867,
      "grad_norm": 0.14858698844909668,
      "learning_rate": 8.956631566923159e-05,
      "loss": 0.5948,
      "step": 1064
    },
    {
      "epoch": 0.21893308664816527,
      "grad_norm": 0.2638164162635803,
      "learning_rate": 8.956493122837601e-05,
      "loss": 0.7347,
      "step": 1065
    },
    {
      "epoch": 0.21913865762154383,
      "grad_norm": 0.2497703582048416,
      "learning_rate": 8.956354459201459e-05,
      "loss": 0.7458,
      "step": 1066
    },
    {
      "epoch": 0.2193442285949224,
      "grad_norm": 0.22499538958072662,
      "learning_rate": 8.95621557602156e-05,
      "loss": 0.5748,
      "step": 1067
    },
    {
      "epoch": 0.21954979956830095,
      "grad_norm": 0.2625332176685333,
      "learning_rate": 8.956076473304748e-05,
      "loss": 0.748,
      "step": 1068
    },
    {
      "epoch": 0.2197553705416795,
      "grad_norm": 0.2666896879673004,
      "learning_rate": 8.955937151057876e-05,
      "loss": 0.7547,
      "step": 1069
    },
    {
      "epoch": 0.21996094151505807,
      "grad_norm": 0.25993168354034424,
      "learning_rate": 8.955797609287807e-05,
      "loss": 0.7593,
      "step": 1070
    },
    {
      "epoch": 0.22016651248843663,
      "grad_norm": 0.248934805393219,
      "learning_rate": 8.955657848001417e-05,
      "loss": 0.753,
      "step": 1071
    },
    {
      "epoch": 0.2203720834618152,
      "grad_norm": 0.24592526257038116,
      "learning_rate": 8.95551786720559e-05,
      "loss": 0.7335,
      "step": 1072
    },
    {
      "epoch": 0.22057765443519375,
      "grad_norm": 0.2522546052932739,
      "learning_rate": 8.955377666907224e-05,
      "loss": 0.7287,
      "step": 1073
    },
    {
      "epoch": 0.22078322540857231,
      "grad_norm": 0.24097007513046265,
      "learning_rate": 8.955237247113222e-05,
      "loss": 0.7178,
      "step": 1074
    },
    {
      "epoch": 0.22098879638195087,
      "grad_norm": 0.26036760210990906,
      "learning_rate": 8.955096607830506e-05,
      "loss": 0.7528,
      "step": 1075
    },
    {
      "epoch": 0.22119436735532944,
      "grad_norm": 0.2414807826280594,
      "learning_rate": 8.954955749066005e-05,
      "loss": 0.7121,
      "step": 1076
    },
    {
      "epoch": 0.221399938328708,
      "grad_norm": 0.2436942607164383,
      "learning_rate": 8.954814670826654e-05,
      "loss": 0.744,
      "step": 1077
    },
    {
      "epoch": 0.22160550930208656,
      "grad_norm": 0.2534603774547577,
      "learning_rate": 8.954673373119407e-05,
      "loss": 0.7627,
      "step": 1078
    },
    {
      "epoch": 0.22181108027546512,
      "grad_norm": 0.21081526577472687,
      "learning_rate": 8.954531855951224e-05,
      "loss": 0.5921,
      "step": 1079
    },
    {
      "epoch": 0.22201665124884365,
      "grad_norm": 0.26541346311569214,
      "learning_rate": 8.954390119329077e-05,
      "loss": 0.7452,
      "step": 1080
    },
    {
      "epoch": 0.2222222222222222,
      "grad_norm": 0.24794277548789978,
      "learning_rate": 8.954248163259949e-05,
      "loss": 0.7196,
      "step": 1081
    },
    {
      "epoch": 0.22242779319560077,
      "grad_norm": 0.25889837741851807,
      "learning_rate": 8.954105987750832e-05,
      "loss": 0.7674,
      "step": 1082
    },
    {
      "epoch": 0.22263336416897933,
      "grad_norm": 0.24961018562316895,
      "learning_rate": 8.953963592808733e-05,
      "loss": 0.7232,
      "step": 1083
    },
    {
      "epoch": 0.2228389351423579,
      "grad_norm": 0.2539832293987274,
      "learning_rate": 8.953820978440664e-05,
      "loss": 0.7559,
      "step": 1084
    },
    {
      "epoch": 0.22304450611573645,
      "grad_norm": 0.23905551433563232,
      "learning_rate": 8.953678144653653e-05,
      "loss": 0.7211,
      "step": 1085
    },
    {
      "epoch": 0.223250077089115,
      "grad_norm": 0.24047812819480896,
      "learning_rate": 8.953535091454735e-05,
      "loss": 0.7367,
      "step": 1086
    },
    {
      "epoch": 0.22345564806249357,
      "grad_norm": 0.25583919882774353,
      "learning_rate": 8.953391818850961e-05,
      "loss": 0.7573,
      "step": 1087
    },
    {
      "epoch": 0.22366121903587213,
      "grad_norm": 0.20065194368362427,
      "learning_rate": 8.953248326849386e-05,
      "loss": 0.5804,
      "step": 1088
    },
    {
      "epoch": 0.2238667900092507,
      "grad_norm": 0.18610531091690063,
      "learning_rate": 8.953104615457081e-05,
      "loss": 0.5888,
      "step": 1089
    },
    {
      "epoch": 0.22407236098262925,
      "grad_norm": 0.15629194676876068,
      "learning_rate": 8.952960684681125e-05,
      "loss": 0.5884,
      "step": 1090
    },
    {
      "epoch": 0.22427793195600781,
      "grad_norm": 0.3306218683719635,
      "learning_rate": 8.952816534528609e-05,
      "loss": 0.7454,
      "step": 1091
    },
    {
      "epoch": 0.22448350292938637,
      "grad_norm": 0.26848849654197693,
      "learning_rate": 8.952672165006635e-05,
      "loss": 0.7336,
      "step": 1092
    },
    {
      "epoch": 0.22468907390276494,
      "grad_norm": 0.20548087358474731,
      "learning_rate": 8.952527576122315e-05,
      "loss": 0.5992,
      "step": 1093
    },
    {
      "epoch": 0.2248946448761435,
      "grad_norm": 0.18607185781002045,
      "learning_rate": 8.952382767882773e-05,
      "loss": 0.5666,
      "step": 1094
    },
    {
      "epoch": 0.22510021584952206,
      "grad_norm": 0.16436809301376343,
      "learning_rate": 8.952237740295141e-05,
      "loss": 0.595,
      "step": 1095
    },
    {
      "epoch": 0.22530578682290062,
      "grad_norm": 0.46899160742759705,
      "learning_rate": 8.952092493366567e-05,
      "loss": 0.7777,
      "step": 1096
    },
    {
      "epoch": 0.22551135779627918,
      "grad_norm": 0.2985895276069641,
      "learning_rate": 8.951947027104205e-05,
      "loss": 0.7495,
      "step": 1097
    },
    {
      "epoch": 0.22571692876965774,
      "grad_norm": 0.319159597158432,
      "learning_rate": 8.95180134151522e-05,
      "loss": 0.7469,
      "step": 1098
    },
    {
      "epoch": 0.22592249974303627,
      "grad_norm": 0.324747771024704,
      "learning_rate": 8.95165543660679e-05,
      "loss": 0.7239,
      "step": 1099
    },
    {
      "epoch": 0.22612807071641483,
      "grad_norm": 0.5259039402008057,
      "learning_rate": 8.951509312386105e-05,
      "loss": 0.6189,
      "step": 1100
    },
    {
      "epoch": 0.2263336416897934,
      "grad_norm": 0.2236146181821823,
      "learning_rate": 8.951362968860361e-05,
      "loss": 0.5996,
      "step": 1101
    },
    {
      "epoch": 0.22653921266317195,
      "grad_norm": 0.4835422933101654,
      "learning_rate": 8.95121640603677e-05,
      "loss": 0.7662,
      "step": 1102
    },
    {
      "epoch": 0.2267447836365505,
      "grad_norm": 0.371629923582077,
      "learning_rate": 8.951069623922552e-05,
      "loss": 0.7393,
      "step": 1103
    },
    {
      "epoch": 0.22695035460992907,
      "grad_norm": 0.2967519164085388,
      "learning_rate": 8.950922622524938e-05,
      "loss": 0.7547,
      "step": 1104
    },
    {
      "epoch": 0.22715592558330763,
      "grad_norm": 0.3473425507545471,
      "learning_rate": 8.950775401851169e-05,
      "loss": 0.7603,
      "step": 1105
    },
    {
      "epoch": 0.2273614965566862,
      "grad_norm": 0.3515138030052185,
      "learning_rate": 8.950627961908499e-05,
      "loss": 0.729,
      "step": 1106
    },
    {
      "epoch": 0.22756706753006475,
      "grad_norm": 0.3210054039955139,
      "learning_rate": 8.950480302704193e-05,
      "loss": 0.7565,
      "step": 1107
    },
    {
      "epoch": 0.22777263850344331,
      "grad_norm": 0.5195302367210388,
      "learning_rate": 8.950332424245522e-05,
      "loss": 0.6351,
      "step": 1108
    },
    {
      "epoch": 0.22797820947682187,
      "grad_norm": 0.3467387557029724,
      "learning_rate": 8.950184326539775e-05,
      "loss": 0.7554,
      "step": 1109
    },
    {
      "epoch": 0.22818378045020044,
      "grad_norm": 0.33716848492622375,
      "learning_rate": 8.950036009594245e-05,
      "loss": 0.7558,
      "step": 1110
    },
    {
      "epoch": 0.228389351423579,
      "grad_norm": 0.27896901965141296,
      "learning_rate": 8.94988747341624e-05,
      "loss": 0.7455,
      "step": 1111
    },
    {
      "epoch": 0.22859492239695756,
      "grad_norm": 0.27595579624176025,
      "learning_rate": 8.949738718013078e-05,
      "loss": 0.7425,
      "step": 1112
    },
    {
      "epoch": 0.22880049337033612,
      "grad_norm": 0.29621824622154236,
      "learning_rate": 8.949589743392089e-05,
      "loss": 0.7416,
      "step": 1113
    },
    {
      "epoch": 0.22900606434371468,
      "grad_norm": 0.28054726123809814,
      "learning_rate": 8.94944054956061e-05,
      "loss": 0.7538,
      "step": 1114
    },
    {
      "epoch": 0.22921163531709324,
      "grad_norm": 0.25396206974983215,
      "learning_rate": 8.949291136525991e-05,
      "loss": 0.7479,
      "step": 1115
    },
    {
      "epoch": 0.2294172062904718,
      "grad_norm": 0.2706109881401062,
      "learning_rate": 8.949141504295594e-05,
      "loss": 0.7475,
      "step": 1116
    },
    {
      "epoch": 0.22962277726385033,
      "grad_norm": 0.26184260845184326,
      "learning_rate": 8.94899165287679e-05,
      "loss": 0.7383,
      "step": 1117
    },
    {
      "epoch": 0.2298283482372289,
      "grad_norm": 0.2610413134098053,
      "learning_rate": 8.948841582276963e-05,
      "loss": 0.7384,
      "step": 1118
    },
    {
      "epoch": 0.23003391921060745,
      "grad_norm": 0.2537980079650879,
      "learning_rate": 8.948691292503504e-05,
      "loss": 0.7444,
      "step": 1119
    },
    {
      "epoch": 0.230239490183986,
      "grad_norm": 0.2602024972438812,
      "learning_rate": 8.948540783563817e-05,
      "loss": 0.7306,
      "step": 1120
    },
    {
      "epoch": 0.23044506115736457,
      "grad_norm": 0.3567192256450653,
      "learning_rate": 8.94839005546532e-05,
      "loss": 0.604,
      "step": 1121
    },
    {
      "epoch": 0.23065063213074313,
      "grad_norm": 0.49138790369033813,
      "learning_rate": 8.948239108215437e-05,
      "loss": 0.7303,
      "step": 1122
    },
    {
      "epoch": 0.2308562031041217,
      "grad_norm": 0.30943894386291504,
      "learning_rate": 8.948087941821603e-05,
      "loss": 0.7535,
      "step": 1123
    },
    {
      "epoch": 0.23106177407750025,
      "grad_norm": 0.25115516781806946,
      "learning_rate": 8.947936556291267e-05,
      "loss": 0.7416,
      "step": 1124
    },
    {
      "epoch": 0.23126734505087881,
      "grad_norm": 0.24797074496746063,
      "learning_rate": 8.947784951631886e-05,
      "loss": 0.7328,
      "step": 1125
    },
    {
      "epoch": 0.23147291602425737,
      "grad_norm": 0.25195595622062683,
      "learning_rate": 8.94763312785093e-05,
      "loss": 0.7375,
      "step": 1126
    },
    {
      "epoch": 0.23167848699763594,
      "grad_norm": 0.20428021252155304,
      "learning_rate": 8.947481084955877e-05,
      "loss": 0.61,
      "step": 1127
    },
    {
      "epoch": 0.2318840579710145,
      "grad_norm": 0.27424702048301697,
      "learning_rate": 8.947328822954218e-05,
      "loss": 0.7512,
      "step": 1128
    },
    {
      "epoch": 0.23208962894439306,
      "grad_norm": 0.26351961493492126,
      "learning_rate": 8.947176341853455e-05,
      "loss": 0.7584,
      "step": 1129
    },
    {
      "epoch": 0.23229519991777162,
      "grad_norm": 0.25228413939476013,
      "learning_rate": 8.947023641661101e-05,
      "loss": 0.7629,
      "step": 1130
    },
    {
      "epoch": 0.23250077089115018,
      "grad_norm": 0.24488292634487152,
      "learning_rate": 8.946870722384676e-05,
      "loss": 0.7501,
      "step": 1131
    },
    {
      "epoch": 0.23270634186452874,
      "grad_norm": 0.2597258388996124,
      "learning_rate": 8.946717584031716e-05,
      "loss": 0.7408,
      "step": 1132
    },
    {
      "epoch": 0.2329119128379073,
      "grad_norm": 0.25343239307403564,
      "learning_rate": 8.946564226609764e-05,
      "loss": 0.7186,
      "step": 1133
    },
    {
      "epoch": 0.23311748381128586,
      "grad_norm": 0.24788786470890045,
      "learning_rate": 8.946410650126376e-05,
      "loss": 0.6838,
      "step": 1134
    },
    {
      "epoch": 0.23332305478466442,
      "grad_norm": 0.18649965524673462,
      "learning_rate": 8.946256854589118e-05,
      "loss": 0.6325,
      "step": 1135
    },
    {
      "epoch": 0.23352862575804295,
      "grad_norm": 0.26197314262390137,
      "learning_rate": 8.946102840005568e-05,
      "loss": 0.7428,
      "step": 1136
    },
    {
      "epoch": 0.2337341967314215,
      "grad_norm": 0.25486642122268677,
      "learning_rate": 8.94594860638331e-05,
      "loss": 0.7505,
      "step": 1137
    },
    {
      "epoch": 0.23393976770480007,
      "grad_norm": 0.2388404756784439,
      "learning_rate": 8.945794153729945e-05,
      "loss": 0.7296,
      "step": 1138
    },
    {
      "epoch": 0.23414533867817863,
      "grad_norm": 0.2506440579891205,
      "learning_rate": 8.945639482053081e-05,
      "loss": 0.7501,
      "step": 1139
    },
    {
      "epoch": 0.2343509096515572,
      "grad_norm": 0.2521236538887024,
      "learning_rate": 8.94548459136034e-05,
      "loss": 0.7488,
      "step": 1140
    },
    {
      "epoch": 0.23455648062493575,
      "grad_norm": 0.25158312916755676,
      "learning_rate": 8.94532948165935e-05,
      "loss": 0.7274,
      "step": 1141
    },
    {
      "epoch": 0.23476205159831431,
      "grad_norm": 0.23634850978851318,
      "learning_rate": 8.945174152957755e-05,
      "loss": 0.7306,
      "step": 1142
    },
    {
      "epoch": 0.23496762257169287,
      "grad_norm": 0.1795545369386673,
      "learning_rate": 8.945018605263205e-05,
      "loss": 0.5908,
      "step": 1143
    },
    {
      "epoch": 0.23517319354507144,
      "grad_norm": 0.26744595170021057,
      "learning_rate": 8.944862838583364e-05,
      "loss": 0.747,
      "step": 1144
    },
    {
      "epoch": 0.23537876451845,
      "grad_norm": 0.23531249165534973,
      "learning_rate": 8.944706852925908e-05,
      "loss": 0.7097,
      "step": 1145
    },
    {
      "epoch": 0.23558433549182856,
      "grad_norm": 0.2423231452703476,
      "learning_rate": 8.944550648298519e-05,
      "loss": 0.7536,
      "step": 1146
    },
    {
      "epoch": 0.23578990646520712,
      "grad_norm": 0.24406969547271729,
      "learning_rate": 8.944394224708892e-05,
      "loss": 0.7459,
      "step": 1147
    },
    {
      "epoch": 0.23599547743858568,
      "grad_norm": 0.2516055405139923,
      "learning_rate": 8.944237582164736e-05,
      "loss": 0.748,
      "step": 1148
    },
    {
      "epoch": 0.23620104841196424,
      "grad_norm": 0.23662374913692474,
      "learning_rate": 8.944080720673766e-05,
      "loss": 0.7272,
      "step": 1149
    },
    {
      "epoch": 0.2364066193853428,
      "grad_norm": 0.25914058089256287,
      "learning_rate": 8.943923640243712e-05,
      "loss": 0.7286,
      "step": 1150
    },
    {
      "epoch": 0.23661219035872136,
      "grad_norm": 0.16088080406188965,
      "learning_rate": 8.943766340882309e-05,
      "loss": 0.5913,
      "step": 1151
    },
    {
      "epoch": 0.23681776133209992,
      "grad_norm": 0.15930064022541046,
      "learning_rate": 8.943608822597309e-05,
      "loss": 0.5927,
      "step": 1152
    },
    {
      "epoch": 0.23702333230547848,
      "grad_norm": 0.2877768576145172,
      "learning_rate": 8.943451085396473e-05,
      "loss": 0.7462,
      "step": 1153
    },
    {
      "epoch": 0.237228903278857,
      "grad_norm": 0.2618594169616699,
      "learning_rate": 8.94329312928757e-05,
      "loss": 0.7506,
      "step": 1154
    },
    {
      "epoch": 0.23743447425223557,
      "grad_norm": 0.24599005281925201,
      "learning_rate": 8.943134954278383e-05,
      "loss": 0.7052,
      "step": 1155
    },
    {
      "epoch": 0.23764004522561413,
      "grad_norm": 0.2675454318523407,
      "learning_rate": 8.942976560376703e-05,
      "loss": 0.7396,
      "step": 1156
    },
    {
      "epoch": 0.2378456161989927,
      "grad_norm": 0.2358483374118805,
      "learning_rate": 8.942817947590333e-05,
      "loss": 0.7131,
      "step": 1157
    },
    {
      "epoch": 0.23805118717237125,
      "grad_norm": 0.24510863423347473,
      "learning_rate": 8.94265911592709e-05,
      "loss": 0.735,
      "step": 1158
    },
    {
      "epoch": 0.23825675814574981,
      "grad_norm": 0.24396325647830963,
      "learning_rate": 8.942500065394798e-05,
      "loss": 0.7286,
      "step": 1159
    },
    {
      "epoch": 0.23846232911912837,
      "grad_norm": 0.24989542365074158,
      "learning_rate": 8.942340796001291e-05,
      "loss": 0.7614,
      "step": 1160
    },
    {
      "epoch": 0.23866790009250693,
      "grad_norm": 0.22477596998214722,
      "learning_rate": 8.942181307754416e-05,
      "loss": 0.7065,
      "step": 1161
    },
    {
      "epoch": 0.2388734710658855,
      "grad_norm": 0.27181369066238403,
      "learning_rate": 8.942021600662033e-05,
      "loss": 0.7612,
      "step": 1162
    },
    {
      "epoch": 0.23907904203926406,
      "grad_norm": 0.2516171336174011,
      "learning_rate": 8.941861674732005e-05,
      "loss": 0.7506,
      "step": 1163
    },
    {
      "epoch": 0.23928461301264262,
      "grad_norm": 0.23005805909633636,
      "learning_rate": 8.941701529972216e-05,
      "loss": 0.7287,
      "step": 1164
    },
    {
      "epoch": 0.23949018398602118,
      "grad_norm": 0.24049928784370422,
      "learning_rate": 8.941541166390549e-05,
      "loss": 0.7337,
      "step": 1165
    },
    {
      "epoch": 0.23969575495939974,
      "grad_norm": 0.2356685847043991,
      "learning_rate": 8.941380583994912e-05,
      "loss": 0.7066,
      "step": 1166
    },
    {
      "epoch": 0.2399013259327783,
      "grad_norm": 0.21500107645988464,
      "learning_rate": 8.941219782793211e-05,
      "loss": 0.5845,
      "step": 1167
    },
    {
      "epoch": 0.24010689690615686,
      "grad_norm": 0.24245062470436096,
      "learning_rate": 8.941058762793371e-05,
      "loss": 0.7339,
      "step": 1168
    },
    {
      "epoch": 0.24031246787953542,
      "grad_norm": 0.24114523828029633,
      "learning_rate": 8.940897524003322e-05,
      "loss": 0.7167,
      "step": 1169
    },
    {
      "epoch": 0.24051803885291398,
      "grad_norm": 0.2341417521238327,
      "learning_rate": 8.94073606643101e-05,
      "loss": 0.7557,
      "step": 1170
    },
    {
      "epoch": 0.24072360982629254,
      "grad_norm": 0.24253100156784058,
      "learning_rate": 8.940574390084385e-05,
      "loss": 0.7522,
      "step": 1171
    },
    {
      "epoch": 0.2409291807996711,
      "grad_norm": 0.17679694294929504,
      "learning_rate": 8.940412494971418e-05,
      "loss": 0.5978,
      "step": 1172
    },
    {
      "epoch": 0.24113475177304963,
      "grad_norm": 0.2966403067111969,
      "learning_rate": 8.940250381100081e-05,
      "loss": 0.7489,
      "step": 1173
    },
    {
      "epoch": 0.2413403227464282,
      "grad_norm": 0.2602713108062744,
      "learning_rate": 8.94008804847836e-05,
      "loss": 0.737,
      "step": 1174
    },
    {
      "epoch": 0.24154589371980675,
      "grad_norm": 0.24620187282562256,
      "learning_rate": 8.939925497114255e-05,
      "loss": 0.7612,
      "step": 1175
    },
    {
      "epoch": 0.24175146469318531,
      "grad_norm": 1.3907586336135864,
      "learning_rate": 8.939762727015773e-05,
      "loss": 0.7424,
      "step": 1176
    },
    {
      "epoch": 0.24195703566656387,
      "grad_norm": 0.25489339232444763,
      "learning_rate": 8.939599738190933e-05,
      "loss": 0.7292,
      "step": 1177
    },
    {
      "epoch": 0.24216260663994243,
      "grad_norm": 0.24630793929100037,
      "learning_rate": 8.939436530647765e-05,
      "loss": 0.7201,
      "step": 1178
    },
    {
      "epoch": 0.242368177613321,
      "grad_norm": 0.2420111447572708,
      "learning_rate": 8.939273104394307e-05,
      "loss": 0.7593,
      "step": 1179
    },
    {
      "epoch": 0.24257374858669956,
      "grad_norm": 0.24446842074394226,
      "learning_rate": 8.939109459438614e-05,
      "loss": 0.7191,
      "step": 1180
    },
    {
      "epoch": 0.24277931956007812,
      "grad_norm": 0.2652778625488281,
      "learning_rate": 8.938945595788746e-05,
      "loss": 0.7417,
      "step": 1181
    },
    {
      "epoch": 0.24298489053345668,
      "grad_norm": 0.2472565621137619,
      "learning_rate": 8.938781513452775e-05,
      "loss": 0.7128,
      "step": 1182
    },
    {
      "epoch": 0.24319046150683524,
      "grad_norm": 0.25744304060935974,
      "learning_rate": 8.938617212438786e-05,
      "loss": 0.7433,
      "step": 1183
    },
    {
      "epoch": 0.2433960324802138,
      "grad_norm": 0.2481434941291809,
      "learning_rate": 8.938452692754874e-05,
      "loss": 0.6043,
      "step": 1184
    },
    {
      "epoch": 0.24360160345359236,
      "grad_norm": 0.27799829840660095,
      "learning_rate": 8.938287954409143e-05,
      "loss": 0.7457,
      "step": 1185
    },
    {
      "epoch": 0.24380717442697092,
      "grad_norm": 0.1753695160150528,
      "learning_rate": 8.938122997409709e-05,
      "loss": 0.5978,
      "step": 1186
    },
    {
      "epoch": 0.24401274540034948,
      "grad_norm": 0.16633495688438416,
      "learning_rate": 8.937957821764698e-05,
      "loss": 0.6047,
      "step": 1187
    },
    {
      "epoch": 0.24421831637372804,
      "grad_norm": 0.2707998752593994,
      "learning_rate": 8.937792427482249e-05,
      "loss": 0.7181,
      "step": 1188
    },
    {
      "epoch": 0.2444238873471066,
      "grad_norm": 0.1617717742919922,
      "learning_rate": 8.937626814570507e-05,
      "loss": 0.6032,
      "step": 1189
    },
    {
      "epoch": 0.24462945832048516,
      "grad_norm": 0.15513579547405243,
      "learning_rate": 8.937460983037636e-05,
      "loss": 0.5983,
      "step": 1190
    },
    {
      "epoch": 0.2448350292938637,
      "grad_norm": 0.2588478624820709,
      "learning_rate": 8.9372949328918e-05,
      "loss": 0.7395,
      "step": 1191
    },
    {
      "epoch": 0.24504060026724225,
      "grad_norm": 0.2583847939968109,
      "learning_rate": 8.937128664141184e-05,
      "loss": 0.7442,
      "step": 1192
    },
    {
      "epoch": 0.2452461712406208,
      "grad_norm": 0.23951515555381775,
      "learning_rate": 8.936962176793979e-05,
      "loss": 0.7309,
      "step": 1193
    },
    {
      "epoch": 0.24545174221399937,
      "grad_norm": 0.23284120857715607,
      "learning_rate": 8.936795470858385e-05,
      "loss": 0.7122,
      "step": 1194
    },
    {
      "epoch": 0.24565731318737793,
      "grad_norm": 0.2364392876625061,
      "learning_rate": 8.936628546342617e-05,
      "loss": 0.7452,
      "step": 1195
    },
    {
      "epoch": 0.2458628841607565,
      "grad_norm": 0.19968503713607788,
      "learning_rate": 8.936461403254895e-05,
      "loss": 0.6054,
      "step": 1196
    },
    {
      "epoch": 0.24606845513413506,
      "grad_norm": 0.25698399543762207,
      "learning_rate": 8.936294041603457e-05,
      "loss": 0.7542,
      "step": 1197
    },
    {
      "epoch": 0.24627402610751362,
      "grad_norm": 0.2551160454750061,
      "learning_rate": 8.936126461396545e-05,
      "loss": 0.729,
      "step": 1198
    },
    {
      "epoch": 0.24647959708089218,
      "grad_norm": 0.2407594472169876,
      "learning_rate": 8.935958662642419e-05,
      "loss": 0.7331,
      "step": 1199
    },
    {
      "epoch": 0.24668516805427074,
      "grad_norm": 0.19667823612689972,
      "learning_rate": 8.935790645349342e-05,
      "loss": 0.5818,
      "step": 1200
    },
    {
      "epoch": 0.2468907390276493,
      "grad_norm": 0.25005340576171875,
      "learning_rate": 8.935622409525593e-05,
      "loss": 0.7355,
      "step": 1201
    },
    {
      "epoch": 0.24709631000102786,
      "grad_norm": 0.15851576626300812,
      "learning_rate": 8.93545395517946e-05,
      "loss": 0.6147,
      "step": 1202
    },
    {
      "epoch": 0.24730188097440642,
      "grad_norm": 0.2595955431461334,
      "learning_rate": 8.935285282319242e-05,
      "loss": 0.7344,
      "step": 1203
    },
    {
      "epoch": 0.24750745194778498,
      "grad_norm": 0.2531373202800751,
      "learning_rate": 8.935116390953249e-05,
      "loss": 0.7206,
      "step": 1204
    },
    {
      "epoch": 0.24771302292116354,
      "grad_norm": 0.2330513596534729,
      "learning_rate": 8.9349472810898e-05,
      "loss": 0.7487,
      "step": 1205
    },
    {
      "epoch": 0.2479185938945421,
      "grad_norm": 0.23262523114681244,
      "learning_rate": 8.934777952737228e-05,
      "loss": 0.7268,
      "step": 1206
    },
    {
      "epoch": 0.24812416486792066,
      "grad_norm": 0.2461225688457489,
      "learning_rate": 8.934608405903875e-05,
      "loss": 0.7272,
      "step": 1207
    },
    {
      "epoch": 0.24832973584129922,
      "grad_norm": 0.23531411588191986,
      "learning_rate": 8.934438640598092e-05,
      "loss": 0.7249,
      "step": 1208
    },
    {
      "epoch": 0.24853530681467778,
      "grad_norm": 0.19100695848464966,
      "learning_rate": 8.934268656828244e-05,
      "loss": 0.6049,
      "step": 1209
    },
    {
      "epoch": 0.2487408777880563,
      "grad_norm": 0.25513240694999695,
      "learning_rate": 8.934098454602704e-05,
      "loss": 0.7281,
      "step": 1210
    },
    {
      "epoch": 0.24894644876143487,
      "grad_norm": 0.24409835040569305,
      "learning_rate": 8.93392803392986e-05,
      "loss": 0.7533,
      "step": 1211
    },
    {
      "epoch": 0.24915201973481343,
      "grad_norm": 0.24540594220161438,
      "learning_rate": 8.933757394818104e-05,
      "loss": 0.7218,
      "step": 1212
    },
    {
      "epoch": 0.249357590708192,
      "grad_norm": 0.24975821375846863,
      "learning_rate": 8.933586537275846e-05,
      "loss": 0.7528,
      "step": 1213
    },
    {
      "epoch": 0.24956316168157056,
      "grad_norm": 0.17961885035037994,
      "learning_rate": 8.933415461311502e-05,
      "loss": 0.5881,
      "step": 1214
    },
    {
      "epoch": 0.24976873265494912,
      "grad_norm": 0.26504039764404297,
      "learning_rate": 8.9332441669335e-05,
      "loss": 0.7393,
      "step": 1215
    },
    {
      "epoch": 0.24997430362832768,
      "grad_norm": 0.24959856271743774,
      "learning_rate": 8.933072654150277e-05,
      "loss": 0.7333,
      "step": 1216
    },
    {
      "epoch": 0.25017987460170626,
      "grad_norm": 0.25788456201553345,
      "learning_rate": 8.932900922970287e-05,
      "loss": 0.7524,
      "step": 1217
    },
    {
      "epoch": 0.2503854455750848,
      "grad_norm": 0.2299453467130661,
      "learning_rate": 8.932728973401986e-05,
      "loss": 0.7532,
      "step": 1218
    },
    {
      "epoch": 0.25059101654846333,
      "grad_norm": 0.23602120578289032,
      "learning_rate": 8.932556805453847e-05,
      "loss": 0.7446,
      "step": 1219
    },
    {
      "epoch": 0.2507965875218419,
      "grad_norm": 0.24988947808742523,
      "learning_rate": 8.932384419134352e-05,
      "loss": 0.7275,
      "step": 1220
    },
    {
      "epoch": 0.25100215849522045,
      "grad_norm": 0.22750410437583923,
      "learning_rate": 8.932211814451995e-05,
      "loss": 0.7284,
      "step": 1221
    },
    {
      "epoch": 0.25120772946859904,
      "grad_norm": 0.22385790944099426,
      "learning_rate": 8.932038991415277e-05,
      "loss": 0.753,
      "step": 1222
    },
    {
      "epoch": 0.25141330044197757,
      "grad_norm": 0.22648993134498596,
      "learning_rate": 8.931865950032713e-05,
      "loss": 0.7171,
      "step": 1223
    },
    {
      "epoch": 0.25161887141535616,
      "grad_norm": 0.22896623611450195,
      "learning_rate": 8.931692690312828e-05,
      "loss": 0.7164,
      "step": 1224
    },
    {
      "epoch": 0.2518244423887347,
      "grad_norm": 0.2378738969564438,
      "learning_rate": 8.931519212264157e-05,
      "loss": 0.6969,
      "step": 1225
    },
    {
      "epoch": 0.2520300133621133,
      "grad_norm": 0.23377791047096252,
      "learning_rate": 8.931345515895248e-05,
      "loss": 0.7102,
      "step": 1226
    },
    {
      "epoch": 0.2522355843354918,
      "grad_norm": 0.23156873881816864,
      "learning_rate": 8.93117160121466e-05,
      "loss": 0.7426,
      "step": 1227
    },
    {
      "epoch": 0.2524411553088704,
      "grad_norm": 0.2447620928287506,
      "learning_rate": 8.930997468230956e-05,
      "loss": 0.7254,
      "step": 1228
    },
    {
      "epoch": 0.25264672628224893,
      "grad_norm": 0.24257569015026093,
      "learning_rate": 8.930823116952717e-05,
      "loss": 0.7551,
      "step": 1229
    },
    {
      "epoch": 0.2528522972556275,
      "grad_norm": 0.23060962557792664,
      "learning_rate": 8.930648547388534e-05,
      "loss": 0.7411,
      "step": 1230
    },
    {
      "epoch": 0.25305786822900606,
      "grad_norm": 0.23297728598117828,
      "learning_rate": 8.930473759547005e-05,
      "loss": 0.731,
      "step": 1231
    },
    {
      "epoch": 0.25326343920238464,
      "grad_norm": 0.18401369452476501,
      "learning_rate": 8.930298753436741e-05,
      "loss": 0.6025,
      "step": 1232
    },
    {
      "epoch": 0.2534690101757632,
      "grad_norm": 0.25541701912879944,
      "learning_rate": 8.930123529066365e-05,
      "loss": 0.7314,
      "step": 1233
    },
    {
      "epoch": 0.25367458114914176,
      "grad_norm": 0.2430264949798584,
      "learning_rate": 8.929948086444512e-05,
      "loss": 0.7115,
      "step": 1234
    },
    {
      "epoch": 0.2538801521225203,
      "grad_norm": 0.2397884875535965,
      "learning_rate": 8.929772425579818e-05,
      "loss": 0.7065,
      "step": 1235
    },
    {
      "epoch": 0.2540857230958989,
      "grad_norm": 0.2442830502986908,
      "learning_rate": 8.929596546480944e-05,
      "loss": 0.7252,
      "step": 1236
    },
    {
      "epoch": 0.2542912940692774,
      "grad_norm": 0.2494584023952484,
      "learning_rate": 8.92942044915655e-05,
      "loss": 0.7292,
      "step": 1237
    },
    {
      "epoch": 0.25449686504265595,
      "grad_norm": 0.23975245654582977,
      "learning_rate": 8.929244133615314e-05,
      "loss": 0.7256,
      "step": 1238
    },
    {
      "epoch": 0.25470243601603454,
      "grad_norm": 0.24557578563690186,
      "learning_rate": 8.929067599865924e-05,
      "loss": 0.7126,
      "step": 1239
    },
    {
      "epoch": 0.25490800698941307,
      "grad_norm": 0.2466876208782196,
      "learning_rate": 8.928890847917073e-05,
      "loss": 0.7397,
      "step": 1240
    },
    {
      "epoch": 0.25511357796279166,
      "grad_norm": 0.236251562833786,
      "learning_rate": 8.92871387777747e-05,
      "loss": 0.7578,
      "step": 1241
    },
    {
      "epoch": 0.2553191489361702,
      "grad_norm": 0.23271340131759644,
      "learning_rate": 8.928536689455835e-05,
      "loss": 0.7126,
      "step": 1242
    },
    {
      "epoch": 0.2555247199095488,
      "grad_norm": 0.2597436010837555,
      "learning_rate": 8.928359282960896e-05,
      "loss": 0.7506,
      "step": 1243
    },
    {
      "epoch": 0.2557302908829273,
      "grad_norm": 0.2491491734981537,
      "learning_rate": 8.928181658301394e-05,
      "loss": 0.7396,
      "step": 1244
    },
    {
      "epoch": 0.2559358618563059,
      "grad_norm": 0.2302912026643753,
      "learning_rate": 8.928003815486078e-05,
      "loss": 0.7074,
      "step": 1245
    },
    {
      "epoch": 0.25614143282968443,
      "grad_norm": 0.22792287170886993,
      "learning_rate": 8.927825754523711e-05,
      "loss": 0.705,
      "step": 1246
    },
    {
      "epoch": 0.256347003803063,
      "grad_norm": 0.20026971399784088,
      "learning_rate": 8.927647475423064e-05,
      "loss": 0.597,
      "step": 1247
    },
    {
      "epoch": 0.25655257477644156,
      "grad_norm": 0.2631547749042511,
      "learning_rate": 8.92746897819292e-05,
      "loss": 0.7552,
      "step": 1248
    },
    {
      "epoch": 0.25675814574982014,
      "grad_norm": 0.24641458690166473,
      "learning_rate": 8.927290262842075e-05,
      "loss": 0.7049,
      "step": 1249
    },
    {
      "epoch": 0.2569637167231987,
      "grad_norm": 0.24111877381801605,
      "learning_rate": 8.927111329379331e-05,
      "loss": 0.7467,
      "step": 1250
    },
    {
      "epoch": 0.25716928769657726,
      "grad_norm": 0.23682504892349243,
      "learning_rate": 8.926932177813505e-05,
      "loss": 0.7529,
      "step": 1251
    },
    {
      "epoch": 0.2573748586699558,
      "grad_norm": 0.2335578352212906,
      "learning_rate": 8.92675280815342e-05,
      "loss": 0.7186,
      "step": 1252
    },
    {
      "epoch": 0.2575804296433344,
      "grad_norm": 0.25901028513908386,
      "learning_rate": 8.926573220407918e-05,
      "loss": 0.7339,
      "step": 1253
    },
    {
      "epoch": 0.2577860006167129,
      "grad_norm": 0.2469077706336975,
      "learning_rate": 8.92639341458584e-05,
      "loss": 0.744,
      "step": 1254
    },
    {
      "epoch": 0.2579915715900915,
      "grad_norm": 0.17402611672878265,
      "learning_rate": 8.926213390696048e-05,
      "loss": 0.5948,
      "step": 1255
    },
    {
      "epoch": 0.25819714256347004,
      "grad_norm": 0.2638707160949707,
      "learning_rate": 8.926033148747412e-05,
      "loss": 0.7456,
      "step": 1256
    },
    {
      "epoch": 0.25840271353684857,
      "grad_norm": 0.15191468596458435,
      "learning_rate": 8.925852688748808e-05,
      "loss": 0.6055,
      "step": 1257
    },
    {
      "epoch": 0.25860828451022716,
      "grad_norm": 0.25375521183013916,
      "learning_rate": 8.92567201070913e-05,
      "loss": 0.7441,
      "step": 1258
    },
    {
      "epoch": 0.2588138554836057,
      "grad_norm": 0.24398963153362274,
      "learning_rate": 8.925491114637277e-05,
      "loss": 0.7551,
      "step": 1259
    },
    {
      "epoch": 0.2590194264569843,
      "grad_norm": 0.15817205607891083,
      "learning_rate": 8.925310000542161e-05,
      "loss": 0.5987,
      "step": 1260
    },
    {
      "epoch": 0.2592249974303628,
      "grad_norm": 0.15531690418720245,
      "learning_rate": 8.925128668432705e-05,
      "loss": 0.5948,
      "step": 1261
    },
    {
      "epoch": 0.2594305684037414,
      "grad_norm": 0.25315144658088684,
      "learning_rate": 8.924947118317844e-05,
      "loss": 0.7374,
      "step": 1262
    },
    {
      "epoch": 0.25963613937711993,
      "grad_norm": 0.24230562150478363,
      "learning_rate": 8.924765350206519e-05,
      "loss": 0.7363,
      "step": 1263
    },
    {
      "epoch": 0.2598417103504985,
      "grad_norm": 0.22478878498077393,
      "learning_rate": 8.924583364107687e-05,
      "loss": 0.7269,
      "step": 1264
    },
    {
      "epoch": 0.26004728132387706,
      "grad_norm": 0.24388407170772552,
      "learning_rate": 8.924401160030313e-05,
      "loss": 0.7349,
      "step": 1265
    },
    {
      "epoch": 0.26025285229725564,
      "grad_norm": 0.24955937266349792,
      "learning_rate": 8.924218737983373e-05,
      "loss": 0.73,
      "step": 1266
    },
    {
      "epoch": 0.2604584232706342,
      "grad_norm": 0.24500887095928192,
      "learning_rate": 8.924036097975856e-05,
      "loss": 0.7247,
      "step": 1267
    },
    {
      "epoch": 0.26066399424401276,
      "grad_norm": 0.20046253502368927,
      "learning_rate": 8.923853240016757e-05,
      "loss": 0.5842,
      "step": 1268
    },
    {
      "epoch": 0.2608695652173913,
      "grad_norm": 0.25663238763809204,
      "learning_rate": 8.923670164115087e-05,
      "loss": 0.7296,
      "step": 1269
    },
    {
      "epoch": 0.2610751361907699,
      "grad_norm": 0.25753530859947205,
      "learning_rate": 8.923486870279863e-05,
      "loss": 0.7367,
      "step": 1270
    },
    {
      "epoch": 0.2612807071641484,
      "grad_norm": 0.23126912117004395,
      "learning_rate": 8.923303358520117e-05,
      "loss": 0.7257,
      "step": 1271
    },
    {
      "epoch": 0.261486278137527,
      "grad_norm": 0.24083848297595978,
      "learning_rate": 8.923119628844889e-05,
      "loss": 0.7335,
      "step": 1272
    },
    {
      "epoch": 0.26169184911090554,
      "grad_norm": 0.17281857132911682,
      "learning_rate": 8.92293568126323e-05,
      "loss": 0.5799,
      "step": 1273
    },
    {
      "epoch": 0.26189742008428407,
      "grad_norm": 0.1575915813446045,
      "learning_rate": 8.922751515784204e-05,
      "loss": 0.5796,
      "step": 1274
    },
    {
      "epoch": 0.26210299105766266,
      "grad_norm": 0.31265151500701904,
      "learning_rate": 8.922567132416881e-05,
      "loss": 0.7426,
      "step": 1275
    },
    {
      "epoch": 0.2623085620310412,
      "grad_norm": 0.257569819688797,
      "learning_rate": 8.922382531170347e-05,
      "loss": 0.7183,
      "step": 1276
    },
    {
      "epoch": 0.2625141330044198,
      "grad_norm": 0.23766203224658966,
      "learning_rate": 8.922197712053697e-05,
      "loss": 0.7331,
      "step": 1277
    },
    {
      "epoch": 0.2627197039777983,
      "grad_norm": 0.25914183259010315,
      "learning_rate": 8.922012675076034e-05,
      "loss": 0.7342,
      "step": 1278
    },
    {
      "epoch": 0.2629252749511769,
      "grad_norm": 0.26477503776550293,
      "learning_rate": 8.921827420246473e-05,
      "loss": 0.7313,
      "step": 1279
    },
    {
      "epoch": 0.26313084592455543,
      "grad_norm": 0.3233232796192169,
      "learning_rate": 8.921641947574145e-05,
      "loss": 0.7345,
      "step": 1280
    },
    {
      "epoch": 0.263336416897934,
      "grad_norm": 0.20394398272037506,
      "learning_rate": 8.921456257068186e-05,
      "loss": 0.5848,
      "step": 1281
    },
    {
      "epoch": 0.26354198787131256,
      "grad_norm": 0.28951147198677063,
      "learning_rate": 8.921270348737741e-05,
      "loss": 0.7507,
      "step": 1282
    },
    {
      "epoch": 0.26374755884469114,
      "grad_norm": 0.26492390036582947,
      "learning_rate": 8.921084222591971e-05,
      "loss": 0.7124,
      "step": 1283
    },
    {
      "epoch": 0.2639531298180697,
      "grad_norm": 0.2661970555782318,
      "learning_rate": 8.920897878640046e-05,
      "loss": 0.7556,
      "step": 1284
    },
    {
      "epoch": 0.26415870079144826,
      "grad_norm": 0.17668524384498596,
      "learning_rate": 8.920711316891145e-05,
      "loss": 0.5874,
      "step": 1285
    },
    {
      "epoch": 0.2643642717648268,
      "grad_norm": 0.2812560796737671,
      "learning_rate": 8.92052453735446e-05,
      "loss": 0.744,
      "step": 1286
    },
    {
      "epoch": 0.2645698427382054,
      "grad_norm": 0.25487664341926575,
      "learning_rate": 8.920337540039193e-05,
      "loss": 0.7414,
      "step": 1287
    },
    {
      "epoch": 0.2647754137115839,
      "grad_norm": 0.26109081506729126,
      "learning_rate": 8.920150324954557e-05,
      "loss": 0.7305,
      "step": 1288
    },
    {
      "epoch": 0.2649809846849625,
      "grad_norm": 0.2654556334018707,
      "learning_rate": 8.919962892109772e-05,
      "loss": 0.7105,
      "step": 1289
    },
    {
      "epoch": 0.26518655565834104,
      "grad_norm": 0.25440090894699097,
      "learning_rate": 8.919775241514075e-05,
      "loss": 0.7567,
      "step": 1290
    },
    {
      "epoch": 0.2653921266317196,
      "grad_norm": 0.26158374547958374,
      "learning_rate": 8.91958737317671e-05,
      "loss": 0.7656,
      "step": 1291
    },
    {
      "epoch": 0.26559769760509816,
      "grad_norm": 0.25178900361061096,
      "learning_rate": 8.919399287106933e-05,
      "loss": 0.7342,
      "step": 1292
    },
    {
      "epoch": 0.2658032685784767,
      "grad_norm": 0.2315172553062439,
      "learning_rate": 8.91921098331401e-05,
      "loss": 0.7527,
      "step": 1293
    },
    {
      "epoch": 0.2660088395518553,
      "grad_norm": 0.2387528419494629,
      "learning_rate": 8.919022461807215e-05,
      "loss": 0.7414,
      "step": 1294
    },
    {
      "epoch": 0.2662144105252338,
      "grad_norm": 0.24964243173599243,
      "learning_rate": 8.918833722595838e-05,
      "loss": 0.7538,
      "step": 1295
    },
    {
      "epoch": 0.2664199814986124,
      "grad_norm": 0.43933603167533875,
      "learning_rate": 8.918644765689179e-05,
      "loss": 0.738,
      "step": 1296
    },
    {
      "epoch": 0.26662555247199093,
      "grad_norm": 0.23242905735969543,
      "learning_rate": 8.918455591096543e-05,
      "loss": 0.7456,
      "step": 1297
    },
    {
      "epoch": 0.2668311234453695,
      "grad_norm": 0.2441163808107376,
      "learning_rate": 8.918266198827252e-05,
      "loss": 0.7278,
      "step": 1298
    },
    {
      "epoch": 0.26703669441874806,
      "grad_norm": 0.2470923811197281,
      "learning_rate": 8.918076588890637e-05,
      "loss": 0.7274,
      "step": 1299
    },
    {
      "epoch": 0.26724226539212664,
      "grad_norm": 0.23086468875408173,
      "learning_rate": 8.917886761296039e-05,
      "loss": 0.7503,
      "step": 1300
    },
    {
      "epoch": 0.2674478363655052,
      "grad_norm": 0.24466407299041748,
      "learning_rate": 8.917696716052808e-05,
      "loss": 0.6128,
      "step": 1301
    },
    {
      "epoch": 0.26765340733888376,
      "grad_norm": 0.24658440053462982,
      "learning_rate": 8.91750645317031e-05,
      "loss": 0.7356,
      "step": 1302
    },
    {
      "epoch": 0.2678589783122623,
      "grad_norm": 0.24751920998096466,
      "learning_rate": 8.917315972657915e-05,
      "loss": 0.7394,
      "step": 1303
    },
    {
      "epoch": 0.2680645492856409,
      "grad_norm": 0.2545618414878845,
      "learning_rate": 8.91712527452501e-05,
      "loss": 0.7412,
      "step": 1304
    },
    {
      "epoch": 0.2682701202590194,
      "grad_norm": 0.23690831661224365,
      "learning_rate": 8.916934358780986e-05,
      "loss": 0.7224,
      "step": 1305
    },
    {
      "epoch": 0.268475691232398,
      "grad_norm": 0.24612128734588623,
      "learning_rate": 8.916743225435252e-05,
      "loss": 0.7441,
      "step": 1306
    },
    {
      "epoch": 0.26868126220577654,
      "grad_norm": 0.24375763535499573,
      "learning_rate": 8.916551874497223e-05,
      "loss": 0.735,
      "step": 1307
    },
    {
      "epoch": 0.2688868331791551,
      "grad_norm": 0.22968213260173798,
      "learning_rate": 8.916360305976326e-05,
      "loss": 0.7453,
      "step": 1308
    },
    {
      "epoch": 0.26909240415253366,
      "grad_norm": 0.23660656809806824,
      "learning_rate": 8.916168519881999e-05,
      "loss": 0.7201,
      "step": 1309
    },
    {
      "epoch": 0.26929797512591225,
      "grad_norm": 0.2977808713912964,
      "learning_rate": 8.915976516223691e-05,
      "loss": 0.6098,
      "step": 1310
    },
    {
      "epoch": 0.2695035460992908,
      "grad_norm": 0.2509056031703949,
      "learning_rate": 8.915784295010859e-05,
      "loss": 0.7539,
      "step": 1311
    },
    {
      "epoch": 0.2697091170726693,
      "grad_norm": 0.2543947696685791,
      "learning_rate": 8.915591856252973e-05,
      "loss": 0.7508,
      "step": 1312
    },
    {
      "epoch": 0.2699146880460479,
      "grad_norm": 0.24036121368408203,
      "learning_rate": 8.915399199959516e-05,
      "loss": 0.7149,
      "step": 1313
    },
    {
      "epoch": 0.27012025901942643,
      "grad_norm": 0.2512202560901642,
      "learning_rate": 8.915206326139978e-05,
      "loss": 0.6823,
      "step": 1314
    },
    {
      "epoch": 0.270325829992805,
      "grad_norm": 0.24787308275699615,
      "learning_rate": 8.915013234803863e-05,
      "loss": 0.7399,
      "step": 1315
    },
    {
      "epoch": 0.27053140096618356,
      "grad_norm": 0.24503572285175323,
      "learning_rate": 8.914819925960679e-05,
      "loss": 0.7347,
      "step": 1316
    },
    {
      "epoch": 0.27073697193956214,
      "grad_norm": 0.23503392934799194,
      "learning_rate": 8.914626399619951e-05,
      "loss": 0.7262,
      "step": 1317
    },
    {
      "epoch": 0.2709425429129407,
      "grad_norm": 0.23490577936172485,
      "learning_rate": 8.914432655791217e-05,
      "loss": 0.7333,
      "step": 1318
    },
    {
      "epoch": 0.27114811388631926,
      "grad_norm": 0.2428707480430603,
      "learning_rate": 8.914238694484016e-05,
      "loss": 0.7087,
      "step": 1319
    },
    {
      "epoch": 0.2713536848596978,
      "grad_norm": 0.24492257833480835,
      "learning_rate": 8.91404451570791e-05,
      "loss": 0.7164,
      "step": 1320
    },
    {
      "epoch": 0.2715592558330764,
      "grad_norm": 0.2504068911075592,
      "learning_rate": 8.913850119472461e-05,
      "loss": 0.7406,
      "step": 1321
    },
    {
      "epoch": 0.2717648268064549,
      "grad_norm": 0.24984775483608246,
      "learning_rate": 8.913655505787246e-05,
      "loss": 0.7324,
      "step": 1322
    },
    {
      "epoch": 0.2719703977798335,
      "grad_norm": 0.23938335478305817,
      "learning_rate": 8.913460674661854e-05,
      "loss": 0.7147,
      "step": 1323
    },
    {
      "epoch": 0.27217596875321204,
      "grad_norm": 0.24494026601314545,
      "learning_rate": 8.913265626105883e-05,
      "loss": 0.7476,
      "step": 1324
    },
    {
      "epoch": 0.2723815397265906,
      "grad_norm": 0.23465509712696075,
      "learning_rate": 8.913070360128941e-05,
      "loss": 0.7203,
      "step": 1325
    },
    {
      "epoch": 0.27258711069996916,
      "grad_norm": 0.2233608067035675,
      "learning_rate": 8.912874876740651e-05,
      "loss": 0.7189,
      "step": 1326
    },
    {
      "epoch": 0.27279268167334775,
      "grad_norm": 0.23633797466754913,
      "learning_rate": 8.912679175950641e-05,
      "loss": 0.7257,
      "step": 1327
    },
    {
      "epoch": 0.2729982526467263,
      "grad_norm": 0.22821030020713806,
      "learning_rate": 8.912483257768551e-05,
      "loss": 0.726,
      "step": 1328
    },
    {
      "epoch": 0.27320382362010487,
      "grad_norm": 0.2244369387626648,
      "learning_rate": 8.912287122204038e-05,
      "loss": 0.709,
      "step": 1329
    },
    {
      "epoch": 0.2734093945934834,
      "grad_norm": 0.23471800982952118,
      "learning_rate": 8.912090769266758e-05,
      "loss": 0.7163,
      "step": 1330
    },
    {
      "epoch": 0.27361496556686193,
      "grad_norm": 0.23954612016677856,
      "learning_rate": 8.911894198966391e-05,
      "loss": 0.7477,
      "step": 1331
    },
    {
      "epoch": 0.2738205365402405,
      "grad_norm": 0.33054718375205994,
      "learning_rate": 8.911697411312616e-05,
      "loss": 0.616,
      "step": 1332
    },
    {
      "epoch": 0.27402610751361905,
      "grad_norm": 0.26455309987068176,
      "learning_rate": 8.91150040631513e-05,
      "loss": 0.7477,
      "step": 1333
    },
    {
      "epoch": 0.27423167848699764,
      "grad_norm": 0.15511548519134521,
      "learning_rate": 8.911303183983639e-05,
      "loss": 0.5804,
      "step": 1334
    },
    {
      "epoch": 0.2744372494603762,
      "grad_norm": 0.2723095715045929,
      "learning_rate": 8.911105744327858e-05,
      "loss": 0.7527,
      "step": 1335
    },
    {
      "epoch": 0.27464282043375476,
      "grad_norm": 0.2615657150745392,
      "learning_rate": 8.910908087357515e-05,
      "loss": 0.7228,
      "step": 1336
    },
    {
      "epoch": 0.2748483914071333,
      "grad_norm": 0.2343035191297531,
      "learning_rate": 8.910710213082346e-05,
      "loss": 0.7435,
      "step": 1337
    },
    {
      "epoch": 0.2750539623805119,
      "grad_norm": 0.27343472838401794,
      "learning_rate": 8.910512121512101e-05,
      "loss": 0.7415,
      "step": 1338
    },
    {
      "epoch": 0.2752595333538904,
      "grad_norm": 0.2690789997577667,
      "learning_rate": 8.910313812656539e-05,
      "loss": 0.7301,
      "step": 1339
    },
    {
      "epoch": 0.275465104327269,
      "grad_norm": 0.23863738775253296,
      "learning_rate": 8.910115286525428e-05,
      "loss": 0.7114,
      "step": 1340
    },
    {
      "epoch": 0.27567067530064754,
      "grad_norm": 0.26206308603286743,
      "learning_rate": 8.909916543128551e-05,
      "loss": 0.5967,
      "step": 1341
    },
    {
      "epoch": 0.2758762462740261,
      "grad_norm": 0.27798014879226685,
      "learning_rate": 8.909717582475695e-05,
      "loss": 0.7337,
      "step": 1342
    },
    {
      "epoch": 0.27608181724740466,
      "grad_norm": 0.23681025207042694,
      "learning_rate": 8.909518404576668e-05,
      "loss": 0.7287,
      "step": 1343
    },
    {
      "epoch": 0.27628738822078325,
      "grad_norm": 0.2664317786693573,
      "learning_rate": 8.90931900944128e-05,
      "loss": 0.7151,
      "step": 1344
    },
    {
      "epoch": 0.2764929591941618,
      "grad_norm": 0.2881788909435272,
      "learning_rate": 8.909119397079349e-05,
      "loss": 0.7289,
      "step": 1345
    },
    {
      "epoch": 0.27669853016754037,
      "grad_norm": 0.248192697763443,
      "learning_rate": 8.908919567500718e-05,
      "loss": 0.7233,
      "step": 1346
    },
    {
      "epoch": 0.2769041011409189,
      "grad_norm": 0.2383420318365097,
      "learning_rate": 8.908719520715224e-05,
      "loss": 0.7178,
      "step": 1347
    },
    {
      "epoch": 0.27710967211429743,
      "grad_norm": 0.23679983615875244,
      "learning_rate": 8.908519256732727e-05,
      "loss": 0.717,
      "step": 1348
    },
    {
      "epoch": 0.277315243087676,
      "grad_norm": 0.2335837185382843,
      "learning_rate": 8.908318775563092e-05,
      "loss": 0.7167,
      "step": 1349
    },
    {
      "epoch": 0.27752081406105455,
      "grad_norm": 0.247580885887146,
      "learning_rate": 8.908118077216194e-05,
      "loss": 0.7467,
      "step": 1350
    },
    {
      "epoch": 0.27772638503443314,
      "grad_norm": 0.24042358994483948,
      "learning_rate": 8.907917161701923e-05,
      "loss": 0.7615,
      "step": 1351
    },
    {
      "epoch": 0.2779319560078117,
      "grad_norm": 0.24658474326133728,
      "learning_rate": 8.907716029030174e-05,
      "loss": 0.7096,
      "step": 1352
    },
    {
      "epoch": 0.27813752698119026,
      "grad_norm": 0.24043896794319153,
      "learning_rate": 8.90751467921086e-05,
      "loss": 0.735,
      "step": 1353
    },
    {
      "epoch": 0.2783430979545688,
      "grad_norm": 0.2515980303287506,
      "learning_rate": 8.907313112253898e-05,
      "loss": 0.7167,
      "step": 1354
    },
    {
      "epoch": 0.2785486689279474,
      "grad_norm": 0.23116926848888397,
      "learning_rate": 8.907111328169219e-05,
      "loss": 0.6996,
      "step": 1355
    },
    {
      "epoch": 0.2787542399013259,
      "grad_norm": 0.23852792382240295,
      "learning_rate": 8.906909326966762e-05,
      "loss": 0.7252,
      "step": 1356
    },
    {
      "epoch": 0.2789598108747045,
      "grad_norm": 0.2699477970600128,
      "learning_rate": 8.906707108656481e-05,
      "loss": 0.5933,
      "step": 1357
    },
    {
      "epoch": 0.27916538184808304,
      "grad_norm": 0.171479269862175,
      "learning_rate": 8.906504673248338e-05,
      "loss": 0.583,
      "step": 1358
    },
    {
      "epoch": 0.2793709528214616,
      "grad_norm": 0.1635981947183609,
      "learning_rate": 8.906302020752306e-05,
      "loss": 0.592,
      "step": 1359
    },
    {
      "epoch": 0.27957652379484016,
      "grad_norm": 0.3277224898338318,
      "learning_rate": 8.906099151178368e-05,
      "loss": 0.7403,
      "step": 1360
    },
    {
      "epoch": 0.27978209476821875,
      "grad_norm": 0.27374133467674255,
      "learning_rate": 8.905896064536519e-05,
      "loss": 0.7438,
      "step": 1361
    },
    {
      "epoch": 0.2799876657415973,
      "grad_norm": 0.2909560203552246,
      "learning_rate": 8.905692760836765e-05,
      "loss": 0.5838,
      "step": 1362
    },
    {
      "epoch": 0.28019323671497587,
      "grad_norm": 0.34569621086120605,
      "learning_rate": 8.905489240089119e-05,
      "loss": 0.7456,
      "step": 1363
    },
    {
      "epoch": 0.2803988076883544,
      "grad_norm": 0.32318931818008423,
      "learning_rate": 8.90528550230361e-05,
      "loss": 0.7337,
      "step": 1364
    },
    {
      "epoch": 0.280604378661733,
      "grad_norm": 0.24782495200634003,
      "learning_rate": 8.905081547490276e-05,
      "loss": 0.7135,
      "step": 1365
    },
    {
      "epoch": 0.2808099496351115,
      "grad_norm": 0.25972336530685425,
      "learning_rate": 8.904877375659163e-05,
      "loss": 0.7076,
      "step": 1366
    },
    {
      "epoch": 0.28101552060849005,
      "grad_norm": 0.28636348247528076,
      "learning_rate": 8.904672986820328e-05,
      "loss": 0.7406,
      "step": 1367
    },
    {
      "epoch": 0.28122109158186864,
      "grad_norm": 0.21100643277168274,
      "learning_rate": 8.904468380983843e-05,
      "loss": 0.6081,
      "step": 1368
    },
    {
      "epoch": 0.2814266625552472,
      "grad_norm": 0.2907034456729889,
      "learning_rate": 8.904263558159788e-05,
      "loss": 0.7046,
      "step": 1369
    },
    {
      "epoch": 0.28163223352862576,
      "grad_norm": 0.2622237205505371,
      "learning_rate": 8.904058518358253e-05,
      "loss": 0.7578,
      "step": 1370
    },
    {
      "epoch": 0.2818378045020043,
      "grad_norm": 0.2604566812515259,
      "learning_rate": 8.903853261589339e-05,
      "loss": 0.75,
      "step": 1371
    },
    {
      "epoch": 0.2820433754753829,
      "grad_norm": 0.27299514412879944,
      "learning_rate": 8.90364778786316e-05,
      "loss": 0.7491,
      "step": 1372
    },
    {
      "epoch": 0.2822489464487614,
      "grad_norm": 0.25931867957115173,
      "learning_rate": 8.903442097189835e-05,
      "loss": 0.6978,
      "step": 1373
    },
    {
      "epoch": 0.28245451742214,
      "grad_norm": 0.2450464367866516,
      "learning_rate": 8.9032361895795e-05,
      "loss": 0.7276,
      "step": 1374
    },
    {
      "epoch": 0.28266008839551854,
      "grad_norm": 0.20911885797977448,
      "learning_rate": 8.903030065042298e-05,
      "loss": 0.5984,
      "step": 1375
    },
    {
      "epoch": 0.2828656593688971,
      "grad_norm": 0.2976955473423004,
      "learning_rate": 8.902823723588385e-05,
      "loss": 0.7332,
      "step": 1376
    },
    {
      "epoch": 0.28307123034227566,
      "grad_norm": 0.2745811641216278,
      "learning_rate": 8.902617165227928e-05,
      "loss": 0.7369,
      "step": 1377
    },
    {
      "epoch": 0.28327680131565425,
      "grad_norm": 0.23596425354480743,
      "learning_rate": 8.902410389971099e-05,
      "loss": 0.7253,
      "step": 1378
    },
    {
      "epoch": 0.2834823722890328,
      "grad_norm": 0.25958871841430664,
      "learning_rate": 8.902203397828086e-05,
      "loss": 0.7494,
      "step": 1379
    },
    {
      "epoch": 0.28368794326241137,
      "grad_norm": 0.2587198317050934,
      "learning_rate": 8.901996188809088e-05,
      "loss": 0.7001,
      "step": 1380
    },
    {
      "epoch": 0.2838935142357899,
      "grad_norm": 0.2621273696422577,
      "learning_rate": 8.901788762924313e-05,
      "loss": 0.728,
      "step": 1381
    },
    {
      "epoch": 0.2840990852091685,
      "grad_norm": 0.18734264373779297,
      "learning_rate": 8.901581120183979e-05,
      "loss": 0.6061,
      "step": 1382
    },
    {
      "epoch": 0.284304656182547,
      "grad_norm": 0.16175542771816254,
      "learning_rate": 8.901373260598317e-05,
      "loss": 0.6072,
      "step": 1383
    },
    {
      "epoch": 0.2845102271559256,
      "grad_norm": 0.30578863620758057,
      "learning_rate": 8.901165184177567e-05,
      "loss": 0.7373,
      "step": 1384
    },
    {
      "epoch": 0.28471579812930414,
      "grad_norm": 0.26835259795188904,
      "learning_rate": 8.900956890931979e-05,
      "loss": 0.7249,
      "step": 1385
    },
    {
      "epoch": 0.2849213691026827,
      "grad_norm": 0.221610888838768,
      "learning_rate": 8.900748380871814e-05,
      "loss": 0.5865,
      "step": 1386
    },
    {
      "epoch": 0.28512694007606126,
      "grad_norm": 0.27838990092277527,
      "learning_rate": 8.900539654007346e-05,
      "loss": 0.7224,
      "step": 1387
    },
    {
      "epoch": 0.2853325110494398,
      "grad_norm": 0.24998264014720917,
      "learning_rate": 8.900330710348857e-05,
      "loss": 0.7112,
      "step": 1388
    },
    {
      "epoch": 0.2855380820228184,
      "grad_norm": 0.2573053240776062,
      "learning_rate": 8.900121549906642e-05,
      "loss": 0.7395,
      "step": 1389
    },
    {
      "epoch": 0.2857436529961969,
      "grad_norm": 0.24121756851673126,
      "learning_rate": 8.899912172691004e-05,
      "loss": 0.747,
      "step": 1390
    },
    {
      "epoch": 0.2859492239695755,
      "grad_norm": 0.2541133463382721,
      "learning_rate": 8.899702578712256e-05,
      "loss": 0.7226,
      "step": 1391
    },
    {
      "epoch": 0.28615479494295404,
      "grad_norm": 0.24340660870075226,
      "learning_rate": 8.899492767980729e-05,
      "loss": 0.698,
      "step": 1392
    },
    {
      "epoch": 0.2863603659163326,
      "grad_norm": 0.24495667219161987,
      "learning_rate": 8.899282740506756e-05,
      "loss": 0.7535,
      "step": 1393
    },
    {
      "epoch": 0.28656593688971116,
      "grad_norm": 0.2280047982931137,
      "learning_rate": 8.899072496300684e-05,
      "loss": 0.7219,
      "step": 1394
    },
    {
      "epoch": 0.28677150786308975,
      "grad_norm": 0.23093637824058533,
      "learning_rate": 8.898862035372872e-05,
      "loss": 0.7135,
      "step": 1395
    },
    {
      "epoch": 0.2869770788364683,
      "grad_norm": 0.24832944571971893,
      "learning_rate": 8.898651357733686e-05,
      "loss": 0.7522,
      "step": 1396
    },
    {
      "epoch": 0.28718264980984687,
      "grad_norm": 0.23297333717346191,
      "learning_rate": 8.898440463393508e-05,
      "loss": 0.7546,
      "step": 1397
    },
    {
      "epoch": 0.2873882207832254,
      "grad_norm": 0.21482457220554352,
      "learning_rate": 8.898229352362727e-05,
      "loss": 0.5847,
      "step": 1398
    },
    {
      "epoch": 0.287593791756604,
      "grad_norm": 0.16317768394947052,
      "learning_rate": 8.898018024651742e-05,
      "loss": 0.5954,
      "step": 1399
    },
    {
      "epoch": 0.2877993627299825,
      "grad_norm": 0.3127588629722595,
      "learning_rate": 8.897806480270967e-05,
      "loss": 0.7413,
      "step": 1400
    },
    {
      "epoch": 0.2880049337033611,
      "grad_norm": 0.2599581182003021,
      "learning_rate": 8.897594719230821e-05,
      "loss": 0.7315,
      "step": 1401
    },
    {
      "epoch": 0.28821050467673964,
      "grad_norm": 0.23986676335334778,
      "learning_rate": 8.897382741541737e-05,
      "loss": 0.7528,
      "step": 1402
    },
    {
      "epoch": 0.28841607565011823,
      "grad_norm": 0.2908901870250702,
      "learning_rate": 8.897170547214159e-05,
      "loss": 0.7404,
      "step": 1403
    },
    {
      "epoch": 0.28862164662349676,
      "grad_norm": 0.3151310682296753,
      "learning_rate": 8.896958136258541e-05,
      "loss": 0.6033,
      "step": 1404
    },
    {
      "epoch": 0.2888272175968753,
      "grad_norm": 0.2576965391635895,
      "learning_rate": 8.896745508685346e-05,
      "loss": 0.7326,
      "step": 1405
    },
    {
      "epoch": 0.2890327885702539,
      "grad_norm": 0.2626875340938568,
      "learning_rate": 8.896532664505051e-05,
      "loss": 0.7408,
      "step": 1406
    },
    {
      "epoch": 0.2892383595436324,
      "grad_norm": 0.24406549334526062,
      "learning_rate": 8.896319603728141e-05,
      "loss": 0.7326,
      "step": 1407
    },
    {
      "epoch": 0.289443930517011,
      "grad_norm": 0.24385593831539154,
      "learning_rate": 8.896106326365112e-05,
      "loss": 0.7503,
      "step": 1408
    },
    {
      "epoch": 0.28964950149038954,
      "grad_norm": 0.24427802860736847,
      "learning_rate": 8.89589283242647e-05,
      "loss": 0.7341,
      "step": 1409
    },
    {
      "epoch": 0.2898550724637681,
      "grad_norm": 0.24131245911121368,
      "learning_rate": 8.895679121922738e-05,
      "loss": 0.7313,
      "step": 1410
    },
    {
      "epoch": 0.29006064343714666,
      "grad_norm": 0.24251912534236908,
      "learning_rate": 8.895465194864439e-05,
      "loss": 0.7138,
      "step": 1411
    },
    {
      "epoch": 0.29026621441052525,
      "grad_norm": 0.22263044118881226,
      "learning_rate": 8.895251051262115e-05,
      "loss": 0.6891,
      "step": 1412
    },
    {
      "epoch": 0.2904717853839038,
      "grad_norm": 0.23494918644428253,
      "learning_rate": 8.895036691126314e-05,
      "loss": 0.732,
      "step": 1413
    },
    {
      "epoch": 0.29067735635728237,
      "grad_norm": 0.22686836123466492,
      "learning_rate": 8.894822114467598e-05,
      "loss": 0.7274,
      "step": 1414
    },
    {
      "epoch": 0.2908829273306609,
      "grad_norm": 0.24379804730415344,
      "learning_rate": 8.894607321296538e-05,
      "loss": 0.74,
      "step": 1415
    },
    {
      "epoch": 0.2910884983040395,
      "grad_norm": 0.23114730417728424,
      "learning_rate": 8.894392311623714e-05,
      "loss": 0.7377,
      "step": 1416
    },
    {
      "epoch": 0.291294069277418,
      "grad_norm": 0.23655329644680023,
      "learning_rate": 8.894177085459722e-05,
      "loss": 0.7493,
      "step": 1417
    },
    {
      "epoch": 0.2914996402507966,
      "grad_norm": 0.2256159633398056,
      "learning_rate": 8.893961642815163e-05,
      "loss": 0.6974,
      "step": 1418
    },
    {
      "epoch": 0.29170521122417514,
      "grad_norm": 0.20934060215950012,
      "learning_rate": 8.893745983700652e-05,
      "loss": 0.5891,
      "step": 1419
    },
    {
      "epoch": 0.29191078219755373,
      "grad_norm": 0.1600976139307022,
      "learning_rate": 8.893530108126811e-05,
      "loss": 0.6138,
      "step": 1420
    },
    {
      "epoch": 0.29211635317093226,
      "grad_norm": 0.1524209976196289,
      "learning_rate": 8.893314016104278e-05,
      "loss": 0.5702,
      "step": 1421
    },
    {
      "epoch": 0.2923219241443108,
      "grad_norm": 0.31443774700164795,
      "learning_rate": 8.893097707643697e-05,
      "loss": 0.6969,
      "step": 1422
    },
    {
      "epoch": 0.2925274951176894,
      "grad_norm": 0.2652696669101715,
      "learning_rate": 8.892881182755727e-05,
      "loss": 0.7177,
      "step": 1423
    },
    {
      "epoch": 0.2927330660910679,
      "grad_norm": 0.23116344213485718,
      "learning_rate": 8.892664441451031e-05,
      "loss": 0.6064,
      "step": 1424
    },
    {
      "epoch": 0.2929386370644465,
      "grad_norm": 0.2783909738063812,
      "learning_rate": 8.892447483740291e-05,
      "loss": 0.7301,
      "step": 1425
    },
    {
      "epoch": 0.29314420803782504,
      "grad_norm": 0.2517321705818176,
      "learning_rate": 8.892230309634192e-05,
      "loss": 0.7447,
      "step": 1426
    },
    {
      "epoch": 0.2933497790112036,
      "grad_norm": 0.2492847889661789,
      "learning_rate": 8.892012919143436e-05,
      "loss": 0.7529,
      "step": 1427
    },
    {
      "epoch": 0.29355534998458216,
      "grad_norm": 0.23372922837734222,
      "learning_rate": 8.891795312278732e-05,
      "loss": 0.7302,
      "step": 1428
    },
    {
      "epoch": 0.29376092095796075,
      "grad_norm": 0.260433167219162,
      "learning_rate": 8.8915774890508e-05,
      "loss": 0.7388,
      "step": 1429
    },
    {
      "epoch": 0.2939664919313393,
      "grad_norm": 0.24735549092292786,
      "learning_rate": 8.89135944947037e-05,
      "loss": 0.6851,
      "step": 1430
    },
    {
      "epoch": 0.29417206290471787,
      "grad_norm": 0.24530264735221863,
      "learning_rate": 8.891141193548188e-05,
      "loss": 0.7483,
      "step": 1431
    },
    {
      "epoch": 0.2943776338780964,
      "grad_norm": 0.24232807755470276,
      "learning_rate": 8.890922721295e-05,
      "loss": 0.7272,
      "step": 1432
    },
    {
      "epoch": 0.294583204851475,
      "grad_norm": 0.23810634016990662,
      "learning_rate": 8.890704032721575e-05,
      "loss": 0.6853,
      "step": 1433
    },
    {
      "epoch": 0.2947887758248535,
      "grad_norm": 0.23144571483135223,
      "learning_rate": 8.890485127838684e-05,
      "loss": 0.7317,
      "step": 1434
    },
    {
      "epoch": 0.2949943467982321,
      "grad_norm": 0.23867613077163696,
      "learning_rate": 8.890266006657111e-05,
      "loss": 0.7378,
      "step": 1435
    },
    {
      "epoch": 0.29519991777161064,
      "grad_norm": 0.2355402261018753,
      "learning_rate": 8.890046669187653e-05,
      "loss": 0.7183,
      "step": 1436
    },
    {
      "epoch": 0.29540548874498923,
      "grad_norm": 0.2344846874475479,
      "learning_rate": 8.889827115441114e-05,
      "loss": 0.6113,
      "step": 1437
    },
    {
      "epoch": 0.29561105971836776,
      "grad_norm": 0.25104036927223206,
      "learning_rate": 8.88960734542831e-05,
      "loss": 0.716,
      "step": 1438
    },
    {
      "epoch": 0.29581663069174635,
      "grad_norm": 0.2465832382440567,
      "learning_rate": 8.88938735916007e-05,
      "loss": 0.7588,
      "step": 1439
    },
    {
      "epoch": 0.2960222016651249,
      "grad_norm": 0.24674251675605774,
      "learning_rate": 8.889167156647231e-05,
      "loss": 0.7221,
      "step": 1440
    },
    {
      "epoch": 0.2962277726385034,
      "grad_norm": 0.25955334305763245,
      "learning_rate": 8.888946737900642e-05,
      "loss": 0.742,
      "step": 1441
    },
    {
      "epoch": 0.296433343611882,
      "grad_norm": 0.2384418547153473,
      "learning_rate": 8.888726102931159e-05,
      "loss": 0.7298,
      "step": 1442
    },
    {
      "epoch": 0.29663891458526054,
      "grad_norm": 0.2418283224105835,
      "learning_rate": 8.888505251749655e-05,
      "loss": 0.7149,
      "step": 1443
    },
    {
      "epoch": 0.2968444855586391,
      "grad_norm": 0.2591508626937866,
      "learning_rate": 8.88828418436701e-05,
      "loss": 0.7281,
      "step": 1444
    },
    {
      "epoch": 0.29705005653201766,
      "grad_norm": 0.2347528338432312,
      "learning_rate": 8.888062900794113e-05,
      "loss": 0.741,
      "step": 1445
    },
    {
      "epoch": 0.29725562750539625,
      "grad_norm": 0.22745028138160706,
      "learning_rate": 8.887841401041865e-05,
      "loss": 0.7347,
      "step": 1446
    },
    {
      "epoch": 0.2974611984787748,
      "grad_norm": 0.236216738820076,
      "learning_rate": 8.887619685121183e-05,
      "loss": 0.7229,
      "step": 1447
    },
    {
      "epoch": 0.29766676945215337,
      "grad_norm": 0.22409434616565704,
      "learning_rate": 8.887397753042985e-05,
      "loss": 0.5921,
      "step": 1448
    },
    {
      "epoch": 0.2978723404255319,
      "grad_norm": 0.24046771228313446,
      "learning_rate": 8.887175604818206e-05,
      "loss": 0.6934,
      "step": 1449
    },
    {
      "epoch": 0.2980779113989105,
      "grad_norm": 0.25511425733566284,
      "learning_rate": 8.886953240457791e-05,
      "loss": 0.7177,
      "step": 1450
    },
    {
      "epoch": 0.298283482372289,
      "grad_norm": 0.23517939448356628,
      "learning_rate": 8.886730659972696e-05,
      "loss": 0.744,
      "step": 1451
    },
    {
      "epoch": 0.2984890533456676,
      "grad_norm": 0.23165474832057953,
      "learning_rate": 8.886507863373883e-05,
      "loss": 0.72,
      "step": 1452
    },
    {
      "epoch": 0.29869462431904614,
      "grad_norm": 0.22487609088420868,
      "learning_rate": 8.88628485067233e-05,
      "loss": 0.6993,
      "step": 1453
    },
    {
      "epoch": 0.29890019529242473,
      "grad_norm": 0.2359279990196228,
      "learning_rate": 8.886061621879024e-05,
      "loss": 0.7148,
      "step": 1454
    },
    {
      "epoch": 0.29910576626580326,
      "grad_norm": 0.23191282153129578,
      "learning_rate": 8.885838177004964e-05,
      "loss": 0.73,
      "step": 1455
    },
    {
      "epoch": 0.29931133723918185,
      "grad_norm": 0.2255670130252838,
      "learning_rate": 8.885614516061156e-05,
      "loss": 0.7192,
      "step": 1456
    },
    {
      "epoch": 0.2995169082125604,
      "grad_norm": 0.21794365346431732,
      "learning_rate": 8.885390639058617e-05,
      "loss": 0.7126,
      "step": 1457
    },
    {
      "epoch": 0.299722479185939,
      "grad_norm": 0.22137753665447235,
      "learning_rate": 8.88516654600838e-05,
      "loss": 0.6953,
      "step": 1458
    },
    {
      "epoch": 0.2999280501593175,
      "grad_norm": 0.23347578942775726,
      "learning_rate": 8.884942236921483e-05,
      "loss": 0.7275,
      "step": 1459
    },
    {
      "epoch": 0.30013362113269604,
      "grad_norm": 0.22592391073703766,
      "learning_rate": 8.884717711808976e-05,
      "loss": 0.7011,
      "step": 1460
    },
    {
      "epoch": 0.3003391921060746,
      "grad_norm": 0.2333751916885376,
      "learning_rate": 8.884492970681924e-05,
      "loss": 0.5993,
      "step": 1461
    },
    {
      "epoch": 0.30054476307945316,
      "grad_norm": 0.23949290812015533,
      "learning_rate": 8.884268013551395e-05,
      "loss": 0.7246,
      "step": 1462
    },
    {
      "epoch": 0.30075033405283175,
      "grad_norm": 0.22439618408679962,
      "learning_rate": 8.884042840428473e-05,
      "loss": 0.7257,
      "step": 1463
    },
    {
      "epoch": 0.3009559050262103,
      "grad_norm": 0.2332451343536377,
      "learning_rate": 8.883817451324253e-05,
      "loss": 0.7344,
      "step": 1464
    },
    {
      "epoch": 0.30116147599958887,
      "grad_norm": 0.2470991313457489,
      "learning_rate": 8.883591846249834e-05,
      "loss": 0.7396,
      "step": 1465
    },
    {
      "epoch": 0.3013670469729674,
      "grad_norm": 0.23062336444854736,
      "learning_rate": 8.883366025216336e-05,
      "loss": 0.715,
      "step": 1466
    },
    {
      "epoch": 0.301572617946346,
      "grad_norm": 0.2705153226852417,
      "learning_rate": 8.88313998823488e-05,
      "loss": 0.7202,
      "step": 1467
    },
    {
      "epoch": 0.3017781889197245,
      "grad_norm": 0.2432517409324646,
      "learning_rate": 8.882913735316604e-05,
      "loss": 0.7346,
      "step": 1468
    },
    {
      "epoch": 0.3019837598931031,
      "grad_norm": 0.20731572806835175,
      "learning_rate": 8.882687266472655e-05,
      "loss": 0.6029,
      "step": 1469
    },
    {
      "epoch": 0.30218933086648164,
      "grad_norm": 0.24890613555908203,
      "learning_rate": 8.882460581714188e-05,
      "loss": 0.743,
      "step": 1470
    },
    {
      "epoch": 0.30239490183986023,
      "grad_norm": 0.23934966325759888,
      "learning_rate": 8.882233681052371e-05,
      "loss": 0.7102,
      "step": 1471
    },
    {
      "epoch": 0.30260047281323876,
      "grad_norm": 0.2529708743095398,
      "learning_rate": 8.882006564498385e-05,
      "loss": 0.7366,
      "step": 1472
    },
    {
      "epoch": 0.30280604378661735,
      "grad_norm": 0.22400988638401031,
      "learning_rate": 8.881779232063416e-05,
      "loss": 0.7295,
      "step": 1473
    },
    {
      "epoch": 0.3030116147599959,
      "grad_norm": 0.23044519126415253,
      "learning_rate": 8.881551683758664e-05,
      "loss": 0.7332,
      "step": 1474
    },
    {
      "epoch": 0.3032171857333745,
      "grad_norm": 0.2295847088098526,
      "learning_rate": 8.881323919595341e-05,
      "loss": 0.6939,
      "step": 1475
    },
    {
      "epoch": 0.303422756706753,
      "grad_norm": 0.22964751720428467,
      "learning_rate": 8.881095939584667e-05,
      "loss": 0.7197,
      "step": 1476
    },
    {
      "epoch": 0.30362832768013154,
      "grad_norm": 0.2278130203485489,
      "learning_rate": 8.880867743737873e-05,
      "loss": 0.7366,
      "step": 1477
    },
    {
      "epoch": 0.3038338986535101,
      "grad_norm": 0.17138256132602692,
      "learning_rate": 8.8806393320662e-05,
      "loss": 0.585,
      "step": 1478
    },
    {
      "epoch": 0.30403946962688866,
      "grad_norm": 0.23692992329597473,
      "learning_rate": 8.880410704580904e-05,
      "loss": 0.7368,
      "step": 1479
    },
    {
      "epoch": 0.30424504060026725,
      "grad_norm": 0.23937001824378967,
      "learning_rate": 8.880181861293245e-05,
      "loss": 0.7465,
      "step": 1480
    },
    {
      "epoch": 0.3044506115736458,
      "grad_norm": 0.2425798624753952,
      "learning_rate": 8.879952802214498e-05,
      "loss": 0.7235,
      "step": 1481
    },
    {
      "epoch": 0.30465618254702437,
      "grad_norm": 0.22199256718158722,
      "learning_rate": 8.87972352735595e-05,
      "loss": 0.7266,
      "step": 1482
    },
    {
      "epoch": 0.3048617535204029,
      "grad_norm": 0.22652393579483032,
      "learning_rate": 8.879494036728895e-05,
      "loss": 0.7196,
      "step": 1483
    },
    {
      "epoch": 0.3050673244937815,
      "grad_norm": 0.23339220881462097,
      "learning_rate": 8.879264330344637e-05,
      "loss": 0.6907,
      "step": 1484
    },
    {
      "epoch": 0.30527289546716,
      "grad_norm": 0.17793652415275574,
      "learning_rate": 8.879034408214495e-05,
      "loss": 0.5843,
      "step": 1485
    },
    {
      "epoch": 0.3054784664405386,
      "grad_norm": 0.14778107404708862,
      "learning_rate": 8.878804270349794e-05,
      "loss": 0.5915,
      "step": 1486
    },
    {
      "epoch": 0.30568403741391714,
      "grad_norm": 0.25510430335998535,
      "learning_rate": 8.878573916761875e-05,
      "loss": 0.7359,
      "step": 1487
    },
    {
      "epoch": 0.30588960838729573,
      "grad_norm": 0.245680570602417,
      "learning_rate": 8.878343347462083e-05,
      "loss": 0.7232,
      "step": 1488
    },
    {
      "epoch": 0.30609517936067426,
      "grad_norm": 0.22665980458259583,
      "learning_rate": 8.878112562461781e-05,
      "loss": 0.72,
      "step": 1489
    },
    {
      "epoch": 0.30630075033405285,
      "grad_norm": 0.23110273480415344,
      "learning_rate": 8.877881561772334e-05,
      "loss": 0.7333,
      "step": 1490
    },
    {
      "epoch": 0.3065063213074314,
      "grad_norm": 0.2374107986688614,
      "learning_rate": 8.877650345405124e-05,
      "loss": 0.7047,
      "step": 1491
    },
    {
      "epoch": 0.30671189228081,
      "grad_norm": 0.23222175240516663,
      "learning_rate": 8.877418913371543e-05,
      "loss": 0.7247,
      "step": 1492
    },
    {
      "epoch": 0.3069174632541885,
      "grad_norm": 0.2248169332742691,
      "learning_rate": 8.877187265682993e-05,
      "loss": 0.731,
      "step": 1493
    },
    {
      "epoch": 0.3071230342275671,
      "grad_norm": 0.22877496480941772,
      "learning_rate": 8.876955402350885e-05,
      "loss": 0.7317,
      "step": 1494
    },
    {
      "epoch": 0.3073286052009456,
      "grad_norm": 0.23524411022663116,
      "learning_rate": 8.876723323386642e-05,
      "loss": 0.7243,
      "step": 1495
    },
    {
      "epoch": 0.30753417617432416,
      "grad_norm": 0.23392078280448914,
      "learning_rate": 8.876491028801698e-05,
      "loss": 0.7291,
      "step": 1496
    },
    {
      "epoch": 0.30773974714770275,
      "grad_norm": 0.23218654096126556,
      "learning_rate": 8.876258518607496e-05,
      "loss": 0.7185,
      "step": 1497
    },
    {
      "epoch": 0.3079453181210813,
      "grad_norm": 0.22467701137065887,
      "learning_rate": 8.876025792815493e-05,
      "loss": 0.6027,
      "step": 1498
    },
    {
      "epoch": 0.30815088909445987,
      "grad_norm": 0.16272898018360138,
      "learning_rate": 8.875792851437153e-05,
      "loss": 0.5879,
      "step": 1499
    },
    {
      "epoch": 0.3083564600678384,
      "grad_norm": 0.3116845190525055,
      "learning_rate": 8.875559694483949e-05,
      "loss": 0.7104,
      "step": 1500
    },
    {
      "epoch": 0.308562031041217,
      "grad_norm": 0.27991852164268494,
      "learning_rate": 8.875326321967371e-05,
      "loss": 0.7103,
      "step": 1501
    },
    {
      "epoch": 0.3087676020145955,
      "grad_norm": 0.2318386435508728,
      "learning_rate": 8.875092733898917e-05,
      "loss": 0.7377,
      "step": 1502
    },
    {
      "epoch": 0.3089731729879741,
      "grad_norm": 0.2598876655101776,
      "learning_rate": 8.874858930290091e-05,
      "loss": 0.6015,
      "step": 1503
    },
    {
      "epoch": 0.30917874396135264,
      "grad_norm": 0.29758408665657043,
      "learning_rate": 8.874624911152415e-05,
      "loss": 0.7181,
      "step": 1504
    },
    {
      "epoch": 0.30938431493473123,
      "grad_norm": 0.27736955881118774,
      "learning_rate": 8.874390676497416e-05,
      "loss": 0.7206,
      "step": 1505
    },
    {
      "epoch": 0.30958988590810976,
      "grad_norm": 0.2458835244178772,
      "learning_rate": 8.874156226336634e-05,
      "loss": 0.7499,
      "step": 1506
    },
    {
      "epoch": 0.30979545688148835,
      "grad_norm": 0.22762452065944672,
      "learning_rate": 8.873921560681619e-05,
      "loss": 0.5821,
      "step": 1507
    },
    {
      "epoch": 0.3100010278548669,
      "grad_norm": 0.27454984188079834,
      "learning_rate": 8.873686679543934e-05,
      "loss": 0.7146,
      "step": 1508
    },
    {
      "epoch": 0.3102065988282455,
      "grad_norm": 0.26772287487983704,
      "learning_rate": 8.873451582935148e-05,
      "loss": 0.7536,
      "step": 1509
    },
    {
      "epoch": 0.310412169801624,
      "grad_norm": 0.23362015187740326,
      "learning_rate": 8.873216270866843e-05,
      "loss": 0.6984,
      "step": 1510
    },
    {
      "epoch": 0.3106177407750026,
      "grad_norm": 0.23610959947109222,
      "learning_rate": 8.872980743350613e-05,
      "loss": 0.7171,
      "step": 1511
    },
    {
      "epoch": 0.3108233117483811,
      "grad_norm": 0.25894349813461304,
      "learning_rate": 8.872745000398062e-05,
      "loss": 0.7187,
      "step": 1512
    },
    {
      "epoch": 0.3110288827217597,
      "grad_norm": 0.26054081320762634,
      "learning_rate": 8.872509042020803e-05,
      "loss": 0.7203,
      "step": 1513
    },
    {
      "epoch": 0.31123445369513825,
      "grad_norm": 0.2335205376148224,
      "learning_rate": 8.872272868230461e-05,
      "loss": 0.7009,
      "step": 1514
    },
    {
      "epoch": 0.3114400246685168,
      "grad_norm": 0.24587051570415497,
      "learning_rate": 8.872036479038669e-05,
      "loss": 0.7399,
      "step": 1515
    },
    {
      "epoch": 0.31164559564189537,
      "grad_norm": 0.24924126267433167,
      "learning_rate": 8.871799874457075e-05,
      "loss": 0.7493,
      "step": 1516
    },
    {
      "epoch": 0.3118511666152739,
      "grad_norm": 0.24950510263442993,
      "learning_rate": 8.871563054497335e-05,
      "loss": 0.7178,
      "step": 1517
    },
    {
      "epoch": 0.3120567375886525,
      "grad_norm": 0.25369346141815186,
      "learning_rate": 8.871326019171117e-05,
      "loss": 0.6963,
      "step": 1518
    },
    {
      "epoch": 0.312262308562031,
      "grad_norm": 0.2488810122013092,
      "learning_rate": 8.871088768490098e-05,
      "loss": 0.7619,
      "step": 1519
    },
    {
      "epoch": 0.3124678795354096,
      "grad_norm": 0.24383045732975006,
      "learning_rate": 8.870851302465962e-05,
      "loss": 0.711,
      "step": 1520
    },
    {
      "epoch": 0.31267345050878814,
      "grad_norm": 0.2425009161233902,
      "learning_rate": 8.870613621110415e-05,
      "loss": 0.7177,
      "step": 1521
    },
    {
      "epoch": 0.31287902148216673,
      "grad_norm": 0.240753635764122,
      "learning_rate": 8.870375724435162e-05,
      "loss": 0.7244,
      "step": 1522
    },
    {
      "epoch": 0.31308459245554526,
      "grad_norm": 0.23214225471019745,
      "learning_rate": 8.870137612451926e-05,
      "loss": 0.7576,
      "step": 1523
    },
    {
      "epoch": 0.31329016342892385,
      "grad_norm": 0.2381378412246704,
      "learning_rate": 8.869899285172435e-05,
      "loss": 0.7379,
      "step": 1524
    },
    {
      "epoch": 0.3134957344023024,
      "grad_norm": 0.24119152128696442,
      "learning_rate": 8.869660742608429e-05,
      "loss": 0.5884,
      "step": 1525
    },
    {
      "epoch": 0.31370130537568097,
      "grad_norm": 0.1588635891675949,
      "learning_rate": 8.869421984771664e-05,
      "loss": 0.5977,
      "step": 1526
    },
    {
      "epoch": 0.3139068763490595,
      "grad_norm": 0.30175936222076416,
      "learning_rate": 8.869183011673899e-05,
      "loss": 0.7523,
      "step": 1527
    },
    {
      "epoch": 0.3141124473224381,
      "grad_norm": 0.2720763385295868,
      "learning_rate": 8.868943823326911e-05,
      "loss": 0.7369,
      "step": 1528
    },
    {
      "epoch": 0.3143180182958166,
      "grad_norm": 0.25000452995300293,
      "learning_rate": 8.868704419742477e-05,
      "loss": 0.7248,
      "step": 1529
    },
    {
      "epoch": 0.3145235892691952,
      "grad_norm": 0.24794606864452362,
      "learning_rate": 8.8684648009324e-05,
      "loss": 0.716,
      "step": 1530
    },
    {
      "epoch": 0.31472916024257375,
      "grad_norm": 0.2837069630622864,
      "learning_rate": 8.868224966908477e-05,
      "loss": 0.7167,
      "step": 1531
    },
    {
      "epoch": 0.31493473121595233,
      "grad_norm": 0.2553151845932007,
      "learning_rate": 8.867984917682529e-05,
      "loss": 0.728,
      "step": 1532
    },
    {
      "epoch": 0.31514030218933087,
      "grad_norm": 0.2584458589553833,
      "learning_rate": 8.86774465326638e-05,
      "loss": 0.7546,
      "step": 1533
    },
    {
      "epoch": 0.3153458731627094,
      "grad_norm": 0.3400932252407074,
      "learning_rate": 8.867504173671866e-05,
      "loss": 0.6503,
      "step": 1534
    },
    {
      "epoch": 0.315551444136088,
      "grad_norm": 0.22265098989009857,
      "learning_rate": 8.867263478910834e-05,
      "loss": 0.6126,
      "step": 1535
    },
    {
      "epoch": 0.3157570151094665,
      "grad_norm": 0.3153107464313507,
      "learning_rate": 8.867022568995144e-05,
      "loss": 0.7263,
      "step": 1536
    },
    {
      "epoch": 0.3159625860828451,
      "grad_norm": 0.2766020596027374,
      "learning_rate": 8.866781443936664e-05,
      "loss": 0.7219,
      "step": 1537
    },
    {
      "epoch": 0.31616815705622364,
      "grad_norm": 0.24225422739982605,
      "learning_rate": 8.866540103747273e-05,
      "loss": 0.7171,
      "step": 1538
    },
    {
      "epoch": 0.31637372802960223,
      "grad_norm": 0.25176170468330383,
      "learning_rate": 8.866298548438859e-05,
      "loss": 0.7344,
      "step": 1539
    },
    {
      "epoch": 0.31657929900298076,
      "grad_norm": 0.25651928782463074,
      "learning_rate": 8.866056778023322e-05,
      "loss": 0.7413,
      "step": 1540
    },
    {
      "epoch": 0.31678486997635935,
      "grad_norm": 0.2334342896938324,
      "learning_rate": 8.865814792512578e-05,
      "loss": 0.7253,
      "step": 1541
    },
    {
      "epoch": 0.3169904409497379,
      "grad_norm": 0.2274434119462967,
      "learning_rate": 8.865572591918542e-05,
      "loss": 0.7159,
      "step": 1542
    },
    {
      "epoch": 0.31719601192311647,
      "grad_norm": 0.2403416633605957,
      "learning_rate": 8.86533017625315e-05,
      "loss": 0.7181,
      "step": 1543
    },
    {
      "epoch": 0.317401582896495,
      "grad_norm": 0.4360656142234802,
      "learning_rate": 8.865087545528343e-05,
      "loss": 0.621,
      "step": 1544
    },
    {
      "epoch": 0.3176071538698736,
      "grad_norm": 0.267894983291626,
      "learning_rate": 8.864844699756077e-05,
      "loss": 0.7211,
      "step": 1545
    },
    {
      "epoch": 0.3178127248432521,
      "grad_norm": 0.28000763058662415,
      "learning_rate": 8.864601638948313e-05,
      "loss": 0.7417,
      "step": 1546
    },
    {
      "epoch": 0.3180182958166307,
      "grad_norm": 0.25448542833328247,
      "learning_rate": 8.864358363117026e-05,
      "loss": 0.7456,
      "step": 1547
    },
    {
      "epoch": 0.31822386679000925,
      "grad_norm": 0.22277960181236267,
      "learning_rate": 8.864114872274201e-05,
      "loss": 0.7509,
      "step": 1548
    },
    {
      "epoch": 0.31842943776338783,
      "grad_norm": 0.25154295563697815,
      "learning_rate": 8.863871166431835e-05,
      "loss": 0.7561,
      "step": 1549
    },
    {
      "epoch": 0.31863500873676637,
      "grad_norm": 0.24481630325317383,
      "learning_rate": 8.863627245601933e-05,
      "loss": 0.7205,
      "step": 1550
    },
    {
      "epoch": 0.3188405797101449,
      "grad_norm": 0.2636171877384186,
      "learning_rate": 8.863383109796514e-05,
      "loss": 0.6225,
      "step": 1551
    },
    {
      "epoch": 0.3190461506835235,
      "grad_norm": 0.24895146489143372,
      "learning_rate": 8.863138759027601e-05,
      "loss": 0.713,
      "step": 1552
    },
    {
      "epoch": 0.319251721656902,
      "grad_norm": 0.23717238008975983,
      "learning_rate": 8.862894193307234e-05,
      "loss": 0.7009,
      "step": 1553
    },
    {
      "epoch": 0.3194572926302806,
      "grad_norm": 0.17063067853450775,
      "learning_rate": 8.862649412647463e-05,
      "loss": 0.609,
      "step": 1554
    },
    {
      "epoch": 0.31966286360365914,
      "grad_norm": 0.24430248141288757,
      "learning_rate": 8.862404417060348e-05,
      "loss": 0.7329,
      "step": 1555
    },
    {
      "epoch": 0.31986843457703773,
      "grad_norm": 0.22696368396282196,
      "learning_rate": 8.862159206557955e-05,
      "loss": 0.7189,
      "step": 1556
    },
    {
      "epoch": 0.32007400555041626,
      "grad_norm": 0.23269693553447723,
      "learning_rate": 8.861913781152368e-05,
      "loss": 0.72,
      "step": 1557
    },
    {
      "epoch": 0.32027957652379485,
      "grad_norm": 0.23606634140014648,
      "learning_rate": 8.861668140855677e-05,
      "loss": 0.7273,
      "step": 1558
    },
    {
      "epoch": 0.3204851474971734,
      "grad_norm": 0.2232600301504135,
      "learning_rate": 8.861422285679982e-05,
      "loss": 0.7271,
      "step": 1559
    },
    {
      "epoch": 0.32069071847055197,
      "grad_norm": 0.22926129400730133,
      "learning_rate": 8.861176215637396e-05,
      "loss": 0.7046,
      "step": 1560
    },
    {
      "epoch": 0.3208962894439305,
      "grad_norm": 0.21815744042396545,
      "learning_rate": 8.860929930740043e-05,
      "loss": 0.7145,
      "step": 1561
    },
    {
      "epoch": 0.3211018604173091,
      "grad_norm": 0.2220899611711502,
      "learning_rate": 8.860683431000055e-05,
      "loss": 0.7517,
      "step": 1562
    },
    {
      "epoch": 0.3213074313906876,
      "grad_norm": 0.23148676753044128,
      "learning_rate": 8.860436716429576e-05,
      "loss": 0.7425,
      "step": 1563
    },
    {
      "epoch": 0.3215130023640662,
      "grad_norm": 0.2475571632385254,
      "learning_rate": 8.86018978704076e-05,
      "loss": 0.7373,
      "step": 1564
    },
    {
      "epoch": 0.32171857333744475,
      "grad_norm": 0.22201502323150635,
      "learning_rate": 8.859942642845773e-05,
      "loss": 0.739,
      "step": 1565
    },
    {
      "epoch": 0.32192414431082333,
      "grad_norm": 0.23228532075881958,
      "learning_rate": 8.859695283856791e-05,
      "loss": 0.7181,
      "step": 1566
    },
    {
      "epoch": 0.32212971528420187,
      "grad_norm": 0.22633086144924164,
      "learning_rate": 8.859447710085998e-05,
      "loss": 0.7264,
      "step": 1567
    },
    {
      "epoch": 0.32233528625758046,
      "grad_norm": 0.2289307564496994,
      "learning_rate": 8.859199921545595e-05,
      "loss": 0.6861,
      "step": 1568
    },
    {
      "epoch": 0.322540857230959,
      "grad_norm": 0.2249763160943985,
      "learning_rate": 8.858951918247784e-05,
      "loss": 0.7251,
      "step": 1569
    },
    {
      "epoch": 0.3227464282043375,
      "grad_norm": 0.21789641678333282,
      "learning_rate": 8.858703700204787e-05,
      "loss": 0.5872,
      "step": 1570
    },
    {
      "epoch": 0.3229519991777161,
      "grad_norm": 0.32843679189682007,
      "learning_rate": 8.85845526742883e-05,
      "loss": 0.7297,
      "step": 1571
    },
    {
      "epoch": 0.32315757015109464,
      "grad_norm": 0.2552517354488373,
      "learning_rate": 8.858206619932154e-05,
      "loss": 0.7297,
      "step": 1572
    },
    {
      "epoch": 0.32336314112447323,
      "grad_norm": 0.1595383882522583,
      "learning_rate": 8.857957757727008e-05,
      "loss": 0.5928,
      "step": 1573
    },
    {
      "epoch": 0.32356871209785176,
      "grad_norm": 0.23427622020244598,
      "learning_rate": 8.857708680825654e-05,
      "loss": 0.7416,
      "step": 1574
    },
    {
      "epoch": 0.32377428307123035,
      "grad_norm": 0.2303827553987503,
      "learning_rate": 8.85745938924036e-05,
      "loss": 0.7506,
      "step": 1575
    },
    {
      "epoch": 0.3239798540446089,
      "grad_norm": 0.2222229540348053,
      "learning_rate": 8.857209882983408e-05,
      "loss": 0.7212,
      "step": 1576
    },
    {
      "epoch": 0.32418542501798747,
      "grad_norm": 0.21901166439056396,
      "learning_rate": 8.856960162067091e-05,
      "loss": 0.7307,
      "step": 1577
    },
    {
      "epoch": 0.324390995991366,
      "grad_norm": 1.646615743637085,
      "learning_rate": 8.85671022650371e-05,
      "loss": 0.7284,
      "step": 1578
    },
    {
      "epoch": 0.3245965669647446,
      "grad_norm": 0.22739437222480774,
      "learning_rate": 8.856460076305581e-05,
      "loss": 0.7468,
      "step": 1579
    },
    {
      "epoch": 0.3248021379381231,
      "grad_norm": 0.22001872956752777,
      "learning_rate": 8.856209711485026e-05,
      "loss": 0.6801,
      "step": 1580
    },
    {
      "epoch": 0.3250077089115017,
      "grad_norm": 0.2490796595811844,
      "learning_rate": 8.855959132054379e-05,
      "loss": 0.7225,
      "step": 1581
    },
    {
      "epoch": 0.32521327988488025,
      "grad_norm": 0.23509925603866577,
      "learning_rate": 8.855708338025985e-05,
      "loss": 0.7126,
      "step": 1582
    },
    {
      "epoch": 0.32541885085825883,
      "grad_norm": 0.26781192421913147,
      "learning_rate": 8.8554573294122e-05,
      "loss": 0.7345,
      "step": 1583
    },
    {
      "epoch": 0.32562442183163737,
      "grad_norm": 0.23214460909366608,
      "learning_rate": 8.85520610622539e-05,
      "loss": 0.7287,
      "step": 1584
    },
    {
      "epoch": 0.32582999280501596,
      "grad_norm": 0.24188122153282166,
      "learning_rate": 8.854954668477931e-05,
      "loss": 0.7169,
      "step": 1585
    },
    {
      "epoch": 0.3260355637783945,
      "grad_norm": 0.22148127853870392,
      "learning_rate": 8.85470301618221e-05,
      "loss": 0.7128,
      "step": 1586
    },
    {
      "epoch": 0.3262411347517731,
      "grad_norm": 0.6666994690895081,
      "learning_rate": 8.854451149350625e-05,
      "loss": 0.6192,
      "step": 1587
    },
    {
      "epoch": 0.3264467057251516,
      "grad_norm": 0.24034947156906128,
      "learning_rate": 8.854199067995585e-05,
      "loss": 0.724,
      "step": 1588
    },
    {
      "epoch": 0.32665227669853014,
      "grad_norm": 0.23072193562984467,
      "learning_rate": 8.85394677212951e-05,
      "loss": 0.727,
      "step": 1589
    },
    {
      "epoch": 0.32685784767190873,
      "grad_norm": 0.23429062962532043,
      "learning_rate": 8.853694261764826e-05,
      "loss": 0.7165,
      "step": 1590
    },
    {
      "epoch": 0.32706341864528726,
      "grad_norm": 0.23310211300849915,
      "learning_rate": 8.853441536913976e-05,
      "loss": 0.7284,
      "step": 1591
    },
    {
      "epoch": 0.32726898961866585,
      "grad_norm": 0.2373618483543396,
      "learning_rate": 8.853188597589409e-05,
      "loss": 0.7347,
      "step": 1592
    },
    {
      "epoch": 0.3274745605920444,
      "grad_norm": 0.22494561970233917,
      "learning_rate": 8.852935443803587e-05,
      "loss": 0.73,
      "step": 1593
    },
    {
      "epoch": 0.32768013156542297,
      "grad_norm": 0.22148995101451874,
      "learning_rate": 8.85268207556898e-05,
      "loss": 0.7105,
      "step": 1594
    },
    {
      "epoch": 0.3278857025388015,
      "grad_norm": 0.23605044186115265,
      "learning_rate": 8.852428492898071e-05,
      "loss": 0.7147,
      "step": 1595
    },
    {
      "epoch": 0.3280912735121801,
      "grad_norm": 0.23167657852172852,
      "learning_rate": 8.852174695803355e-05,
      "loss": 0.7129,
      "step": 1596
    },
    {
      "epoch": 0.3282968444855586,
      "grad_norm": 0.2309151291847229,
      "learning_rate": 8.851920684297333e-05,
      "loss": 0.7087,
      "step": 1597
    },
    {
      "epoch": 0.3285024154589372,
      "grad_norm": 0.22455458343029022,
      "learning_rate": 8.85166645839252e-05,
      "loss": 0.7316,
      "step": 1598
    },
    {
      "epoch": 0.32870798643231575,
      "grad_norm": 0.2276565134525299,
      "learning_rate": 8.85141201810144e-05,
      "loss": 0.719,
      "step": 1599
    },
    {
      "epoch": 0.32891355740569433,
      "grad_norm": 0.23086774349212646,
      "learning_rate": 8.851157363436628e-05,
      "loss": 0.7065,
      "step": 1600
    },
    {
      "epoch": 0.32911912837907287,
      "grad_norm": 0.23493504524230957,
      "learning_rate": 8.850902494410631e-05,
      "loss": 0.7245,
      "step": 1601
    },
    {
      "epoch": 0.32932469935245146,
      "grad_norm": 0.24357451498508453,
      "learning_rate": 8.850647411036003e-05,
      "loss": 0.7151,
      "step": 1602
    },
    {
      "epoch": 0.32953027032583,
      "grad_norm": 0.24102084338665009,
      "learning_rate": 8.850392113325312e-05,
      "loss": 0.7389,
      "step": 1603
    },
    {
      "epoch": 0.3297358412992086,
      "grad_norm": 0.2216963768005371,
      "learning_rate": 8.850136601291137e-05,
      "loss": 0.703,
      "step": 1604
    },
    {
      "epoch": 0.3299414122725871,
      "grad_norm": 0.22978007793426514,
      "learning_rate": 8.849880874946062e-05,
      "loss": 0.7402,
      "step": 1605
    },
    {
      "epoch": 0.3301469832459657,
      "grad_norm": 0.23540645837783813,
      "learning_rate": 8.849624934302689e-05,
      "loss": 0.6975,
      "step": 1606
    },
    {
      "epoch": 0.33035255421934423,
      "grad_norm": 1.370906949043274,
      "learning_rate": 8.849368779373625e-05,
      "loss": 0.8282,
      "step": 1607
    },
    {
      "epoch": 0.33055812519272276,
      "grad_norm": 0.2301483154296875,
      "learning_rate": 8.84911241017149e-05,
      "loss": 0.7083,
      "step": 1608
    },
    {
      "epoch": 0.33076369616610135,
      "grad_norm": 0.24278217554092407,
      "learning_rate": 8.848855826708914e-05,
      "loss": 0.724,
      "step": 1609
    },
    {
      "epoch": 0.3309692671394799,
      "grad_norm": 0.25511378049850464,
      "learning_rate": 8.848599028998538e-05,
      "loss": 0.7214,
      "step": 1610
    },
    {
      "epoch": 0.33117483811285847,
      "grad_norm": 0.2384072244167328,
      "learning_rate": 8.848342017053015e-05,
      "loss": 0.7211,
      "step": 1611
    },
    {
      "epoch": 0.331380409086237,
      "grad_norm": 0.31351780891418457,
      "learning_rate": 8.848084790885003e-05,
      "loss": 0.6297,
      "step": 1612
    },
    {
      "epoch": 0.3315859800596156,
      "grad_norm": 0.262350469827652,
      "learning_rate": 8.847827350507177e-05,
      "loss": 0.7176,
      "step": 1613
    },
    {
      "epoch": 0.3317915510329941,
      "grad_norm": 0.2178378701210022,
      "learning_rate": 8.847569695932219e-05,
      "loss": 0.5897,
      "step": 1614
    },
    {
      "epoch": 0.3319971220063727,
      "grad_norm": 0.2447414994239807,
      "learning_rate": 8.847311827172822e-05,
      "loss": 0.7119,
      "step": 1615
    },
    {
      "epoch": 0.33220269297975125,
      "grad_norm": 0.23930813372135162,
      "learning_rate": 8.84705374424169e-05,
      "loss": 0.7297,
      "step": 1616
    },
    {
      "epoch": 0.33240826395312983,
      "grad_norm": 0.18309295177459717,
      "learning_rate": 8.846795447151539e-05,
      "loss": 0.6059,
      "step": 1617
    },
    {
      "epoch": 0.33261383492650837,
      "grad_norm": 0.23922927677631378,
      "learning_rate": 8.846536935915093e-05,
      "loss": 0.709,
      "step": 1618
    },
    {
      "epoch": 0.33281940589988696,
      "grad_norm": 0.24151726067066193,
      "learning_rate": 8.846278210545089e-05,
      "loss": 0.7009,
      "step": 1619
    },
    {
      "epoch": 0.3330249768732655,
      "grad_norm": 0.23320122063159943,
      "learning_rate": 8.846019271054272e-05,
      "loss": 0.702,
      "step": 1620
    },
    {
      "epoch": 0.3332305478466441,
      "grad_norm": 0.24178290367126465,
      "learning_rate": 8.845760117455397e-05,
      "loss": 0.7359,
      "step": 1621
    },
    {
      "epoch": 0.3334361188200226,
      "grad_norm": 0.6629179120063782,
      "learning_rate": 8.845500749761233e-05,
      "loss": 0.7394,
      "step": 1622
    },
    {
      "epoch": 0.3336416897934012,
      "grad_norm": 0.2403455376625061,
      "learning_rate": 8.84524116798456e-05,
      "loss": 0.7285,
      "step": 1623
    },
    {
      "epoch": 0.33384726076677973,
      "grad_norm": 0.19743573665618896,
      "learning_rate": 8.844981372138162e-05,
      "loss": 0.6283,
      "step": 1624
    },
    {
      "epoch": 0.33405283174015826,
      "grad_norm": 0.2429579198360443,
      "learning_rate": 8.844721362234841e-05,
      "loss": 0.7409,
      "step": 1625
    },
    {
      "epoch": 0.33425840271353685,
      "grad_norm": 0.24667932093143463,
      "learning_rate": 8.844461138287406e-05,
      "loss": 0.7242,
      "step": 1626
    },
    {
      "epoch": 0.3344639736869154,
      "grad_norm": 0.2274756133556366,
      "learning_rate": 8.844200700308677e-05,
      "loss": 0.7241,
      "step": 1627
    },
    {
      "epoch": 0.33466954466029397,
      "grad_norm": 0.24319452047348022,
      "learning_rate": 8.843940048311484e-05,
      "loss": 0.7248,
      "step": 1628
    },
    {
      "epoch": 0.3348751156336725,
      "grad_norm": 0.23962891101837158,
      "learning_rate": 8.843679182308668e-05,
      "loss": 0.7236,
      "step": 1629
    },
    {
      "epoch": 0.3350806866070511,
      "grad_norm": 0.23430408537387848,
      "learning_rate": 8.84341810231308e-05,
      "loss": 0.7255,
      "step": 1630
    },
    {
      "epoch": 0.3352862575804296,
      "grad_norm": 0.2336353361606598,
      "learning_rate": 8.843156808337585e-05,
      "loss": 0.7229,
      "step": 1631
    },
    {
      "epoch": 0.3354918285538082,
      "grad_norm": 0.22381432354450226,
      "learning_rate": 8.842895300395054e-05,
      "loss": 0.7248,
      "step": 1632
    },
    {
      "epoch": 0.33569739952718675,
      "grad_norm": 0.2316228300333023,
      "learning_rate": 8.842633578498368e-05,
      "loss": 0.7343,
      "step": 1633
    },
    {
      "epoch": 0.33590297050056533,
      "grad_norm": 0.22491221129894257,
      "learning_rate": 8.842371642660424e-05,
      "loss": 0.718,
      "step": 1634
    },
    {
      "epoch": 0.33610854147394387,
      "grad_norm": 0.2314968854188919,
      "learning_rate": 8.842109492894127e-05,
      "loss": 0.7289,
      "step": 1635
    },
    {
      "epoch": 0.33631411244732246,
      "grad_norm": 0.23885907232761383,
      "learning_rate": 8.841847129212389e-05,
      "loss": 0.7338,
      "step": 1636
    },
    {
      "epoch": 0.336519683420701,
      "grad_norm": 0.22755815088748932,
      "learning_rate": 8.841584551628136e-05,
      "loss": 0.7238,
      "step": 1637
    },
    {
      "epoch": 0.3367252543940796,
      "grad_norm": 0.2223365604877472,
      "learning_rate": 8.841321760154306e-05,
      "loss": 0.729,
      "step": 1638
    },
    {
      "epoch": 0.3369308253674581,
      "grad_norm": 0.23648889362812042,
      "learning_rate": 8.841058754803844e-05,
      "loss": 0.7479,
      "step": 1639
    },
    {
      "epoch": 0.3371363963408367,
      "grad_norm": 0.22464527189731598,
      "learning_rate": 8.840795535589706e-05,
      "loss": 0.7364,
      "step": 1640
    },
    {
      "epoch": 0.33734196731421523,
      "grad_norm": 0.22983680665493011,
      "learning_rate": 8.840532102524861e-05,
      "loss": 0.7288,
      "step": 1641
    },
    {
      "epoch": 0.3375475382875938,
      "grad_norm": 0.22532789409160614,
      "learning_rate": 8.840268455622288e-05,
      "loss": 0.7626,
      "step": 1642
    },
    {
      "epoch": 0.33775310926097235,
      "grad_norm": 0.22486740350723267,
      "learning_rate": 8.840004594894974e-05,
      "loss": 0.7198,
      "step": 1643
    },
    {
      "epoch": 0.3379586802343509,
      "grad_norm": 0.220737487077713,
      "learning_rate": 8.839740520355918e-05,
      "loss": 0.7467,
      "step": 1644
    },
    {
      "epoch": 0.33816425120772947,
      "grad_norm": 0.23781028389930725,
      "learning_rate": 8.839476232018131e-05,
      "loss": 0.7162,
      "step": 1645
    },
    {
      "epoch": 0.338369822181108,
      "grad_norm": 0.22306212782859802,
      "learning_rate": 8.839211729894634e-05,
      "loss": 0.7024,
      "step": 1646
    },
    {
      "epoch": 0.3385753931544866,
      "grad_norm": 0.22637905180454254,
      "learning_rate": 8.838947013998454e-05,
      "loss": 0.7227,
      "step": 1647
    },
    {
      "epoch": 0.3387809641278651,
      "grad_norm": 0.21539071202278137,
      "learning_rate": 8.838682084342637e-05,
      "loss": 0.715,
      "step": 1648
    },
    {
      "epoch": 0.3389865351012437,
      "grad_norm": 0.21236176788806915,
      "learning_rate": 8.838416940940232e-05,
      "loss": 0.6935,
      "step": 1649
    },
    {
      "epoch": 0.33919210607462225,
      "grad_norm": 0.21903282403945923,
      "learning_rate": 8.838151583804302e-05,
      "loss": 0.6875,
      "step": 1650
    },
    {
      "epoch": 0.33939767704800083,
      "grad_norm": 0.22233720123767853,
      "learning_rate": 8.83788601294792e-05,
      "loss": 0.7196,
      "step": 1651
    },
    {
      "epoch": 0.33960324802137937,
      "grad_norm": 0.21296600997447968,
      "learning_rate": 8.837620228384169e-05,
      "loss": 0.7383,
      "step": 1652
    },
    {
      "epoch": 0.33980881899475796,
      "grad_norm": 0.21336333453655243,
      "learning_rate": 8.837354230126144e-05,
      "loss": 0.7222,
      "step": 1653
    },
    {
      "epoch": 0.3400143899681365,
      "grad_norm": 0.22977587580680847,
      "learning_rate": 8.837088018186948e-05,
      "loss": 0.7053,
      "step": 1654
    },
    {
      "epoch": 0.3402199609415151,
      "grad_norm": 0.22435788810253143,
      "learning_rate": 8.836821592579697e-05,
      "loss": 0.6154,
      "step": 1655
    },
    {
      "epoch": 0.3404255319148936,
      "grad_norm": 0.23182466626167297,
      "learning_rate": 8.836554953317518e-05,
      "loss": 0.7294,
      "step": 1656
    },
    {
      "epoch": 0.3406311028882722,
      "grad_norm": 0.2296569049358368,
      "learning_rate": 8.836288100413543e-05,
      "loss": 0.7147,
      "step": 1657
    },
    {
      "epoch": 0.34083667386165073,
      "grad_norm": 0.22955302894115448,
      "learning_rate": 8.836021033880922e-05,
      "loss": 0.7228,
      "step": 1658
    },
    {
      "epoch": 0.3410422448350293,
      "grad_norm": 0.28406065702438354,
      "learning_rate": 8.83575375373281e-05,
      "loss": 0.722,
      "step": 1659
    },
    {
      "epoch": 0.34124781580840785,
      "grad_norm": 0.22933915257453918,
      "learning_rate": 8.835486259982378e-05,
      "loss": 0.7365,
      "step": 1660
    },
    {
      "epoch": 0.34145338678178644,
      "grad_norm": 0.18561038374900818,
      "learning_rate": 8.835218552642801e-05,
      "loss": 0.6073,
      "step": 1661
    },
    {
      "epoch": 0.34165895775516497,
      "grad_norm": 0.22962850332260132,
      "learning_rate": 8.834950631727269e-05,
      "loss": 0.7329,
      "step": 1662
    },
    {
      "epoch": 0.3418645287285435,
      "grad_norm": 0.22192583978176117,
      "learning_rate": 8.83468249724898e-05,
      "loss": 0.6966,
      "step": 1663
    },
    {
      "epoch": 0.3420700997019221,
      "grad_norm": 0.2303367406129837,
      "learning_rate": 8.834414149221145e-05,
      "loss": 0.7083,
      "step": 1664
    },
    {
      "epoch": 0.3422756706753006,
      "grad_norm": 0.21235564351081848,
      "learning_rate": 8.834145587656984e-05,
      "loss": 0.7054,
      "step": 1665
    },
    {
      "epoch": 0.3424812416486792,
      "grad_norm": 0.22414252161979675,
      "learning_rate": 8.833876812569728e-05,
      "loss": 0.7094,
      "step": 1666
    },
    {
      "epoch": 0.34268681262205775,
      "grad_norm": 0.21854104101657867,
      "learning_rate": 8.833607823972617e-05,
      "loss": 0.7009,
      "step": 1667
    },
    {
      "epoch": 0.34289238359543633,
      "grad_norm": 0.21945634484291077,
      "learning_rate": 8.833338621878904e-05,
      "loss": 0.7214,
      "step": 1668
    },
    {
      "epoch": 0.34309795456881487,
      "grad_norm": 0.22008635103702545,
      "learning_rate": 8.833069206301852e-05,
      "loss": 0.7231,
      "step": 1669
    },
    {
      "epoch": 0.34330352554219346,
      "grad_norm": 0.2222408652305603,
      "learning_rate": 8.832799577254734e-05,
      "loss": 0.7249,
      "step": 1670
    },
    {
      "epoch": 0.343509096515572,
      "grad_norm": 0.22058893740177155,
      "learning_rate": 8.83252973475083e-05,
      "loss": 0.7196,
      "step": 1671
    },
    {
      "epoch": 0.3437146674889506,
      "grad_norm": 0.2201676368713379,
      "learning_rate": 8.832259678803437e-05,
      "loss": 0.7226,
      "step": 1672
    },
    {
      "epoch": 0.3439202384623291,
      "grad_norm": 0.21815598011016846,
      "learning_rate": 8.831989409425857e-05,
      "loss": 0.6943,
      "step": 1673
    },
    {
      "epoch": 0.3441258094357077,
      "grad_norm": 0.22216841578483582,
      "learning_rate": 8.831718926631409e-05,
      "loss": 0.7259,
      "step": 1674
    },
    {
      "epoch": 0.34433138040908623,
      "grad_norm": 0.21504633128643036,
      "learning_rate": 8.831448230433415e-05,
      "loss": 0.7286,
      "step": 1675
    },
    {
      "epoch": 0.3445369513824648,
      "grad_norm": 0.20685335993766785,
      "learning_rate": 8.83117732084521e-05,
      "loss": 0.6891,
      "step": 1676
    },
    {
      "epoch": 0.34474252235584335,
      "grad_norm": 0.1763618290424347,
      "learning_rate": 8.830906197880146e-05,
      "loss": 0.6218,
      "step": 1677
    },
    {
      "epoch": 0.34494809332922194,
      "grad_norm": 0.24009843170642853,
      "learning_rate": 8.830634861551573e-05,
      "loss": 0.7337,
      "step": 1678
    },
    {
      "epoch": 0.34515366430260047,
      "grad_norm": 0.21924906969070435,
      "learning_rate": 8.830363311872862e-05,
      "loss": 0.7194,
      "step": 1679
    },
    {
      "epoch": 0.345359235275979,
      "grad_norm": 0.22524218261241913,
      "learning_rate": 8.830091548857392e-05,
      "loss": 0.728,
      "step": 1680
    },
    {
      "epoch": 0.3455648062493576,
      "grad_norm": 0.15049724280834198,
      "learning_rate": 8.829819572518549e-05,
      "loss": 0.5879,
      "step": 1681
    },
    {
      "epoch": 0.3457703772227361,
      "grad_norm": 0.23018436133861542,
      "learning_rate": 8.829547382869734e-05,
      "loss": 0.7318,
      "step": 1682
    },
    {
      "epoch": 0.3459759481961147,
      "grad_norm": 0.14980974793434143,
      "learning_rate": 8.829274979924355e-05,
      "loss": 0.6082,
      "step": 1683
    },
    {
      "epoch": 0.34618151916949325,
      "grad_norm": 0.23299898207187653,
      "learning_rate": 8.829002363695834e-05,
      "loss": 0.6979,
      "step": 1684
    },
    {
      "epoch": 0.34638709014287183,
      "grad_norm": 0.22874654829502106,
      "learning_rate": 8.828729534197599e-05,
      "loss": 0.7117,
      "step": 1685
    },
    {
      "epoch": 0.34659266111625037,
      "grad_norm": 0.14617690443992615,
      "learning_rate": 8.828456491443093e-05,
      "loss": 0.5823,
      "step": 1686
    },
    {
      "epoch": 0.34679823208962895,
      "grad_norm": 0.14507731795310974,
      "learning_rate": 8.828183235445767e-05,
      "loss": 0.6002,
      "step": 1687
    },
    {
      "epoch": 0.3470038030630075,
      "grad_norm": 0.15053583681583405,
      "learning_rate": 8.827909766219082e-05,
      "loss": 0.6047,
      "step": 1688
    },
    {
      "epoch": 0.3472093740363861,
      "grad_norm": 0.1374531388282776,
      "learning_rate": 8.827636083776512e-05,
      "loss": 0.6148,
      "step": 1689
    },
    {
      "epoch": 0.3474149450097646,
      "grad_norm": 0.2662424147129059,
      "learning_rate": 8.827362188131539e-05,
      "loss": 0.7147,
      "step": 1690
    },
    {
      "epoch": 0.3476205159831432,
      "grad_norm": 0.24824592471122742,
      "learning_rate": 8.827088079297658e-05,
      "loss": 0.749,
      "step": 1691
    },
    {
      "epoch": 0.34782608695652173,
      "grad_norm": 0.17181143164634705,
      "learning_rate": 8.826813757288371e-05,
      "loss": 0.605,
      "step": 1692
    },
    {
      "epoch": 0.3480316579299003,
      "grad_norm": 0.2484540492296219,
      "learning_rate": 8.826539222117195e-05,
      "loss": 0.7012,
      "step": 1693
    },
    {
      "epoch": 0.34823722890327885,
      "grad_norm": 0.17473895847797394,
      "learning_rate": 8.826264473797651e-05,
      "loss": 0.5969,
      "step": 1694
    },
    {
      "epoch": 0.34844279987665744,
      "grad_norm": 0.14865082502365112,
      "learning_rate": 8.825989512343281e-05,
      "loss": 0.6109,
      "step": 1695
    },
    {
      "epoch": 0.34864837085003597,
      "grad_norm": 0.26978155970573425,
      "learning_rate": 8.825714337767625e-05,
      "loss": 0.7122,
      "step": 1696
    },
    {
      "epoch": 0.34885394182341456,
      "grad_norm": 0.15846404433250427,
      "learning_rate": 8.825438950084241e-05,
      "loss": 0.5924,
      "step": 1697
    },
    {
      "epoch": 0.3490595127967931,
      "grad_norm": 0.23453454673290253,
      "learning_rate": 8.8251633493067e-05,
      "loss": 0.7328,
      "step": 1698
    },
    {
      "epoch": 0.3492650837701716,
      "grad_norm": 0.22266656160354614,
      "learning_rate": 8.824887535448574e-05,
      "loss": 0.7041,
      "step": 1699
    },
    {
      "epoch": 0.3494706547435502,
      "grad_norm": 0.2392280548810959,
      "learning_rate": 8.824611508523455e-05,
      "loss": 0.7133,
      "step": 1700
    },
    {
      "epoch": 0.34967622571692875,
      "grad_norm": 0.22809362411499023,
      "learning_rate": 8.82433526854494e-05,
      "loss": 0.7258,
      "step": 1701
    },
    {
      "epoch": 0.34988179669030733,
      "grad_norm": 0.2222517728805542,
      "learning_rate": 8.824058815526637e-05,
      "loss": 0.7114,
      "step": 1702
    },
    {
      "epoch": 0.35008736766368587,
      "grad_norm": 0.23900644481182098,
      "learning_rate": 8.823782149482169e-05,
      "loss": 0.7146,
      "step": 1703
    },
    {
      "epoch": 0.35029293863706445,
      "grad_norm": 0.2216804325580597,
      "learning_rate": 8.823505270425162e-05,
      "loss": 0.712,
      "step": 1704
    },
    {
      "epoch": 0.350498509610443,
      "grad_norm": 0.22626622021198273,
      "learning_rate": 8.823228178369259e-05,
      "loss": 0.7145,
      "step": 1705
    },
    {
      "epoch": 0.3507040805838216,
      "grad_norm": 0.23051661252975464,
      "learning_rate": 8.82295087332811e-05,
      "loss": 0.7246,
      "step": 1706
    },
    {
      "epoch": 0.3509096515572001,
      "grad_norm": 0.19165797531604767,
      "learning_rate": 8.822673355315376e-05,
      "loss": 0.6022,
      "step": 1707
    },
    {
      "epoch": 0.3511152225305787,
      "grad_norm": 0.15455321967601776,
      "learning_rate": 8.822395624344733e-05,
      "loss": 0.5952,
      "step": 1708
    },
    {
      "epoch": 0.35132079350395723,
      "grad_norm": 0.25851893424987793,
      "learning_rate": 8.822117680429856e-05,
      "loss": 0.7155,
      "step": 1709
    },
    {
      "epoch": 0.3515263644773358,
      "grad_norm": 0.14911410212516785,
      "learning_rate": 8.821839523584446e-05,
      "loss": 0.6002,
      "step": 1710
    },
    {
      "epoch": 0.35173193545071435,
      "grad_norm": 0.2250581830739975,
      "learning_rate": 8.821561153822202e-05,
      "loss": 0.694,
      "step": 1711
    },
    {
      "epoch": 0.35193750642409294,
      "grad_norm": 0.17733228206634521,
      "learning_rate": 8.821282571156838e-05,
      "loss": 0.5743,
      "step": 1712
    },
    {
      "epoch": 0.35214307739747147,
      "grad_norm": 0.23851247131824493,
      "learning_rate": 8.82100377560208e-05,
      "loss": 0.7278,
      "step": 1713
    },
    {
      "epoch": 0.35234864837085006,
      "grad_norm": 0.23099485039710999,
      "learning_rate": 8.820724767171662e-05,
      "loss": 0.7387,
      "step": 1714
    },
    {
      "epoch": 0.3525542193442286,
      "grad_norm": 0.22473661601543427,
      "learning_rate": 8.82044554587933e-05,
      "loss": 0.7185,
      "step": 1715
    },
    {
      "epoch": 0.3527597903176072,
      "grad_norm": 0.22726485133171082,
      "learning_rate": 8.820166111738839e-05,
      "loss": 0.7141,
      "step": 1716
    },
    {
      "epoch": 0.3529653612909857,
      "grad_norm": 0.2528528869152069,
      "learning_rate": 8.819886464763958e-05,
      "loss": 0.725,
      "step": 1717
    },
    {
      "epoch": 0.35317093226436425,
      "grad_norm": 0.1892632395029068,
      "learning_rate": 8.81960660496846e-05,
      "loss": 0.5938,
      "step": 1718
    },
    {
      "epoch": 0.35337650323774283,
      "grad_norm": 0.22239932417869568,
      "learning_rate": 8.819326532366134e-05,
      "loss": 0.7044,
      "step": 1719
    },
    {
      "epoch": 0.35358207421112137,
      "grad_norm": 0.22476689517498016,
      "learning_rate": 8.81904624697078e-05,
      "loss": 0.7243,
      "step": 1720
    },
    {
      "epoch": 0.35378764518449995,
      "grad_norm": 0.2231576144695282,
      "learning_rate": 8.818765748796204e-05,
      "loss": 0.7159,
      "step": 1721
    },
    {
      "epoch": 0.3539932161578785,
      "grad_norm": 0.21081259846687317,
      "learning_rate": 8.818485037856224e-05,
      "loss": 0.7144,
      "step": 1722
    },
    {
      "epoch": 0.3541987871312571,
      "grad_norm": 0.22331789135932922,
      "learning_rate": 8.818204114164673e-05,
      "loss": 0.7398,
      "step": 1723
    },
    {
      "epoch": 0.3544043581046356,
      "grad_norm": 0.1838466078042984,
      "learning_rate": 8.817922977735387e-05,
      "loss": 0.6238,
      "step": 1724
    },
    {
      "epoch": 0.3546099290780142,
      "grad_norm": 0.2340015321969986,
      "learning_rate": 8.81764162858222e-05,
      "loss": 0.7226,
      "step": 1725
    },
    {
      "epoch": 0.35481550005139273,
      "grad_norm": 0.14466704428195953,
      "learning_rate": 8.817360066719027e-05,
      "loss": 0.5699,
      "step": 1726
    },
    {
      "epoch": 0.3550210710247713,
      "grad_norm": 0.23499037325382233,
      "learning_rate": 8.817078292159686e-05,
      "loss": 0.71,
      "step": 1727
    },
    {
      "epoch": 0.35522664199814985,
      "grad_norm": 0.24169334769248962,
      "learning_rate": 8.816796304918072e-05,
      "loss": 0.7195,
      "step": 1728
    },
    {
      "epoch": 0.35543221297152844,
      "grad_norm": 0.16424809396266937,
      "learning_rate": 8.816514105008086e-05,
      "loss": 0.5792,
      "step": 1729
    },
    {
      "epoch": 0.35563778394490697,
      "grad_norm": 0.2632940113544464,
      "learning_rate": 8.816231692443621e-05,
      "loss": 0.7313,
      "step": 1730
    },
    {
      "epoch": 0.35584335491828556,
      "grad_norm": 0.23430821299552917,
      "learning_rate": 8.815949067238596e-05,
      "loss": 0.7073,
      "step": 1731
    },
    {
      "epoch": 0.3560489258916641,
      "grad_norm": 0.22487561404705048,
      "learning_rate": 8.815666229406932e-05,
      "loss": 0.7182,
      "step": 1732
    },
    {
      "epoch": 0.3562544968650427,
      "grad_norm": 0.24197392165660858,
      "learning_rate": 8.815383178962566e-05,
      "loss": 0.7196,
      "step": 1733
    },
    {
      "epoch": 0.3564600678384212,
      "grad_norm": 0.22599098086357117,
      "learning_rate": 8.81509991591944e-05,
      "loss": 0.7165,
      "step": 1734
    },
    {
      "epoch": 0.3566656388117998,
      "grad_norm": 0.22369571030139923,
      "learning_rate": 8.814816440291509e-05,
      "loss": 0.7385,
      "step": 1735
    },
    {
      "epoch": 0.35687120978517833,
      "grad_norm": 0.23025518655776978,
      "learning_rate": 8.81453275209274e-05,
      "loss": 0.7184,
      "step": 1736
    },
    {
      "epoch": 0.35707678075855687,
      "grad_norm": 0.22964996099472046,
      "learning_rate": 8.81424885133711e-05,
      "loss": 0.7192,
      "step": 1737
    },
    {
      "epoch": 0.35728235173193545,
      "grad_norm": 0.19159770011901855,
      "learning_rate": 8.813964738038602e-05,
      "loss": 0.6025,
      "step": 1738
    },
    {
      "epoch": 0.357487922705314,
      "grad_norm": 0.2504747211933136,
      "learning_rate": 8.813680412211216e-05,
      "loss": 0.6964,
      "step": 1739
    },
    {
      "epoch": 0.3576934936786926,
      "grad_norm": 0.23766383528709412,
      "learning_rate": 8.813395873868956e-05,
      "loss": 0.7021,
      "step": 1740
    },
    {
      "epoch": 0.3578990646520711,
      "grad_norm": 0.2447771579027176,
      "learning_rate": 8.813111123025844e-05,
      "loss": 0.7185,
      "step": 1741
    },
    {
      "epoch": 0.3581046356254497,
      "grad_norm": 0.23200775682926178,
      "learning_rate": 8.812826159695907e-05,
      "loss": 0.7188,
      "step": 1742
    },
    {
      "epoch": 0.35831020659882823,
      "grad_norm": 0.22907336056232452,
      "learning_rate": 8.812540983893181e-05,
      "loss": 0.6909,
      "step": 1743
    },
    {
      "epoch": 0.3585157775722068,
      "grad_norm": 0.22600993514060974,
      "learning_rate": 8.812255595631719e-05,
      "loss": 0.7074,
      "step": 1744
    },
    {
      "epoch": 0.35872134854558535,
      "grad_norm": 0.2269076704978943,
      "learning_rate": 8.811969994925578e-05,
      "loss": 0.6814,
      "step": 1745
    },
    {
      "epoch": 0.35892691951896394,
      "grad_norm": 0.21256834268569946,
      "learning_rate": 8.811684181788831e-05,
      "loss": 0.7353,
      "step": 1746
    },
    {
      "epoch": 0.35913249049234247,
      "grad_norm": 0.22337260842323303,
      "learning_rate": 8.811398156235557e-05,
      "loss": 0.7398,
      "step": 1747
    },
    {
      "epoch": 0.35933806146572106,
      "grad_norm": 0.2335451990365982,
      "learning_rate": 8.811111918279847e-05,
      "loss": 0.7205,
      "step": 1748
    },
    {
      "epoch": 0.3595436324390996,
      "grad_norm": 0.21998728811740875,
      "learning_rate": 8.810825467935802e-05,
      "loss": 0.6947,
      "step": 1749
    },
    {
      "epoch": 0.3597492034124782,
      "grad_norm": 0.272847443819046,
      "learning_rate": 8.810538805217535e-05,
      "loss": 0.6987,
      "step": 1750
    },
    {
      "epoch": 0.3599547743858567,
      "grad_norm": 0.22549496591091156,
      "learning_rate": 8.810251930139169e-05,
      "loss": 0.7159,
      "step": 1751
    },
    {
      "epoch": 0.3601603453592353,
      "grad_norm": 0.21950645744800568,
      "learning_rate": 8.809964842714837e-05,
      "loss": 0.7493,
      "step": 1752
    },
    {
      "epoch": 0.36036591633261383,
      "grad_norm": 0.21935752034187317,
      "learning_rate": 8.809677542958681e-05,
      "loss": 0.6923,
      "step": 1753
    },
    {
      "epoch": 0.36057148730599237,
      "grad_norm": 0.2425873726606369,
      "learning_rate": 8.809390030884856e-05,
      "loss": 0.7055,
      "step": 1754
    },
    {
      "epoch": 0.36077705827937095,
      "grad_norm": 0.21217839419841766,
      "learning_rate": 8.809102306507527e-05,
      "loss": 0.7261,
      "step": 1755
    },
    {
      "epoch": 0.3609826292527495,
      "grad_norm": 0.22305883467197418,
      "learning_rate": 8.808814369840867e-05,
      "loss": 0.6804,
      "step": 1756
    },
    {
      "epoch": 0.3611882002261281,
      "grad_norm": 0.23050794005393982,
      "learning_rate": 8.808526220899063e-05,
      "loss": 0.7209,
      "step": 1757
    },
    {
      "epoch": 0.3613937711995066,
      "grad_norm": 0.21624812483787537,
      "learning_rate": 8.80823785969631e-05,
      "loss": 0.733,
      "step": 1758
    },
    {
      "epoch": 0.3615993421728852,
      "grad_norm": 0.2256494164466858,
      "learning_rate": 8.807949286246814e-05,
      "loss": 0.7133,
      "step": 1759
    },
    {
      "epoch": 0.36180491314626373,
      "grad_norm": 0.2232973873615265,
      "learning_rate": 8.807660500564793e-05,
      "loss": 0.7099,
      "step": 1760
    },
    {
      "epoch": 0.3620104841196423,
      "grad_norm": 0.21484389901161194,
      "learning_rate": 8.807371502664473e-05,
      "loss": 0.7089,
      "step": 1761
    },
    {
      "epoch": 0.36221605509302085,
      "grad_norm": 0.22121310234069824,
      "learning_rate": 8.807082292560089e-05,
      "loss": 0.7098,
      "step": 1762
    },
    {
      "epoch": 0.36242162606639944,
      "grad_norm": 0.22262440621852875,
      "learning_rate": 8.806792870265895e-05,
      "loss": 0.7494,
      "step": 1763
    },
    {
      "epoch": 0.36262719703977797,
      "grad_norm": 0.22367548942565918,
      "learning_rate": 8.806503235796145e-05,
      "loss": 0.7334,
      "step": 1764
    },
    {
      "epoch": 0.36283276801315656,
      "grad_norm": 0.22336241602897644,
      "learning_rate": 8.806213389165109e-05,
      "loss": 0.7028,
      "step": 1765
    },
    {
      "epoch": 0.3630383389865351,
      "grad_norm": 0.21695300936698914,
      "learning_rate": 8.805923330387067e-05,
      "loss": 0.7131,
      "step": 1766
    },
    {
      "epoch": 0.3632439099599137,
      "grad_norm": 0.2211865484714508,
      "learning_rate": 8.805633059476307e-05,
      "loss": 0.7493,
      "step": 1767
    },
    {
      "epoch": 0.3634494809332922,
      "grad_norm": 0.2145841121673584,
      "learning_rate": 8.80534257644713e-05,
      "loss": 0.5885,
      "step": 1768
    },
    {
      "epoch": 0.3636550519066708,
      "grad_norm": 0.23112855851650238,
      "learning_rate": 8.805051881313849e-05,
      "loss": 0.6836,
      "step": 1769
    },
    {
      "epoch": 0.36386062288004933,
      "grad_norm": 0.226564422249794,
      "learning_rate": 8.804760974090785e-05,
      "loss": 0.7297,
      "step": 1770
    },
    {
      "epoch": 0.3640661938534279,
      "grad_norm": 0.15169551968574524,
      "learning_rate": 8.804469854792266e-05,
      "loss": 0.6113,
      "step": 1771
    },
    {
      "epoch": 0.36427176482680645,
      "grad_norm": 0.23821888864040375,
      "learning_rate": 8.804178523432637e-05,
      "loss": 0.7175,
      "step": 1772
    },
    {
      "epoch": 0.364477335800185,
      "grad_norm": 0.23416121304035187,
      "learning_rate": 8.80388698002625e-05,
      "loss": 0.7214,
      "step": 1773
    },
    {
      "epoch": 0.3646829067735636,
      "grad_norm": 0.1642165631055832,
      "learning_rate": 8.803595224587467e-05,
      "loss": 0.5792,
      "step": 1774
    },
    {
      "epoch": 0.3648884777469421,
      "grad_norm": 0.2228156477212906,
      "learning_rate": 8.803303257130662e-05,
      "loss": 0.7051,
      "step": 1775
    },
    {
      "epoch": 0.3650940487203207,
      "grad_norm": 0.2340465635061264,
      "learning_rate": 8.80301107767022e-05,
      "loss": 0.7373,
      "step": 1776
    },
    {
      "epoch": 0.36529961969369923,
      "grad_norm": 0.2198680192232132,
      "learning_rate": 8.802718686220535e-05,
      "loss": 0.71,
      "step": 1777
    },
    {
      "epoch": 0.3655051906670778,
      "grad_norm": 0.2116042524576187,
      "learning_rate": 8.80242608279601e-05,
      "loss": 0.7465,
      "step": 1778
    },
    {
      "epoch": 0.36571076164045635,
      "grad_norm": 0.22121259570121765,
      "learning_rate": 8.802133267411062e-05,
      "loss": 0.7352,
      "step": 1779
    },
    {
      "epoch": 0.36591633261383494,
      "grad_norm": 0.23157210648059845,
      "learning_rate": 8.801840240080117e-05,
      "loss": 0.6896,
      "step": 1780
    },
    {
      "epoch": 0.36612190358721347,
      "grad_norm": 0.22456520795822144,
      "learning_rate": 8.801547000817609e-05,
      "loss": 0.7449,
      "step": 1781
    },
    {
      "epoch": 0.36632747456059206,
      "grad_norm": 0.15871234238147736,
      "learning_rate": 8.801253549637985e-05,
      "loss": 0.5766,
      "step": 1782
    },
    {
      "epoch": 0.3665330455339706,
      "grad_norm": 0.23135414719581604,
      "learning_rate": 8.800959886555704e-05,
      "loss": 0.7021,
      "step": 1783
    },
    {
      "epoch": 0.3667386165073492,
      "grad_norm": 0.1448424756526947,
      "learning_rate": 8.80066601158523e-05,
      "loss": 0.6072,
      "step": 1784
    },
    {
      "epoch": 0.3669441874807277,
      "grad_norm": 0.14697474241256714,
      "learning_rate": 8.800371924741044e-05,
      "loss": 0.6064,
      "step": 1785
    },
    {
      "epoch": 0.3671497584541063,
      "grad_norm": 0.22950981557369232,
      "learning_rate": 8.800077626037634e-05,
      "loss": 0.7119,
      "step": 1786
    },
    {
      "epoch": 0.36735532942748483,
      "grad_norm": 0.21077360212802887,
      "learning_rate": 8.799783115489497e-05,
      "loss": 0.7119,
      "step": 1787
    },
    {
      "epoch": 0.3675609004008634,
      "grad_norm": 0.21831658482551575,
      "learning_rate": 8.799488393111144e-05,
      "loss": 0.6915,
      "step": 1788
    },
    {
      "epoch": 0.36776647137424195,
      "grad_norm": 0.2097778469324112,
      "learning_rate": 8.799193458917092e-05,
      "loss": 0.7103,
      "step": 1789
    },
    {
      "epoch": 0.36797204234762054,
      "grad_norm": 0.21712899208068848,
      "learning_rate": 8.798898312921874e-05,
      "loss": 0.7155,
      "step": 1790
    },
    {
      "epoch": 0.3681776133209991,
      "grad_norm": 0.21277742087841034,
      "learning_rate": 8.798602955140029e-05,
      "loss": 0.7349,
      "step": 1791
    },
    {
      "epoch": 0.3683831842943776,
      "grad_norm": 0.2360071986913681,
      "learning_rate": 8.798307385586107e-05,
      "loss": 0.7345,
      "step": 1792
    },
    {
      "epoch": 0.3685887552677562,
      "grad_norm": 0.200873002409935,
      "learning_rate": 8.798011604274671e-05,
      "loss": 0.5943,
      "step": 1793
    },
    {
      "epoch": 0.36879432624113473,
      "grad_norm": 0.23026502132415771,
      "learning_rate": 8.797715611220293e-05,
      "loss": 0.7188,
      "step": 1794
    },
    {
      "epoch": 0.3689998972145133,
      "grad_norm": 0.22256635129451752,
      "learning_rate": 8.797419406437553e-05,
      "loss": 0.7152,
      "step": 1795
    },
    {
      "epoch": 0.36920546818789185,
      "grad_norm": 0.21542035043239594,
      "learning_rate": 8.797122989941045e-05,
      "loss": 0.7055,
      "step": 1796
    },
    {
      "epoch": 0.36941103916127044,
      "grad_norm": 0.22514380514621735,
      "learning_rate": 8.796826361745374e-05,
      "loss": 0.7028,
      "step": 1797
    },
    {
      "epoch": 0.36961661013464897,
      "grad_norm": 0.2098117172718048,
      "learning_rate": 8.796529521865149e-05,
      "loss": 0.7223,
      "step": 1798
    },
    {
      "epoch": 0.36982218110802756,
      "grad_norm": 0.2132442593574524,
      "learning_rate": 8.796232470314997e-05,
      "loss": 0.6883,
      "step": 1799
    },
    {
      "epoch": 0.3700277520814061,
      "grad_norm": 0.17681948840618134,
      "learning_rate": 8.795935207109552e-05,
      "loss": 0.5999,
      "step": 1800
    },
    {
      "epoch": 0.3702333230547847,
      "grad_norm": 0.23800528049468994,
      "learning_rate": 8.795637732263459e-05,
      "loss": 0.7058,
      "step": 1801
    },
    {
      "epoch": 0.3704388940281632,
      "grad_norm": 0.2394934594631195,
      "learning_rate": 8.795340045791371e-05,
      "loss": 0.7371,
      "step": 1802
    },
    {
      "epoch": 0.3706444650015418,
      "grad_norm": 0.21029235422611237,
      "learning_rate": 8.795042147707957e-05,
      "loss": 0.6879,
      "step": 1803
    },
    {
      "epoch": 0.37085003597492033,
      "grad_norm": 0.2209658920764923,
      "learning_rate": 8.79474403802789e-05,
      "loss": 0.7145,
      "step": 1804
    },
    {
      "epoch": 0.3710556069482989,
      "grad_norm": 0.2265157699584961,
      "learning_rate": 8.79444571676586e-05,
      "loss": 0.7329,
      "step": 1805
    },
    {
      "epoch": 0.37126117792167745,
      "grad_norm": 0.17411258816719055,
      "learning_rate": 8.79414718393656e-05,
      "loss": 0.5861,
      "step": 1806
    },
    {
      "epoch": 0.37146674889505604,
      "grad_norm": 0.235770583152771,
      "learning_rate": 8.793848439554699e-05,
      "loss": 0.7168,
      "step": 1807
    },
    {
      "epoch": 0.3716723198684346,
      "grad_norm": 0.24390238523483276,
      "learning_rate": 8.793549483634995e-05,
      "loss": 0.7242,
      "step": 1808
    },
    {
      "epoch": 0.37187789084181316,
      "grad_norm": 0.22740136086940765,
      "learning_rate": 8.793250316192175e-05,
      "loss": 0.7064,
      "step": 1809
    },
    {
      "epoch": 0.3720834618151917,
      "grad_norm": 0.1567818820476532,
      "learning_rate": 8.79295093724098e-05,
      "loss": 0.6035,
      "step": 1810
    },
    {
      "epoch": 0.37228903278857023,
      "grad_norm": 0.23284457623958588,
      "learning_rate": 8.792651346796157e-05,
      "loss": 0.7145,
      "step": 1811
    },
    {
      "epoch": 0.3724946037619488,
      "grad_norm": 0.21928593516349792,
      "learning_rate": 8.792351544872467e-05,
      "loss": 0.7015,
      "step": 1812
    },
    {
      "epoch": 0.37270017473532735,
      "grad_norm": 0.2226940542459488,
      "learning_rate": 8.792051531484678e-05,
      "loss": 0.7032,
      "step": 1813
    },
    {
      "epoch": 0.37290574570870594,
      "grad_norm": 0.1569989025592804,
      "learning_rate": 8.791751306647572e-05,
      "loss": 0.6043,
      "step": 1814
    },
    {
      "epoch": 0.37311131668208447,
      "grad_norm": 0.231995090842247,
      "learning_rate": 8.791450870375936e-05,
      "loss": 0.7066,
      "step": 1815
    },
    {
      "epoch": 0.37331688765546306,
      "grad_norm": 0.2193315476179123,
      "learning_rate": 8.791150222684576e-05,
      "loss": 0.7099,
      "step": 1816
    },
    {
      "epoch": 0.3735224586288416,
      "grad_norm": 0.2191406637430191,
      "learning_rate": 8.790849363588301e-05,
      "loss": 0.7082,
      "step": 1817
    },
    {
      "epoch": 0.3737280296022202,
      "grad_norm": 0.24114836752414703,
      "learning_rate": 8.790548293101932e-05,
      "loss": 0.6951,
      "step": 1818
    },
    {
      "epoch": 0.3739336005755987,
      "grad_norm": 0.21961726248264313,
      "learning_rate": 8.790247011240304e-05,
      "loss": 0.7007,
      "step": 1819
    },
    {
      "epoch": 0.3741391715489773,
      "grad_norm": 0.22864870727062225,
      "learning_rate": 8.789945518018259e-05,
      "loss": 0.7172,
      "step": 1820
    },
    {
      "epoch": 0.37434474252235583,
      "grad_norm": 0.2318045198917389,
      "learning_rate": 8.789643813450647e-05,
      "loss": 0.7168,
      "step": 1821
    },
    {
      "epoch": 0.3745503134957344,
      "grad_norm": 0.21737788617610931,
      "learning_rate": 8.789341897552336e-05,
      "loss": 0.7251,
      "step": 1822
    },
    {
      "epoch": 0.37475588446911295,
      "grad_norm": 0.21853739023208618,
      "learning_rate": 8.789039770338197e-05,
      "loss": 0.7059,
      "step": 1823
    },
    {
      "epoch": 0.37496145544249154,
      "grad_norm": 0.21663320064544678,
      "learning_rate": 8.788737431823116e-05,
      "loss": 0.6916,
      "step": 1824
    },
    {
      "epoch": 0.3751670264158701,
      "grad_norm": 0.17142772674560547,
      "learning_rate": 8.788434882021987e-05,
      "loss": 0.594,
      "step": 1825
    },
    {
      "epoch": 0.37537259738924866,
      "grad_norm": 0.2234950065612793,
      "learning_rate": 8.788132120949716e-05,
      "loss": 0.7175,
      "step": 1826
    },
    {
      "epoch": 0.3755781683626272,
      "grad_norm": 0.21172864735126495,
      "learning_rate": 8.787829148621218e-05,
      "loss": 0.6872,
      "step": 1827
    },
    {
      "epoch": 0.37578373933600573,
      "grad_norm": 0.22408267855644226,
      "learning_rate": 8.787525965051418e-05,
      "loss": 0.7375,
      "step": 1828
    },
    {
      "epoch": 0.3759893103093843,
      "grad_norm": 0.21300190687179565,
      "learning_rate": 8.787222570255256e-05,
      "loss": 0.7224,
      "step": 1829
    },
    {
      "epoch": 0.37619488128276285,
      "grad_norm": 0.22645267844200134,
      "learning_rate": 8.786918964247674e-05,
      "loss": 0.6957,
      "step": 1830
    },
    {
      "epoch": 0.37640045225614144,
      "grad_norm": 0.1754547655582428,
      "learning_rate": 8.786615147043633e-05,
      "loss": 0.5798,
      "step": 1831
    },
    {
      "epoch": 0.37660602322951997,
      "grad_norm": 0.2161412239074707,
      "learning_rate": 8.786311118658097e-05,
      "loss": 0.7041,
      "step": 1832
    },
    {
      "epoch": 0.37681159420289856,
      "grad_norm": 0.14319129288196564,
      "learning_rate": 8.78600687910605e-05,
      "loss": 0.5844,
      "step": 1833
    },
    {
      "epoch": 0.3770171651762771,
      "grad_norm": 0.22690434753894806,
      "learning_rate": 8.785702428402475e-05,
      "loss": 0.7024,
      "step": 1834
    },
    {
      "epoch": 0.3772227361496557,
      "grad_norm": 0.2214747965335846,
      "learning_rate": 8.785397766562371e-05,
      "loss": 0.7269,
      "step": 1835
    },
    {
      "epoch": 0.3774283071230342,
      "grad_norm": 0.2654751241207123,
      "learning_rate": 8.785092893600751e-05,
      "loss": 0.7037,
      "step": 1836
    },
    {
      "epoch": 0.3776338780964128,
      "grad_norm": 0.21953707933425903,
      "learning_rate": 8.784787809532632e-05,
      "loss": 0.7217,
      "step": 1837
    },
    {
      "epoch": 0.37783944906979133,
      "grad_norm": 0.22590485215187073,
      "learning_rate": 8.784482514373045e-05,
      "loss": 0.7056,
      "step": 1838
    },
    {
      "epoch": 0.3780450200431699,
      "grad_norm": 0.22106105089187622,
      "learning_rate": 8.78417700813703e-05,
      "loss": 0.7058,
      "step": 1839
    },
    {
      "epoch": 0.37825059101654845,
      "grad_norm": 0.1933329850435257,
      "learning_rate": 8.783871290839637e-05,
      "loss": 0.5885,
      "step": 1840
    },
    {
      "epoch": 0.37845616198992704,
      "grad_norm": 0.18652944266796112,
      "learning_rate": 8.78356536249593e-05,
      "loss": 0.5857,
      "step": 1841
    },
    {
      "epoch": 0.3786617329633056,
      "grad_norm": 0.2601449191570282,
      "learning_rate": 8.783259223120979e-05,
      "loss": 0.7123,
      "step": 1842
    },
    {
      "epoch": 0.37886730393668416,
      "grad_norm": 0.246074840426445,
      "learning_rate": 8.782952872729864e-05,
      "loss": 0.7277,
      "step": 1843
    },
    {
      "epoch": 0.3790728749100627,
      "grad_norm": 0.2558608949184418,
      "learning_rate": 8.78264631133768e-05,
      "loss": 0.7006,
      "step": 1844
    },
    {
      "epoch": 0.3792784458834413,
      "grad_norm": 0.21807844936847687,
      "learning_rate": 8.78233953895953e-05,
      "loss": 0.5869,
      "step": 1845
    },
    {
      "epoch": 0.3794840168568198,
      "grad_norm": 0.25354549288749695,
      "learning_rate": 8.782032555610526e-05,
      "loss": 0.7129,
      "step": 1846
    },
    {
      "epoch": 0.37968958783019835,
      "grad_norm": 0.243685781955719,
      "learning_rate": 8.781725361305793e-05,
      "loss": 0.7217,
      "step": 1847
    },
    {
      "epoch": 0.37989515880357694,
      "grad_norm": 0.16930992901325226,
      "learning_rate": 8.781417956060464e-05,
      "loss": 0.6007,
      "step": 1848
    },
    {
      "epoch": 0.38010072977695547,
      "grad_norm": 0.24475498497486115,
      "learning_rate": 8.781110339889682e-05,
      "loss": 0.7114,
      "step": 1849
    },
    {
      "epoch": 0.38030630075033406,
      "grad_norm": 0.24792300164699554,
      "learning_rate": 8.780802512808605e-05,
      "loss": 0.7409,
      "step": 1850
    },
    {
      "epoch": 0.3805118717237126,
      "grad_norm": 0.2320515662431717,
      "learning_rate": 8.780494474832395e-05,
      "loss": 0.7163,
      "step": 1851
    },
    {
      "epoch": 0.3807174426970912,
      "grad_norm": 0.24166975915431976,
      "learning_rate": 8.780186225976232e-05,
      "loss": 0.7304,
      "step": 1852
    },
    {
      "epoch": 0.3809230136704697,
      "grad_norm": 0.23629960417747498,
      "learning_rate": 8.779877766255297e-05,
      "loss": 0.7155,
      "step": 1853
    },
    {
      "epoch": 0.3811285846438483,
      "grad_norm": 0.22916334867477417,
      "learning_rate": 8.77956909568479e-05,
      "loss": 0.7263,
      "step": 1854
    },
    {
      "epoch": 0.38133415561722683,
      "grad_norm": 0.24127478897571564,
      "learning_rate": 8.779260214279915e-05,
      "loss": 0.6936,
      "step": 1855
    },
    {
      "epoch": 0.3815397265906054,
      "grad_norm": 0.22905930876731873,
      "learning_rate": 8.778951122055891e-05,
      "loss": 0.718,
      "step": 1856
    },
    {
      "epoch": 0.38174529756398395,
      "grad_norm": 0.21907439827919006,
      "learning_rate": 8.778641819027946e-05,
      "loss": 0.7082,
      "step": 1857
    },
    {
      "epoch": 0.38195086853736254,
      "grad_norm": 0.2231978327035904,
      "learning_rate": 8.778332305211315e-05,
      "loss": 0.5978,
      "step": 1858
    },
    {
      "epoch": 0.3821564395107411,
      "grad_norm": 0.2434241622686386,
      "learning_rate": 8.778022580621249e-05,
      "loss": 0.7043,
      "step": 1859
    },
    {
      "epoch": 0.38236201048411966,
      "grad_norm": 0.22279253602027893,
      "learning_rate": 8.777712645273005e-05,
      "loss": 0.728,
      "step": 1860
    },
    {
      "epoch": 0.3825675814574982,
      "grad_norm": 0.22146545350551605,
      "learning_rate": 8.777402499181854e-05,
      "loss": 0.7035,
      "step": 1861
    },
    {
      "epoch": 0.3827731524308768,
      "grad_norm": 0.1629379838705063,
      "learning_rate": 8.777092142363074e-05,
      "loss": 0.5911,
      "step": 1862
    },
    {
      "epoch": 0.3829787234042553,
      "grad_norm": 0.24716326594352722,
      "learning_rate": 8.776781574831956e-05,
      "loss": 0.7466,
      "step": 1863
    },
    {
      "epoch": 0.3831842943776339,
      "grad_norm": 0.21958030760288239,
      "learning_rate": 8.776470796603799e-05,
      "loss": 0.7112,
      "step": 1864
    },
    {
      "epoch": 0.38338986535101244,
      "grad_norm": 0.22167621552944183,
      "learning_rate": 8.776159807693914e-05,
      "loss": 0.7076,
      "step": 1865
    },
    {
      "epoch": 0.38359543632439097,
      "grad_norm": 0.22505022585391998,
      "learning_rate": 8.775848608117621e-05,
      "loss": 0.7383,
      "step": 1866
    },
    {
      "epoch": 0.38380100729776956,
      "grad_norm": 0.2208850234746933,
      "learning_rate": 8.775537197890254e-05,
      "loss": 0.7371,
      "step": 1867
    },
    {
      "epoch": 0.3840065782711481,
      "grad_norm": 0.21698389947414398,
      "learning_rate": 8.775225577027154e-05,
      "loss": 0.7226,
      "step": 1868
    },
    {
      "epoch": 0.3842121492445267,
      "grad_norm": 0.22070789337158203,
      "learning_rate": 8.774913745543668e-05,
      "loss": 0.712,
      "step": 1869
    },
    {
      "epoch": 0.3844177202179052,
      "grad_norm": 0.22153621912002563,
      "learning_rate": 8.774601703455166e-05,
      "loss": 0.7102,
      "step": 1870
    },
    {
      "epoch": 0.3846232911912838,
      "grad_norm": 0.21667127311229706,
      "learning_rate": 8.774289450777017e-05,
      "loss": 0.705,
      "step": 1871
    },
    {
      "epoch": 0.38482886216466233,
      "grad_norm": 0.22485022246837616,
      "learning_rate": 8.773976987524604e-05,
      "loss": 0.7232,
      "step": 1872
    },
    {
      "epoch": 0.3850344331380409,
      "grad_norm": 0.19532062113285065,
      "learning_rate": 8.77366431371332e-05,
      "loss": 0.61,
      "step": 1873
    },
    {
      "epoch": 0.38524000411141945,
      "grad_norm": 0.2282322347164154,
      "learning_rate": 8.773351429358574e-05,
      "loss": 0.721,
      "step": 1874
    },
    {
      "epoch": 0.38544557508479804,
      "grad_norm": 0.141531839966774,
      "learning_rate": 8.773038334475774e-05,
      "loss": 0.5959,
      "step": 1875
    },
    {
      "epoch": 0.3856511460581766,
      "grad_norm": 0.22724571824073792,
      "learning_rate": 8.772725029080349e-05,
      "loss": 0.7027,
      "step": 1876
    },
    {
      "epoch": 0.38585671703155516,
      "grad_norm": 0.22629983723163605,
      "learning_rate": 8.772411513187731e-05,
      "loss": 0.7021,
      "step": 1877
    },
    {
      "epoch": 0.3860622880049337,
      "grad_norm": 0.18585380911827087,
      "learning_rate": 8.772097786813368e-05,
      "loss": 0.5524,
      "step": 1878
    },
    {
      "epoch": 0.3862678589783123,
      "grad_norm": 0.25130245089530945,
      "learning_rate": 8.771783849972714e-05,
      "loss": 0.7274,
      "step": 1879
    },
    {
      "epoch": 0.3864734299516908,
      "grad_norm": 0.22500745952129364,
      "learning_rate": 8.771469702681236e-05,
      "loss": 0.725,
      "step": 1880
    },
    {
      "epoch": 0.3866790009250694,
      "grad_norm": 0.20772625505924225,
      "learning_rate": 8.771155344954412e-05,
      "loss": 0.7155,
      "step": 1881
    },
    {
      "epoch": 0.38688457189844794,
      "grad_norm": 0.2251315712928772,
      "learning_rate": 8.770840776807726e-05,
      "loss": 0.6973,
      "step": 1882
    },
    {
      "epoch": 0.3870901428718265,
      "grad_norm": 0.2260076254606247,
      "learning_rate": 8.770525998256677e-05,
      "loss": 0.7128,
      "step": 1883
    },
    {
      "epoch": 0.38729571384520506,
      "grad_norm": 0.16973739862442017,
      "learning_rate": 8.770211009316772e-05,
      "loss": 0.5794,
      "step": 1884
    },
    {
      "epoch": 0.3875012848185836,
      "grad_norm": 0.2505844831466675,
      "learning_rate": 8.76989581000353e-05,
      "loss": 0.7359,
      "step": 1885
    },
    {
      "epoch": 0.3877068557919622,
      "grad_norm": 0.28069007396698,
      "learning_rate": 8.769580400332479e-05,
      "loss": 0.7233,
      "step": 1886
    },
    {
      "epoch": 0.3879124267653407,
      "grad_norm": 0.13608971238136292,
      "learning_rate": 8.769264780319158e-05,
      "loss": 0.5905,
      "step": 1887
    },
    {
      "epoch": 0.3881179977387193,
      "grad_norm": 0.25234588980674744,
      "learning_rate": 8.768948949979116e-05,
      "loss": 0.7122,
      "step": 1888
    },
    {
      "epoch": 0.38832356871209783,
      "grad_norm": 0.234871044754982,
      "learning_rate": 8.768632909327912e-05,
      "loss": 0.7299,
      "step": 1889
    },
    {
      "epoch": 0.3885291396854764,
      "grad_norm": 0.2207827866077423,
      "learning_rate": 8.768316658381114e-05,
      "loss": 0.7086,
      "step": 1890
    },
    {
      "epoch": 0.38873471065885495,
      "grad_norm": 0.25734445452690125,
      "learning_rate": 8.768000197154306e-05,
      "loss": 0.7071,
      "step": 1891
    },
    {
      "epoch": 0.38894028163223354,
      "grad_norm": 0.2389577329158783,
      "learning_rate": 8.767683525663077e-05,
      "loss": 0.733,
      "step": 1892
    },
    {
      "epoch": 0.3891458526056121,
      "grad_norm": 0.17553114891052246,
      "learning_rate": 8.767366643923028e-05,
      "loss": 0.5974,
      "step": 1893
    },
    {
      "epoch": 0.38935142357899066,
      "grad_norm": 0.23687125742435455,
      "learning_rate": 8.76704955194977e-05,
      "loss": 0.7148,
      "step": 1894
    },
    {
      "epoch": 0.3895569945523692,
      "grad_norm": 0.25215673446655273,
      "learning_rate": 8.766732249758925e-05,
      "loss": 0.7338,
      "step": 1895
    },
    {
      "epoch": 0.3897625655257478,
      "grad_norm": 0.16502924263477325,
      "learning_rate": 8.766414737366124e-05,
      "loss": 0.584,
      "step": 1896
    },
    {
      "epoch": 0.3899681364991263,
      "grad_norm": 0.1485537588596344,
      "learning_rate": 8.76609701478701e-05,
      "loss": 0.6049,
      "step": 1897
    },
    {
      "epoch": 0.3901737074725049,
      "grad_norm": 0.3515810966491699,
      "learning_rate": 8.765779082037235e-05,
      "loss": 0.7529,
      "step": 1898
    },
    {
      "epoch": 0.39037927844588344,
      "grad_norm": 0.23719021677970886,
      "learning_rate": 8.765460939132464e-05,
      "loss": 0.728,
      "step": 1899
    },
    {
      "epoch": 0.390584849419262,
      "grad_norm": 0.17814306914806366,
      "learning_rate": 8.76514258608837e-05,
      "loss": 0.601,
      "step": 1900
    },
    {
      "epoch": 0.39079042039264056,
      "grad_norm": 0.4228149652481079,
      "learning_rate": 8.764824022920636e-05,
      "loss": 0.7195,
      "step": 1901
    },
    {
      "epoch": 0.3909959913660191,
      "grad_norm": 0.16185280680656433,
      "learning_rate": 8.764505249644953e-05,
      "loss": 0.5728,
      "step": 1902
    },
    {
      "epoch": 0.3912015623393977,
      "grad_norm": 0.23503097891807556,
      "learning_rate": 8.764186266277032e-05,
      "loss": 0.71,
      "step": 1903
    },
    {
      "epoch": 0.3914071333127762,
      "grad_norm": 0.23683130741119385,
      "learning_rate": 8.763867072832583e-05,
      "loss": 0.7351,
      "step": 1904
    },
    {
      "epoch": 0.3916127042861548,
      "grad_norm": 0.2431173473596573,
      "learning_rate": 8.763547669327334e-05,
      "loss": 0.72,
      "step": 1905
    },
    {
      "epoch": 0.39181827525953333,
      "grad_norm": 0.2246868759393692,
      "learning_rate": 8.763228055777016e-05,
      "loss": 0.7136,
      "step": 1906
    },
    {
      "epoch": 0.3920238462329119,
      "grad_norm": 0.17881381511688232,
      "learning_rate": 8.762908232197379e-05,
      "loss": 0.6021,
      "step": 1907
    },
    {
      "epoch": 0.39222941720629045,
      "grad_norm": 0.25456559658050537,
      "learning_rate": 8.76258819860418e-05,
      "loss": 0.7192,
      "step": 1908
    },
    {
      "epoch": 0.39243498817966904,
      "grad_norm": 0.2538883686065674,
      "learning_rate": 8.762267955013185e-05,
      "loss": 0.6971,
      "step": 1909
    },
    {
      "epoch": 0.3926405591530476,
      "grad_norm": 0.21888628602027893,
      "learning_rate": 8.761947501440166e-05,
      "loss": 0.7097,
      "step": 1910
    },
    {
      "epoch": 0.39284613012642616,
      "grad_norm": 0.2221071869134903,
      "learning_rate": 8.761626837900916e-05,
      "loss": 0.7004,
      "step": 1911
    },
    {
      "epoch": 0.3930517010998047,
      "grad_norm": 0.23489388823509216,
      "learning_rate": 8.761305964411228e-05,
      "loss": 0.6935,
      "step": 1912
    },
    {
      "epoch": 0.3932572720731833,
      "grad_norm": 0.23386436700820923,
      "learning_rate": 8.760984880986915e-05,
      "loss": 0.695,
      "step": 1913
    },
    {
      "epoch": 0.3934628430465618,
      "grad_norm": 0.22081080079078674,
      "learning_rate": 8.760663587643792e-05,
      "loss": 0.6939,
      "step": 1914
    },
    {
      "epoch": 0.3936684140199404,
      "grad_norm": 0.2191271334886551,
      "learning_rate": 8.760342084397688e-05,
      "loss": 0.7055,
      "step": 1915
    },
    {
      "epoch": 0.39387398499331894,
      "grad_norm": 0.16592054069042206,
      "learning_rate": 8.760020371264442e-05,
      "loss": 0.5968,
      "step": 1916
    },
    {
      "epoch": 0.3940795559666975,
      "grad_norm": 0.2341727763414383,
      "learning_rate": 8.759698448259905e-05,
      "loss": 0.7216,
      "step": 1917
    },
    {
      "epoch": 0.39428512694007606,
      "grad_norm": 0.2350844144821167,
      "learning_rate": 8.759376315399935e-05,
      "loss": 0.7036,
      "step": 1918
    },
    {
      "epoch": 0.39449069791345465,
      "grad_norm": 0.1551404446363449,
      "learning_rate": 8.759053972700401e-05,
      "loss": 0.6018,
      "step": 1919
    },
    {
      "epoch": 0.3946962688868332,
      "grad_norm": 0.2272733896970749,
      "learning_rate": 8.758731420177186e-05,
      "loss": 0.7132,
      "step": 1920
    },
    {
      "epoch": 0.3949018398602117,
      "grad_norm": 0.22375091910362244,
      "learning_rate": 8.758408657846177e-05,
      "loss": 0.6917,
      "step": 1921
    },
    {
      "epoch": 0.3951074108335903,
      "grad_norm": 0.14521102607250214,
      "learning_rate": 8.758085685723279e-05,
      "loss": 0.5774,
      "step": 1922
    },
    {
      "epoch": 0.39531298180696883,
      "grad_norm": 0.2234261929988861,
      "learning_rate": 8.757762503824401e-05,
      "loss": 0.7322,
      "step": 1923
    },
    {
      "epoch": 0.3955185527803474,
      "grad_norm": 0.2137596607208252,
      "learning_rate": 8.757439112165465e-05,
      "loss": 0.7094,
      "step": 1924
    },
    {
      "epoch": 0.39572412375372595,
      "grad_norm": 0.15637266635894775,
      "learning_rate": 8.757115510762404e-05,
      "loss": 0.599,
      "step": 1925
    },
    {
      "epoch": 0.39592969472710454,
      "grad_norm": 0.21594415605068207,
      "learning_rate": 8.756791699631159e-05,
      "loss": 0.7096,
      "step": 1926
    },
    {
      "epoch": 0.3961352657004831,
      "grad_norm": 0.21532535552978516,
      "learning_rate": 8.756467678787683e-05,
      "loss": 0.7331,
      "step": 1927
    },
    {
      "epoch": 0.39634083667386166,
      "grad_norm": 0.14360411465168,
      "learning_rate": 8.756143448247938e-05,
      "loss": 0.5832,
      "step": 1928
    },
    {
      "epoch": 0.3965464076472402,
      "grad_norm": 0.2573210597038269,
      "learning_rate": 8.7558190080279e-05,
      "loss": 0.7116,
      "step": 1929
    },
    {
      "epoch": 0.3967519786206188,
      "grad_norm": 0.22037194669246674,
      "learning_rate": 8.755494358143552e-05,
      "loss": 0.6988,
      "step": 1930
    },
    {
      "epoch": 0.3969575495939973,
      "grad_norm": 0.1471826732158661,
      "learning_rate": 8.755169498610885e-05,
      "loss": 0.6081,
      "step": 1931
    },
    {
      "epoch": 0.3971631205673759,
      "grad_norm": 0.24475279450416565,
      "learning_rate": 8.754844429445906e-05,
      "loss": 0.7527,
      "step": 1932
    },
    {
      "epoch": 0.39736869154075444,
      "grad_norm": 0.21802780032157898,
      "learning_rate": 8.754519150664629e-05,
      "loss": 0.6628,
      "step": 1933
    },
    {
      "epoch": 0.397574262514133,
      "grad_norm": 0.14480328559875488,
      "learning_rate": 8.75419366228308e-05,
      "loss": 0.597,
      "step": 1934
    },
    {
      "epoch": 0.39777983348751156,
      "grad_norm": 0.21927644312381744,
      "learning_rate": 8.753867964317292e-05,
      "loss": 0.7108,
      "step": 1935
    },
    {
      "epoch": 0.39798540446089015,
      "grad_norm": 0.20976369082927704,
      "learning_rate": 8.753542056783312e-05,
      "loss": 0.7283,
      "step": 1936
    },
    {
      "epoch": 0.3981909754342687,
      "grad_norm": 0.22052782773971558,
      "learning_rate": 8.753215939697198e-05,
      "loss": 0.7261,
      "step": 1937
    },
    {
      "epoch": 0.39839654640764727,
      "grad_norm": 0.21982043981552124,
      "learning_rate": 8.752889613075012e-05,
      "loss": 0.6902,
      "step": 1938
    },
    {
      "epoch": 0.3986021173810258,
      "grad_norm": 0.24831879138946533,
      "learning_rate": 8.752563076932833e-05,
      "loss": 0.7175,
      "step": 1939
    },
    {
      "epoch": 0.39880768835440433,
      "grad_norm": 0.22775912284851074,
      "learning_rate": 8.75223633128675e-05,
      "loss": 0.7126,
      "step": 1940
    },
    {
      "epoch": 0.3990132593277829,
      "grad_norm": 0.21900929510593414,
      "learning_rate": 8.751909376152854e-05,
      "loss": 0.6947,
      "step": 1941
    },
    {
      "epoch": 0.39921883030116145,
      "grad_norm": 0.22170402109622955,
      "learning_rate": 8.751582211547259e-05,
      "loss": 0.7201,
      "step": 1942
    },
    {
      "epoch": 0.39942440127454004,
      "grad_norm": 0.22413894534111023,
      "learning_rate": 8.751254837486079e-05,
      "loss": 0.7205,
      "step": 1943
    },
    {
      "epoch": 0.3996299722479186,
      "grad_norm": 0.22276797890663147,
      "learning_rate": 8.750927253985443e-05,
      "loss": 0.714,
      "step": 1944
    },
    {
      "epoch": 0.39983554322129716,
      "grad_norm": 0.21520061790943146,
      "learning_rate": 8.750599461061492e-05,
      "loss": 0.7147,
      "step": 1945
    },
    {
      "epoch": 0.4000411141946757,
      "grad_norm": 0.16708485782146454,
      "learning_rate": 8.750271458730372e-05,
      "loss": 0.5976,
      "step": 1946
    },
    {
      "epoch": 0.4002466851680543,
      "grad_norm": 0.24202388525009155,
      "learning_rate": 8.74994324700824e-05,
      "loss": 0.7329,
      "step": 1947
    },
    {
      "epoch": 0.4004522561414328,
      "grad_norm": 0.13979558646678925,
      "learning_rate": 8.749614825911274e-05,
      "loss": 0.5932,
      "step": 1948
    },
    {
      "epoch": 0.4006578271148114,
      "grad_norm": 0.13720543682575226,
      "learning_rate": 8.749286195455645e-05,
      "loss": 0.564,
      "step": 1949
    },
    {
      "epoch": 0.40086339808818994,
      "grad_norm": 0.22568507492542267,
      "learning_rate": 8.748957355657546e-05,
      "loss": 0.7259,
      "step": 1950
    },
    {
      "epoch": 0.4010689690615685,
      "grad_norm": 0.2142673283815384,
      "learning_rate": 8.748628306533178e-05,
      "loss": 0.7024,
      "step": 1951
    },
    {
      "epoch": 0.40127454003494706,
      "grad_norm": 0.2180175483226776,
      "learning_rate": 8.748299048098751e-05,
      "loss": 0.7488,
      "step": 1952
    },
    {
      "epoch": 0.40148011100832565,
      "grad_norm": 0.21027667820453644,
      "learning_rate": 8.747969580370488e-05,
      "loss": 0.708,
      "step": 1953
    },
    {
      "epoch": 0.4016856819817042,
      "grad_norm": 0.21340122818946838,
      "learning_rate": 8.747639903364617e-05,
      "loss": 0.7076,
      "step": 1954
    },
    {
      "epoch": 0.40189125295508277,
      "grad_norm": 0.22183535993099213,
      "learning_rate": 8.747310017097382e-05,
      "loss": 0.6994,
      "step": 1955
    },
    {
      "epoch": 0.4020968239284613,
      "grad_norm": 0.21292465925216675,
      "learning_rate": 8.746979921585035e-05,
      "loss": 0.675,
      "step": 1956
    },
    {
      "epoch": 0.40230239490183983,
      "grad_norm": 0.2158004343509674,
      "learning_rate": 8.746649616843837e-05,
      "loss": 0.727,
      "step": 1957
    },
    {
      "epoch": 0.4025079658752184,
      "grad_norm": 0.20767906308174133,
      "learning_rate": 8.746319102890061e-05,
      "loss": 0.7034,
      "step": 1958
    },
    {
      "epoch": 0.40271353684859695,
      "grad_norm": 0.21342967450618744,
      "learning_rate": 8.74598837973999e-05,
      "loss": 0.7249,
      "step": 1959
    },
    {
      "epoch": 0.40291910782197554,
      "grad_norm": 0.22150453925132751,
      "learning_rate": 8.745657447409917e-05,
      "loss": 0.7209,
      "step": 1960
    },
    {
      "epoch": 0.4031246787953541,
      "grad_norm": 0.20457392930984497,
      "learning_rate": 8.745326305916145e-05,
      "loss": 0.6967,
      "step": 1961
    },
    {
      "epoch": 0.40333024976873266,
      "grad_norm": 0.2096332609653473,
      "learning_rate": 8.744994955274992e-05,
      "loss": 0.7295,
      "step": 1962
    },
    {
      "epoch": 0.4035358207421112,
      "grad_norm": 0.20849314332008362,
      "learning_rate": 8.744663395502776e-05,
      "loss": 0.6962,
      "step": 1963
    },
    {
      "epoch": 0.4037413917154898,
      "grad_norm": 0.21918678283691406,
      "learning_rate": 8.744331626615835e-05,
      "loss": 0.6026,
      "step": 1964
    },
    {
      "epoch": 0.4039469626888683,
      "grad_norm": 0.21508684754371643,
      "learning_rate": 8.743999648630511e-05,
      "loss": 0.7116,
      "step": 1965
    },
    {
      "epoch": 0.4041525336622469,
      "grad_norm": 0.23266804218292236,
      "learning_rate": 8.743667461563161e-05,
      "loss": 0.7314,
      "step": 1966
    },
    {
      "epoch": 0.40435810463562544,
      "grad_norm": 0.21796725690364838,
      "learning_rate": 8.743335065430151e-05,
      "loss": 0.7151,
      "step": 1967
    },
    {
      "epoch": 0.404563675609004,
      "grad_norm": 0.21634382009506226,
      "learning_rate": 8.743002460247855e-05,
      "loss": 0.7272,
      "step": 1968
    },
    {
      "epoch": 0.40476924658238256,
      "grad_norm": 0.21737129986286163,
      "learning_rate": 8.74266964603266e-05,
      "loss": 0.748,
      "step": 1969
    },
    {
      "epoch": 0.40497481755576115,
      "grad_norm": 0.20188266038894653,
      "learning_rate": 8.742336622800962e-05,
      "loss": 0.6833,
      "step": 1970
    },
    {
      "epoch": 0.4051803885291397,
      "grad_norm": 0.21718573570251465,
      "learning_rate": 8.742003390569166e-05,
      "loss": 0.7016,
      "step": 1971
    },
    {
      "epoch": 0.40538595950251827,
      "grad_norm": 0.2084118276834488,
      "learning_rate": 8.741669949353692e-05,
      "loss": 0.6989,
      "step": 1972
    },
    {
      "epoch": 0.4055915304758968,
      "grad_norm": 0.21882924437522888,
      "learning_rate": 8.741336299170963e-05,
      "loss": 0.6893,
      "step": 1973
    },
    {
      "epoch": 0.4057971014492754,
      "grad_norm": 0.2056969553232193,
      "learning_rate": 8.741002440037421e-05,
      "loss": 0.7163,
      "step": 1974
    },
    {
      "epoch": 0.4060026724226539,
      "grad_norm": 0.22237667441368103,
      "learning_rate": 8.740668371969509e-05,
      "loss": 0.7379,
      "step": 1975
    },
    {
      "epoch": 0.40620824339603245,
      "grad_norm": 0.2131538689136505,
      "learning_rate": 8.740334094983688e-05,
      "loss": 0.7185,
      "step": 1976
    },
    {
      "epoch": 0.40641381436941104,
      "grad_norm": 0.20948132872581482,
      "learning_rate": 8.739999609096425e-05,
      "loss": 0.5797,
      "step": 1977
    },
    {
      "epoch": 0.4066193853427896,
      "grad_norm": 0.1722819209098816,
      "learning_rate": 8.7396649143242e-05,
      "loss": 0.5985,
      "step": 1978
    },
    {
      "epoch": 0.40682495631616816,
      "grad_norm": 0.15967948734760284,
      "learning_rate": 8.739330010683498e-05,
      "loss": 0.5984,
      "step": 1979
    },
    {
      "epoch": 0.4070305272895467,
      "grad_norm": 0.29981619119644165,
      "learning_rate": 8.738994898190825e-05,
      "loss": 0.6891,
      "step": 1980
    },
    {
      "epoch": 0.4072360982629253,
      "grad_norm": 0.17661848664283752,
      "learning_rate": 8.738659576862684e-05,
      "loss": 0.5816,
      "step": 1981
    },
    {
      "epoch": 0.4074416692363038,
      "grad_norm": 0.23567262291908264,
      "learning_rate": 8.738324046715597e-05,
      "loss": 0.6944,
      "step": 1982
    },
    {
      "epoch": 0.4076472402096824,
      "grad_norm": 0.23192854225635529,
      "learning_rate": 8.737988307766094e-05,
      "loss": 0.7268,
      "step": 1983
    },
    {
      "epoch": 0.40785281118306094,
      "grad_norm": 0.2210889458656311,
      "learning_rate": 8.737652360030715e-05,
      "loss": 0.711,
      "step": 1984
    },
    {
      "epoch": 0.4080583821564395,
      "grad_norm": 0.22944270074367523,
      "learning_rate": 8.737316203526013e-05,
      "loss": 0.7187,
      "step": 1985
    },
    {
      "epoch": 0.40826395312981806,
      "grad_norm": 0.2202499508857727,
      "learning_rate": 8.736979838268545e-05,
      "loss": 0.6949,
      "step": 1986
    },
    {
      "epoch": 0.40846952410319665,
      "grad_norm": 0.22138486802577972,
      "learning_rate": 8.736643264274885e-05,
      "loss": 0.7328,
      "step": 1987
    },
    {
      "epoch": 0.4086750950765752,
      "grad_norm": 0.22516939043998718,
      "learning_rate": 8.736306481561613e-05,
      "loss": 0.7106,
      "step": 1988
    },
    {
      "epoch": 0.40888066604995377,
      "grad_norm": 0.22086863219738007,
      "learning_rate": 8.735969490145321e-05,
      "loss": 0.6854,
      "step": 1989
    },
    {
      "epoch": 0.4090862370233323,
      "grad_norm": 0.2156277447938919,
      "learning_rate": 8.73563229004261e-05,
      "loss": 0.7179,
      "step": 1990
    },
    {
      "epoch": 0.4092918079967109,
      "grad_norm": 0.26995977759361267,
      "learning_rate": 8.735294881270095e-05,
      "loss": 0.59,
      "step": 1991
    },
    {
      "epoch": 0.4094973789700894,
      "grad_norm": 0.2523725926876068,
      "learning_rate": 8.734957263844397e-05,
      "loss": 0.7057,
      "step": 1992
    },
    {
      "epoch": 0.409702949943468,
      "grad_norm": 0.2281750589609146,
      "learning_rate": 8.734619437782148e-05,
      "loss": 0.7269,
      "step": 1993
    },
    {
      "epoch": 0.40990852091684654,
      "grad_norm": 0.23070600628852844,
      "learning_rate": 8.734281403099992e-05,
      "loss": 0.724,
      "step": 1994
    },
    {
      "epoch": 0.4101140918902251,
      "grad_norm": 0.22441944479942322,
      "learning_rate": 8.733943159814583e-05,
      "loss": 0.7058,
      "step": 1995
    },
    {
      "epoch": 0.41031966286360366,
      "grad_norm": 0.1988096684217453,
      "learning_rate": 8.733604707942584e-05,
      "loss": 0.5961,
      "step": 1996
    },
    {
      "epoch": 0.4105252338369822,
      "grad_norm": 0.16709105670452118,
      "learning_rate": 8.733266047500667e-05,
      "loss": 0.5956,
      "step": 1997
    },
    {
      "epoch": 0.4107308048103608,
      "grad_norm": 0.258070170879364,
      "learning_rate": 8.73292717850552e-05,
      "loss": 0.6959,
      "step": 1998
    },
    {
      "epoch": 0.4109363757837393,
      "grad_norm": 0.24676097929477692,
      "learning_rate": 8.732588100973834e-05,
      "loss": 0.7152,
      "step": 1999
    },
    {
      "epoch": 0.4111419467571179,
      "grad_norm": 0.2049533575773239,
      "learning_rate": 8.732248814922317e-05,
      "loss": 0.603,
      "step": 2000
    },
    {
      "epoch": 0.41134751773049644,
      "grad_norm": 0.24677561223506927,
      "learning_rate": 8.73190932036768e-05,
      "loss": 0.7021,
      "step": 2001
    },
    {
      "epoch": 0.411553088703875,
      "grad_norm": 0.24673065543174744,
      "learning_rate": 8.731569617326652e-05,
      "loss": 0.7424,
      "step": 2002
    },
    {
      "epoch": 0.41175865967725356,
      "grad_norm": 0.23665191233158112,
      "learning_rate": 8.731229705815968e-05,
      "loss": 0.7199,
      "step": 2003
    },
    {
      "epoch": 0.41196423065063215,
      "grad_norm": 0.21852630376815796,
      "learning_rate": 8.730889585852371e-05,
      "loss": 0.7065,
      "step": 2004
    },
    {
      "epoch": 0.4121698016240107,
      "grad_norm": 0.22494211792945862,
      "learning_rate": 8.730549257452622e-05,
      "loss": 0.7032,
      "step": 2005
    },
    {
      "epoch": 0.41237537259738927,
      "grad_norm": 0.21385926008224487,
      "learning_rate": 8.730208720633483e-05,
      "loss": 0.6929,
      "step": 2006
    },
    {
      "epoch": 0.4125809435707678,
      "grad_norm": 0.19130924344062805,
      "learning_rate": 8.729867975411734e-05,
      "loss": 0.5725,
      "step": 2007
    },
    {
      "epoch": 0.4127865145441464,
      "grad_norm": 0.226227268576622,
      "learning_rate": 8.729527021804158e-05,
      "loss": 0.6859,
      "step": 2008
    },
    {
      "epoch": 0.4129920855175249,
      "grad_norm": 0.22433815896511078,
      "learning_rate": 8.729185859827555e-05,
      "loss": 0.7239,
      "step": 2009
    },
    {
      "epoch": 0.4131976564909035,
      "grad_norm": 0.2165122628211975,
      "learning_rate": 8.728844489498733e-05,
      "loss": 0.7045,
      "step": 2010
    },
    {
      "epoch": 0.41340322746428204,
      "grad_norm": 0.21789471805095673,
      "learning_rate": 8.728502910834506e-05,
      "loss": 0.7185,
      "step": 2011
    },
    {
      "epoch": 0.41360879843766063,
      "grad_norm": 0.2177097499370575,
      "learning_rate": 8.728161123851708e-05,
      "loss": 0.7074,
      "step": 2012
    },
    {
      "epoch": 0.41381436941103916,
      "grad_norm": 0.22537820041179657,
      "learning_rate": 8.727819128567171e-05,
      "loss": 0.706,
      "step": 2013
    },
    {
      "epoch": 0.4140199403844177,
      "grad_norm": 0.21425795555114746,
      "learning_rate": 8.727476924997747e-05,
      "loss": 0.6974,
      "step": 2014
    },
    {
      "epoch": 0.4142255113577963,
      "grad_norm": 0.23247577250003815,
      "learning_rate": 8.727134513160296e-05,
      "loss": 0.7111,
      "step": 2015
    },
    {
      "epoch": 0.4144310823311748,
      "grad_norm": 0.21180875599384308,
      "learning_rate": 8.726791893071683e-05,
      "loss": 0.6801,
      "step": 2016
    },
    {
      "epoch": 0.4146366533045534,
      "grad_norm": 0.21250028908252716,
      "learning_rate": 8.72644906474879e-05,
      "loss": 0.7447,
      "step": 2017
    },
    {
      "epoch": 0.41484222427793194,
      "grad_norm": 0.21931192278862,
      "learning_rate": 8.726106028208505e-05,
      "loss": 0.7272,
      "step": 2018
    },
    {
      "epoch": 0.4150477952513105,
      "grad_norm": 0.21856500208377838,
      "learning_rate": 8.72576278346773e-05,
      "loss": 0.7224,
      "step": 2019
    },
    {
      "epoch": 0.41525336622468906,
      "grad_norm": 0.21037447452545166,
      "learning_rate": 8.725419330543373e-05,
      "loss": 0.727,
      "step": 2020
    },
    {
      "epoch": 0.41545893719806765,
      "grad_norm": 0.21209198236465454,
      "learning_rate": 8.725075669452356e-05,
      "loss": 0.7019,
      "step": 2021
    },
    {
      "epoch": 0.4156645081714462,
      "grad_norm": 0.20165219902992249,
      "learning_rate": 8.724731800211608e-05,
      "loss": 0.6149,
      "step": 2022
    },
    {
      "epoch": 0.41587007914482477,
      "grad_norm": 0.22927507758140564,
      "learning_rate": 8.72438772283807e-05,
      "loss": 0.7089,
      "step": 2023
    },
    {
      "epoch": 0.4160756501182033,
      "grad_norm": 0.2256333827972412,
      "learning_rate": 8.724043437348695e-05,
      "loss": 0.7093,
      "step": 2024
    },
    {
      "epoch": 0.4162812210915819,
      "grad_norm": 0.21047276258468628,
      "learning_rate": 8.723698943760443e-05,
      "loss": 0.7246,
      "step": 2025
    },
    {
      "epoch": 0.4164867920649604,
      "grad_norm": 0.21218207478523254,
      "learning_rate": 8.723354242090285e-05,
      "loss": 0.6883,
      "step": 2026
    },
    {
      "epoch": 0.416692363038339,
      "grad_norm": 0.21619375050067902,
      "learning_rate": 8.723009332355203e-05,
      "loss": 0.7068,
      "step": 2027
    },
    {
      "epoch": 0.41689793401171754,
      "grad_norm": 0.215839222073555,
      "learning_rate": 8.72266421457219e-05,
      "loss": 0.6964,
      "step": 2028
    },
    {
      "epoch": 0.41710350498509613,
      "grad_norm": 0.22797274589538574,
      "learning_rate": 8.722318888758248e-05,
      "loss": 0.6966,
      "step": 2029
    },
    {
      "epoch": 0.41730907595847466,
      "grad_norm": 0.2232465296983719,
      "learning_rate": 8.72197335493039e-05,
      "loss": 0.611,
      "step": 2030
    },
    {
      "epoch": 0.4175146469318532,
      "grad_norm": 0.2285899519920349,
      "learning_rate": 8.721627613105637e-05,
      "loss": 0.7202,
      "step": 2031
    },
    {
      "epoch": 0.4177202179052318,
      "grad_norm": 0.23706313967704773,
      "learning_rate": 8.721281663301024e-05,
      "loss": 0.7267,
      "step": 2032
    },
    {
      "epoch": 0.4179257888786103,
      "grad_norm": 0.21476082503795624,
      "learning_rate": 8.720935505533593e-05,
      "loss": 0.7026,
      "step": 2033
    },
    {
      "epoch": 0.4181313598519889,
      "grad_norm": 0.20751173794269562,
      "learning_rate": 8.720589139820399e-05,
      "loss": 0.726,
      "step": 2034
    },
    {
      "epoch": 0.41833693082536744,
      "grad_norm": 0.19482995569705963,
      "learning_rate": 8.720242566178504e-05,
      "loss": 0.5893,
      "step": 2035
    },
    {
      "epoch": 0.418542501798746,
      "grad_norm": 0.2433481514453888,
      "learning_rate": 8.719895784624985e-05,
      "loss": 0.6991,
      "step": 2036
    },
    {
      "epoch": 0.41874807277212456,
      "grad_norm": 0.22105759382247925,
      "learning_rate": 8.719548795176922e-05,
      "loss": 0.7016,
      "step": 2037
    },
    {
      "epoch": 0.41895364374550315,
      "grad_norm": 0.14366379380226135,
      "learning_rate": 8.719201597851414e-05,
      "loss": 0.5847,
      "step": 2038
    },
    {
      "epoch": 0.4191592147188817,
      "grad_norm": 0.15119072794914246,
      "learning_rate": 8.718854192665563e-05,
      "loss": 0.599,
      "step": 2039
    },
    {
      "epoch": 0.41936478569226027,
      "grad_norm": 0.2527151107788086,
      "learning_rate": 8.718506579636484e-05,
      "loss": 0.6794,
      "step": 2040
    },
    {
      "epoch": 0.4195703566656388,
      "grad_norm": 0.1412784457206726,
      "learning_rate": 8.718158758781305e-05,
      "loss": 0.5728,
      "step": 2041
    },
    {
      "epoch": 0.4197759276390174,
      "grad_norm": 0.2282373160123825,
      "learning_rate": 8.717810730117158e-05,
      "loss": 0.7497,
      "step": 2042
    },
    {
      "epoch": 0.4199814986123959,
      "grad_norm": 0.2128640115261078,
      "learning_rate": 8.717462493661192e-05,
      "loss": 0.7085,
      "step": 2043
    },
    {
      "epoch": 0.4201870695857745,
      "grad_norm": 0.22235573828220367,
      "learning_rate": 8.717114049430558e-05,
      "loss": 0.7508,
      "step": 2044
    },
    {
      "epoch": 0.42039264055915304,
      "grad_norm": 0.21980416774749756,
      "learning_rate": 8.716765397442428e-05,
      "loss": 0.7091,
      "step": 2045
    },
    {
      "epoch": 0.42059821153253163,
      "grad_norm": 0.20546141266822815,
      "learning_rate": 8.716416537713978e-05,
      "loss": 0.7008,
      "step": 2046
    },
    {
      "epoch": 0.42080378250591016,
      "grad_norm": 0.2216566503047943,
      "learning_rate": 8.71606747026239e-05,
      "loss": 0.6921,
      "step": 2047
    },
    {
      "epoch": 0.42100935347928875,
      "grad_norm": 0.2280108779668808,
      "learning_rate": 8.715718195104863e-05,
      "loss": 0.7094,
      "step": 2048
    },
    {
      "epoch": 0.4212149244526673,
      "grad_norm": 0.18423175811767578,
      "learning_rate": 8.715368712258605e-05,
      "loss": 0.6069,
      "step": 2049
    },
    {
      "epoch": 0.4214204954260458,
      "grad_norm": 0.22304539382457733,
      "learning_rate": 8.715019021740834e-05,
      "loss": 0.7094,
      "step": 2050
    },
    {
      "epoch": 0.4216260663994244,
      "grad_norm": 0.2160019874572754,
      "learning_rate": 8.714669123568776e-05,
      "loss": 0.7204,
      "step": 2051
    },
    {
      "epoch": 0.42183163737280294,
      "grad_norm": 0.21349206566810608,
      "learning_rate": 8.714319017759671e-05,
      "loss": 0.7041,
      "step": 2052
    },
    {
      "epoch": 0.4220372083461815,
      "grad_norm": 0.2105959951877594,
      "learning_rate": 8.713968704330766e-05,
      "loss": 0.7152,
      "step": 2053
    },
    {
      "epoch": 0.42224277931956006,
      "grad_norm": 0.21072207391262054,
      "learning_rate": 8.713618183299318e-05,
      "loss": 0.7148,
      "step": 2054
    },
    {
      "epoch": 0.42244835029293865,
      "grad_norm": 0.2207954227924347,
      "learning_rate": 8.713267454682595e-05,
      "loss": 0.7272,
      "step": 2055
    },
    {
      "epoch": 0.4226539212663172,
      "grad_norm": 0.21951311826705933,
      "learning_rate": 8.712916518497877e-05,
      "loss": 0.7121,
      "step": 2056
    },
    {
      "epoch": 0.42285949223969577,
      "grad_norm": 0.21501171588897705,
      "learning_rate": 8.712565374762456e-05,
      "loss": 0.7086,
      "step": 2057
    },
    {
      "epoch": 0.4230650632130743,
      "grad_norm": 0.21046118438243866,
      "learning_rate": 8.712214023493628e-05,
      "loss": 0.6967,
      "step": 2058
    },
    {
      "epoch": 0.4232706341864529,
      "grad_norm": 0.1807229071855545,
      "learning_rate": 8.711862464708701e-05,
      "loss": 0.5913,
      "step": 2059
    },
    {
      "epoch": 0.4234762051598314,
      "grad_norm": 0.22645685076713562,
      "learning_rate": 8.711510698424999e-05,
      "loss": 0.7036,
      "step": 2060
    },
    {
      "epoch": 0.42368177613321,
      "grad_norm": 0.22503720223903656,
      "learning_rate": 8.711158724659848e-05,
      "loss": 0.7092,
      "step": 2061
    },
    {
      "epoch": 0.42388734710658854,
      "grad_norm": 0.21952955424785614,
      "learning_rate": 8.71080654343059e-05,
      "loss": 0.7028,
      "step": 2062
    },
    {
      "epoch": 0.42409291807996713,
      "grad_norm": 0.21978265047073364,
      "learning_rate": 8.710454154754574e-05,
      "loss": 0.6954,
      "step": 2063
    },
    {
      "epoch": 0.42429848905334566,
      "grad_norm": 0.21806906163692474,
      "learning_rate": 8.710101558649162e-05,
      "loss": 0.6992,
      "step": 2064
    },
    {
      "epoch": 0.42450406002672425,
      "grad_norm": 0.14885424077510834,
      "learning_rate": 8.709748755131724e-05,
      "loss": 0.5892,
      "step": 2065
    },
    {
      "epoch": 0.4247096310001028,
      "grad_norm": 0.230007603764534,
      "learning_rate": 8.709395744219641e-05,
      "loss": 0.7061,
      "step": 2066
    },
    {
      "epoch": 0.42491520197348137,
      "grad_norm": 0.21456275880336761,
      "learning_rate": 8.709042525930305e-05,
      "loss": 0.699,
      "step": 2067
    },
    {
      "epoch": 0.4251207729468599,
      "grad_norm": 0.21649466454982758,
      "learning_rate": 8.708689100281116e-05,
      "loss": 0.6888,
      "step": 2068
    },
    {
      "epoch": 0.42532634392023844,
      "grad_norm": 0.2111383080482483,
      "learning_rate": 8.708335467289487e-05,
      "loss": 0.7007,
      "step": 2069
    },
    {
      "epoch": 0.425531914893617,
      "grad_norm": 0.2149335891008377,
      "learning_rate": 8.707981626972839e-05,
      "loss": 0.6819,
      "step": 2070
    },
    {
      "epoch": 0.42573748586699556,
      "grad_norm": 0.14442218840122223,
      "learning_rate": 8.707627579348605e-05,
      "loss": 0.5817,
      "step": 2071
    },
    {
      "epoch": 0.42594305684037415,
      "grad_norm": 0.21797578036785126,
      "learning_rate": 8.707273324434225e-05,
      "loss": 0.693,
      "step": 2072
    },
    {
      "epoch": 0.4261486278137527,
      "grad_norm": 0.2137763351202011,
      "learning_rate": 8.706918862247155e-05,
      "loss": 0.7087,
      "step": 2073
    },
    {
      "epoch": 0.42635419878713127,
      "grad_norm": 0.21722511947155,
      "learning_rate": 8.706564192804854e-05,
      "loss": 0.7327,
      "step": 2074
    },
    {
      "epoch": 0.4265597697605098,
      "grad_norm": 0.21744219958782196,
      "learning_rate": 8.706209316124798e-05,
      "loss": 0.7024,
      "step": 2075
    },
    {
      "epoch": 0.4267653407338884,
      "grad_norm": 0.21922947466373444,
      "learning_rate": 8.705854232224467e-05,
      "loss": 0.7089,
      "step": 2076
    },
    {
      "epoch": 0.4269709117072669,
      "grad_norm": 0.20731019973754883,
      "learning_rate": 8.705498941121357e-05,
      "loss": 0.7112,
      "step": 2077
    },
    {
      "epoch": 0.4271764826806455,
      "grad_norm": 0.15655431151390076,
      "learning_rate": 8.705143442832973e-05,
      "loss": 0.5976,
      "step": 2078
    },
    {
      "epoch": 0.42738205365402404,
      "grad_norm": 0.22649213671684265,
      "learning_rate": 8.704787737376822e-05,
      "loss": 0.7271,
      "step": 2079
    },
    {
      "epoch": 0.42758762462740263,
      "grad_norm": 0.2306176871061325,
      "learning_rate": 8.704431824770436e-05,
      "loss": 0.7294,
      "step": 2080
    },
    {
      "epoch": 0.42779319560078116,
      "grad_norm": 0.21303272247314453,
      "learning_rate": 8.704075705031344e-05,
      "loss": 0.703,
      "step": 2081
    },
    {
      "epoch": 0.42799876657415975,
      "grad_norm": 0.2082429826259613,
      "learning_rate": 8.70371937817709e-05,
      "loss": 0.7122,
      "step": 2082
    },
    {
      "epoch": 0.4282043375475383,
      "grad_norm": 0.21812103688716888,
      "learning_rate": 8.703362844225233e-05,
      "loss": 0.6854,
      "step": 2083
    },
    {
      "epoch": 0.42840990852091687,
      "grad_norm": 0.22010985016822815,
      "learning_rate": 8.703006103193334e-05,
      "loss": 0.7085,
      "step": 2084
    },
    {
      "epoch": 0.4286154794942954,
      "grad_norm": 0.21230296790599823,
      "learning_rate": 8.70264915509897e-05,
      "loss": 0.6915,
      "step": 2085
    },
    {
      "epoch": 0.428821050467674,
      "grad_norm": 0.22726766765117645,
      "learning_rate": 8.702291999959725e-05,
      "loss": 0.7325,
      "step": 2086
    },
    {
      "epoch": 0.4290266214410525,
      "grad_norm": 0.22241102159023285,
      "learning_rate": 8.701934637793194e-05,
      "loss": 0.7029,
      "step": 2087
    },
    {
      "epoch": 0.42923219241443106,
      "grad_norm": 0.1587475687265396,
      "learning_rate": 8.701577068616984e-05,
      "loss": 0.5836,
      "step": 2088
    },
    {
      "epoch": 0.42943776338780965,
      "grad_norm": 0.2406635880470276,
      "learning_rate": 8.701219292448708e-05,
      "loss": 0.6863,
      "step": 2089
    },
    {
      "epoch": 0.4296433343611882,
      "grad_norm": 0.21944580972194672,
      "learning_rate": 8.700861309305995e-05,
      "loss": 0.6938,
      "step": 2090
    },
    {
      "epoch": 0.42984890533456677,
      "grad_norm": 0.21135850250720978,
      "learning_rate": 8.700503119206481e-05,
      "loss": 0.685,
      "step": 2091
    },
    {
      "epoch": 0.4300544763079453,
      "grad_norm": 0.20949722826480865,
      "learning_rate": 8.700144722167811e-05,
      "loss": 0.6967,
      "step": 2092
    },
    {
      "epoch": 0.4302600472813239,
      "grad_norm": 0.21594803035259247,
      "learning_rate": 8.699786118207642e-05,
      "loss": 0.7037,
      "step": 2093
    },
    {
      "epoch": 0.4304656182547024,
      "grad_norm": 0.16418609023094177,
      "learning_rate": 8.69942730734364e-05,
      "loss": 0.5692,
      "step": 2094
    },
    {
      "epoch": 0.430671189228081,
      "grad_norm": 0.23615112900733948,
      "learning_rate": 8.699068289593483e-05,
      "loss": 0.7278,
      "step": 2095
    },
    {
      "epoch": 0.43087676020145954,
      "grad_norm": 0.22218084335327148,
      "learning_rate": 8.698709064974858e-05,
      "loss": 0.677,
      "step": 2096
    },
    {
      "epoch": 0.43108233117483813,
      "grad_norm": 0.21628277003765106,
      "learning_rate": 8.698349633505462e-05,
      "loss": 0.6902,
      "step": 2097
    },
    {
      "epoch": 0.43128790214821666,
      "grad_norm": 0.21895258128643036,
      "learning_rate": 8.697989995203002e-05,
      "loss": 0.6952,
      "step": 2098
    },
    {
      "epoch": 0.43149347312159525,
      "grad_norm": 0.21633300185203552,
      "learning_rate": 8.697630150085197e-05,
      "loss": 0.7332,
      "step": 2099
    },
    {
      "epoch": 0.4316990440949738,
      "grad_norm": 0.2174568474292755,
      "learning_rate": 8.697270098169774e-05,
      "loss": 0.6904,
      "step": 2100
    },
    {
      "epoch": 0.43190461506835237,
      "grad_norm": 0.22629016637802124,
      "learning_rate": 8.696909839474473e-05,
      "loss": 0.7198,
      "step": 2101
    },
    {
      "epoch": 0.4321101860417309,
      "grad_norm": 0.20996680855751038,
      "learning_rate": 8.696549374017038e-05,
      "loss": 0.6932,
      "step": 2102
    },
    {
      "epoch": 0.4323157570151095,
      "grad_norm": 0.20978742837905884,
      "learning_rate": 8.696188701815231e-05,
      "loss": 0.684,
      "step": 2103
    },
    {
      "epoch": 0.432521327988488,
      "grad_norm": 0.21533238887786865,
      "learning_rate": 8.695827822886818e-05,
      "loss": 0.7218,
      "step": 2104
    },
    {
      "epoch": 0.43272689896186656,
      "grad_norm": 0.20759303867816925,
      "learning_rate": 8.695466737249582e-05,
      "loss": 0.6742,
      "step": 2105
    },
    {
      "epoch": 0.43293246993524515,
      "grad_norm": 0.17055755853652954,
      "learning_rate": 8.695105444921307e-05,
      "loss": 0.5937,
      "step": 2106
    },
    {
      "epoch": 0.4331380409086237,
      "grad_norm": 0.1438744068145752,
      "learning_rate": 8.694743945919796e-05,
      "loss": 0.5962,
      "step": 2107
    },
    {
      "epoch": 0.43334361188200227,
      "grad_norm": 0.23514226078987122,
      "learning_rate": 8.694382240262857e-05,
      "loss": 0.7071,
      "step": 2108
    },
    {
      "epoch": 0.4335491828553808,
      "grad_norm": 0.16390731930732727,
      "learning_rate": 8.694020327968309e-05,
      "loss": 0.597,
      "step": 2109
    },
    {
      "epoch": 0.4337547538287594,
      "grad_norm": 0.21311801671981812,
      "learning_rate": 8.693658209053983e-05,
      "loss": 0.7061,
      "step": 2110
    },
    {
      "epoch": 0.4339603248021379,
      "grad_norm": 0.21026752889156342,
      "learning_rate": 8.693295883537717e-05,
      "loss": 0.7125,
      "step": 2111
    },
    {
      "epoch": 0.4341658957755165,
      "grad_norm": 0.21940794587135315,
      "learning_rate": 8.692933351437362e-05,
      "loss": 0.7429,
      "step": 2112
    },
    {
      "epoch": 0.43437146674889504,
      "grad_norm": 0.22087624669075012,
      "learning_rate": 8.69257061277078e-05,
      "loss": 0.7089,
      "step": 2113
    },
    {
      "epoch": 0.43457703772227363,
      "grad_norm": 0.21447579562664032,
      "learning_rate": 8.69220766755584e-05,
      "loss": 0.7126,
      "step": 2114
    },
    {
      "epoch": 0.43478260869565216,
      "grad_norm": 0.18616484105587006,
      "learning_rate": 8.691844515810422e-05,
      "loss": 0.5893,
      "step": 2115
    },
    {
      "epoch": 0.43498817966903075,
      "grad_norm": 0.2412138730287552,
      "learning_rate": 8.691481157552418e-05,
      "loss": 0.6838,
      "step": 2116
    },
    {
      "epoch": 0.4351937506424093,
      "grad_norm": 0.2211569845676422,
      "learning_rate": 8.691117592799726e-05,
      "loss": 0.7146,
      "step": 2117
    },
    {
      "epoch": 0.43539932161578787,
      "grad_norm": 0.22833772003650665,
      "learning_rate": 8.690753821570261e-05,
      "loss": 0.6909,
      "step": 2118
    },
    {
      "epoch": 0.4356048925891664,
      "grad_norm": 0.22425860166549683,
      "learning_rate": 8.690389843881944e-05,
      "loss": 0.7387,
      "step": 2119
    },
    {
      "epoch": 0.435810463562545,
      "grad_norm": 0.20990809798240662,
      "learning_rate": 8.690025659752702e-05,
      "loss": 0.7058,
      "step": 2120
    },
    {
      "epoch": 0.4360160345359235,
      "grad_norm": 0.21391835808753967,
      "learning_rate": 8.689661269200483e-05,
      "loss": 0.706,
      "step": 2121
    },
    {
      "epoch": 0.4362216055093021,
      "grad_norm": 0.21198540925979614,
      "learning_rate": 8.689296672243234e-05,
      "loss": 0.6776,
      "step": 2122
    },
    {
      "epoch": 0.43642717648268065,
      "grad_norm": 0.22344285249710083,
      "learning_rate": 8.68893186889892e-05,
      "loss": 0.6062,
      "step": 2123
    },
    {
      "epoch": 0.4366327474560592,
      "grad_norm": 0.23118963837623596,
      "learning_rate": 8.68856685918551e-05,
      "loss": 0.7088,
      "step": 2124
    },
    {
      "epoch": 0.43683831842943777,
      "grad_norm": 0.14518238604068756,
      "learning_rate": 8.68820164312099e-05,
      "loss": 0.5962,
      "step": 2125
    },
    {
      "epoch": 0.4370438894028163,
      "grad_norm": 0.22062361240386963,
      "learning_rate": 8.68783622072335e-05,
      "loss": 0.7169,
      "step": 2126
    },
    {
      "epoch": 0.4372494603761949,
      "grad_norm": 0.21670423448085785,
      "learning_rate": 8.687470592010593e-05,
      "loss": 0.6916,
      "step": 2127
    },
    {
      "epoch": 0.4374550313495734,
      "grad_norm": 0.21488401293754578,
      "learning_rate": 8.687104757000733e-05,
      "loss": 0.7139,
      "step": 2128
    },
    {
      "epoch": 0.437660602322952,
      "grad_norm": 0.22047607600688934,
      "learning_rate": 8.686738715711791e-05,
      "loss": 0.6969,
      "step": 2129
    },
    {
      "epoch": 0.43786617329633054,
      "grad_norm": 0.21157632768154144,
      "learning_rate": 8.686372468161802e-05,
      "loss": 0.7293,
      "step": 2130
    },
    {
      "epoch": 0.43807174426970913,
      "grad_norm": 0.2109154462814331,
      "learning_rate": 8.686006014368806e-05,
      "loss": 0.7178,
      "step": 2131
    },
    {
      "epoch": 0.43827731524308766,
      "grad_norm": 0.2221369594335556,
      "learning_rate": 8.685639354350862e-05,
      "loss": 0.7315,
      "step": 2132
    },
    {
      "epoch": 0.43848288621646625,
      "grad_norm": 0.2168595790863037,
      "learning_rate": 8.68527248812603e-05,
      "loss": 0.7079,
      "step": 2133
    },
    {
      "epoch": 0.4386884571898448,
      "grad_norm": 0.2099953144788742,
      "learning_rate": 8.684905415712383e-05,
      "loss": 0.7007,
      "step": 2134
    },
    {
      "epoch": 0.43889402816322337,
      "grad_norm": 0.21563635766506195,
      "learning_rate": 8.684538137128008e-05,
      "loss": 0.716,
      "step": 2135
    },
    {
      "epoch": 0.4390995991366019,
      "grad_norm": 0.2030235230922699,
      "learning_rate": 8.684170652390996e-05,
      "loss": 0.7029,
      "step": 2136
    },
    {
      "epoch": 0.4393051701099805,
      "grad_norm": 0.21220625936985016,
      "learning_rate": 8.683802961519454e-05,
      "loss": 0.7057,
      "step": 2137
    },
    {
      "epoch": 0.439510741083359,
      "grad_norm": 0.2082281857728958,
      "learning_rate": 8.683435064531496e-05,
      "loss": 0.6924,
      "step": 2138
    },
    {
      "epoch": 0.4397163120567376,
      "grad_norm": 0.2149658501148224,
      "learning_rate": 8.683066961445245e-05,
      "loss": 0.7082,
      "step": 2139
    },
    {
      "epoch": 0.43992188303011615,
      "grad_norm": 0.21991075575351715,
      "learning_rate": 8.682698652278836e-05,
      "loss": 0.7101,
      "step": 2140
    },
    {
      "epoch": 0.44012745400349473,
      "grad_norm": 0.21779777109622955,
      "learning_rate": 8.682330137050415e-05,
      "loss": 0.6922,
      "step": 2141
    },
    {
      "epoch": 0.44033302497687327,
      "grad_norm": 0.21721771359443665,
      "learning_rate": 8.681961415778134e-05,
      "loss": 0.7198,
      "step": 2142
    },
    {
      "epoch": 0.4405385959502518,
      "grad_norm": 0.21693062782287598,
      "learning_rate": 8.681592488480163e-05,
      "loss": 0.74,
      "step": 2143
    },
    {
      "epoch": 0.4407441669236304,
      "grad_norm": 0.21777969598770142,
      "learning_rate": 8.681223355174673e-05,
      "loss": 0.6871,
      "step": 2144
    },
    {
      "epoch": 0.4409497378970089,
      "grad_norm": 0.2129591703414917,
      "learning_rate": 8.680854015879852e-05,
      "loss": 0.6949,
      "step": 2145
    },
    {
      "epoch": 0.4411553088703875,
      "grad_norm": 0.20881325006484985,
      "learning_rate": 8.680484470613896e-05,
      "loss": 0.6919,
      "step": 2146
    },
    {
      "epoch": 0.44136087984376604,
      "grad_norm": 0.21094316244125366,
      "learning_rate": 8.680114719395007e-05,
      "loss": 0.7102,
      "step": 2147
    },
    {
      "epoch": 0.44156645081714463,
      "grad_norm": 0.2205977588891983,
      "learning_rate": 8.679744762241407e-05,
      "loss": 0.6933,
      "step": 2148
    },
    {
      "epoch": 0.44177202179052316,
      "grad_norm": 0.2161235362291336,
      "learning_rate": 8.679374599171317e-05,
      "loss": 0.7472,
      "step": 2149
    },
    {
      "epoch": 0.44197759276390175,
      "grad_norm": 0.2870723009109497,
      "learning_rate": 8.679004230202973e-05,
      "loss": 0.5985,
      "step": 2150
    },
    {
      "epoch": 0.4421831637372803,
      "grad_norm": 0.22053900361061096,
      "learning_rate": 8.678633655354627e-05,
      "loss": 0.7013,
      "step": 2151
    },
    {
      "epoch": 0.44238873471065887,
      "grad_norm": 0.22010482847690582,
      "learning_rate": 8.67826287464453e-05,
      "loss": 0.7361,
      "step": 2152
    },
    {
      "epoch": 0.4425943056840374,
      "grad_norm": 0.2220645248889923,
      "learning_rate": 8.677891888090949e-05,
      "loss": 0.7354,
      "step": 2153
    },
    {
      "epoch": 0.442799876657416,
      "grad_norm": 0.22568100690841675,
      "learning_rate": 8.677520695712164e-05,
      "loss": 0.6069,
      "step": 2154
    },
    {
      "epoch": 0.4430054476307945,
      "grad_norm": 0.21187719702720642,
      "learning_rate": 8.677149297526459e-05,
      "loss": 0.6829,
      "step": 2155
    },
    {
      "epoch": 0.4432110186041731,
      "grad_norm": 0.22478394210338593,
      "learning_rate": 8.676777693552132e-05,
      "loss": 0.6992,
      "step": 2156
    },
    {
      "epoch": 0.44341658957755165,
      "grad_norm": 0.2064889669418335,
      "learning_rate": 8.67640588380749e-05,
      "loss": 0.6845,
      "step": 2157
    },
    {
      "epoch": 0.44362216055093023,
      "grad_norm": 0.21473796665668488,
      "learning_rate": 8.67603386831085e-05,
      "loss": 0.706,
      "step": 2158
    },
    {
      "epoch": 0.44382773152430877,
      "grad_norm": 0.22386027872562408,
      "learning_rate": 8.675661647080541e-05,
      "loss": 0.7064,
      "step": 2159
    },
    {
      "epoch": 0.4440333024976873,
      "grad_norm": 0.21549421548843384,
      "learning_rate": 8.675289220134901e-05,
      "loss": 0.6826,
      "step": 2160
    },
    {
      "epoch": 0.4442388734710659,
      "grad_norm": 0.1654203236103058,
      "learning_rate": 8.674916587492274e-05,
      "loss": 0.5987,
      "step": 2161
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 0.23500193655490875,
      "learning_rate": 8.674543749171023e-05,
      "loss": 0.7202,
      "step": 2162
    },
    {
      "epoch": 0.444650015417823,
      "grad_norm": 0.22905461490154266,
      "learning_rate": 8.67417070518951e-05,
      "loss": 0.7066,
      "step": 2163
    },
    {
      "epoch": 0.44485558639120154,
      "grad_norm": 0.1377820372581482,
      "learning_rate": 8.673797455566118e-05,
      "loss": 0.5963,
      "step": 2164
    },
    {
      "epoch": 0.44506115736458013,
      "grad_norm": 0.21596823632717133,
      "learning_rate": 8.673424000319233e-05,
      "loss": 0.6887,
      "step": 2165
    },
    {
      "epoch": 0.44526672833795866,
      "grad_norm": 0.13856928050518036,
      "learning_rate": 8.673050339467255e-05,
      "loss": 0.5903,
      "step": 2166
    },
    {
      "epoch": 0.44547229931133725,
      "grad_norm": 0.22425222396850586,
      "learning_rate": 8.672676473028591e-05,
      "loss": 0.696,
      "step": 2167
    },
    {
      "epoch": 0.4456778702847158,
      "grad_norm": 0.20974132418632507,
      "learning_rate": 8.672302401021662e-05,
      "loss": 0.6882,
      "step": 2168
    },
    {
      "epoch": 0.44588344125809437,
      "grad_norm": 0.20939786732196808,
      "learning_rate": 8.671928123464893e-05,
      "loss": 0.6787,
      "step": 2169
    },
    {
      "epoch": 0.4460890122314729,
      "grad_norm": 0.21304769814014435,
      "learning_rate": 8.671553640376724e-05,
      "loss": 0.6775,
      "step": 2170
    },
    {
      "epoch": 0.4462945832048515,
      "grad_norm": 0.21474890410900116,
      "learning_rate": 8.671178951775607e-05,
      "loss": 0.6984,
      "step": 2171
    },
    {
      "epoch": 0.44650015417823,
      "grad_norm": 0.2142523229122162,
      "learning_rate": 8.670804057679999e-05,
      "loss": 0.6975,
      "step": 2172
    },
    {
      "epoch": 0.4467057251516086,
      "grad_norm": 0.21635667979717255,
      "learning_rate": 8.670428958108367e-05,
      "loss": 0.6998,
      "step": 2173
    },
    {
      "epoch": 0.44691129612498715,
      "grad_norm": 0.18972234427928925,
      "learning_rate": 8.670053653079194e-05,
      "loss": 0.5905,
      "step": 2174
    },
    {
      "epoch": 0.44711686709836573,
      "grad_norm": 0.22437618672847748,
      "learning_rate": 8.669678142610969e-05,
      "loss": 0.7078,
      "step": 2175
    },
    {
      "epoch": 0.44732243807174427,
      "grad_norm": 0.22813966870307922,
      "learning_rate": 8.669302426722192e-05,
      "loss": 0.6999,
      "step": 2176
    },
    {
      "epoch": 0.44752800904512285,
      "grad_norm": 0.14738696813583374,
      "learning_rate": 8.66892650543137e-05,
      "loss": 0.5654,
      "step": 2177
    },
    {
      "epoch": 0.4477335800185014,
      "grad_norm": 0.2084706872701645,
      "learning_rate": 8.668550378757024e-05,
      "loss": 0.7261,
      "step": 2178
    },
    {
      "epoch": 0.4479391509918799,
      "grad_norm": 0.22098992764949799,
      "learning_rate": 8.668174046717686e-05,
      "loss": 0.7273,
      "step": 2179
    },
    {
      "epoch": 0.4481447219652585,
      "grad_norm": 0.20854520797729492,
      "learning_rate": 8.667797509331895e-05,
      "loss": 0.7197,
      "step": 2180
    },
    {
      "epoch": 0.44835029293863704,
      "grad_norm": 0.2072971910238266,
      "learning_rate": 8.667420766618198e-05,
      "loss": 0.6683,
      "step": 2181
    },
    {
      "epoch": 0.44855586391201563,
      "grad_norm": 0.20528066158294678,
      "learning_rate": 8.667043818595162e-05,
      "loss": 0.7181,
      "step": 2182
    },
    {
      "epoch": 0.44876143488539416,
      "grad_norm": 0.21476523578166962,
      "learning_rate": 8.666666665281352e-05,
      "loss": 0.72,
      "step": 2183
    },
    {
      "epoch": 0.44896700585877275,
      "grad_norm": 0.20512348413467407,
      "learning_rate": 8.666289306695351e-05,
      "loss": 0.6984,
      "step": 2184
    },
    {
      "epoch": 0.4491725768321513,
      "grad_norm": 0.21752099692821503,
      "learning_rate": 8.665911742855748e-05,
      "loss": 0.6836,
      "step": 2185
    },
    {
      "epoch": 0.44937814780552987,
      "grad_norm": 0.21713502705097198,
      "learning_rate": 8.665533973781145e-05,
      "loss": 0.6965,
      "step": 2186
    },
    {
      "epoch": 0.4495837187789084,
      "grad_norm": 0.22159411013126373,
      "learning_rate": 8.665155999490153e-05,
      "loss": 0.7348,
      "step": 2187
    },
    {
      "epoch": 0.449789289752287,
      "grad_norm": 0.20660369098186493,
      "learning_rate": 8.664777820001394e-05,
      "loss": 0.6958,
      "step": 2188
    },
    {
      "epoch": 0.4499948607256655,
      "grad_norm": 0.1848221719264984,
      "learning_rate": 8.664399435333497e-05,
      "loss": 0.5917,
      "step": 2189
    },
    {
      "epoch": 0.4502004316990441,
      "grad_norm": 0.15177948772907257,
      "learning_rate": 8.664020845505104e-05,
      "loss": 0.5976,
      "step": 2190
    },
    {
      "epoch": 0.45040600267242265,
      "grad_norm": 0.23266561329364777,
      "learning_rate": 8.663642050534867e-05,
      "loss": 0.7185,
      "step": 2191
    },
    {
      "epoch": 0.45061157364580123,
      "grad_norm": 0.2253771871328354,
      "learning_rate": 8.663263050441446e-05,
      "loss": 0.6928,
      "step": 2192
    },
    {
      "epoch": 0.45081714461917977,
      "grad_norm": 0.20975717902183533,
      "learning_rate": 8.662883845243515e-05,
      "loss": 0.7157,
      "step": 2193
    },
    {
      "epoch": 0.45102271559255835,
      "grad_norm": 0.23472397029399872,
      "learning_rate": 8.662504434959753e-05,
      "loss": 0.7103,
      "step": 2194
    },
    {
      "epoch": 0.4512282865659369,
      "grad_norm": 0.22584107518196106,
      "learning_rate": 8.662124819608853e-05,
      "loss": 0.7278,
      "step": 2195
    },
    {
      "epoch": 0.4514338575393155,
      "grad_norm": 0.22365206480026245,
      "learning_rate": 8.661744999209518e-05,
      "loss": 0.599,
      "step": 2196
    },
    {
      "epoch": 0.451639428512694,
      "grad_norm": 0.24951714277267456,
      "learning_rate": 8.661364973780458e-05,
      "loss": 0.7315,
      "step": 2197
    },
    {
      "epoch": 0.45184499948607254,
      "grad_norm": 0.22680872678756714,
      "learning_rate": 8.660984743340396e-05,
      "loss": 0.7005,
      "step": 2198
    },
    {
      "epoch": 0.45205057045945113,
      "grad_norm": 0.22146962583065033,
      "learning_rate": 8.660604307908063e-05,
      "loss": 0.6956,
      "step": 2199
    },
    {
      "epoch": 0.45225614143282966,
      "grad_norm": 0.16175302863121033,
      "learning_rate": 8.660223667502205e-05,
      "loss": 0.5844,
      "step": 2200
    },
    {
      "epoch": 0.45246171240620825,
      "grad_norm": 0.24984121322631836,
      "learning_rate": 8.65984282214157e-05,
      "loss": 0.7104,
      "step": 2201
    },
    {
      "epoch": 0.4526672833795868,
      "grad_norm": 0.23822738230228424,
      "learning_rate": 8.659461771844923e-05,
      "loss": 0.7287,
      "step": 2202
    },
    {
      "epoch": 0.45287285435296537,
      "grad_norm": 0.21192102134227753,
      "learning_rate": 8.659080516631036e-05,
      "loss": 0.714,
      "step": 2203
    },
    {
      "epoch": 0.4530784253263439,
      "grad_norm": 0.23573461174964905,
      "learning_rate": 8.65869905651869e-05,
      "loss": 0.7125,
      "step": 2204
    },
    {
      "epoch": 0.4532839962997225,
      "grad_norm": 0.22849269211292267,
      "learning_rate": 8.658317391526678e-05,
      "loss": 0.7213,
      "step": 2205
    },
    {
      "epoch": 0.453489567273101,
      "grad_norm": 0.2162596434354782,
      "learning_rate": 8.657935521673808e-05,
      "loss": 0.7036,
      "step": 2206
    },
    {
      "epoch": 0.4536951382464796,
      "grad_norm": 0.22291293740272522,
      "learning_rate": 8.657553446978885e-05,
      "loss": 0.7055,
      "step": 2207
    },
    {
      "epoch": 0.45390070921985815,
      "grad_norm": 0.23885302245616913,
      "learning_rate": 8.657171167460738e-05,
      "loss": 0.7177,
      "step": 2208
    },
    {
      "epoch": 0.45410628019323673,
      "grad_norm": 0.1670546680688858,
      "learning_rate": 8.656788683138198e-05,
      "loss": 0.5963,
      "step": 2209
    },
    {
      "epoch": 0.45431185116661527,
      "grad_norm": 0.26193171739578247,
      "learning_rate": 8.656405994030109e-05,
      "loss": 0.6881,
      "step": 2210
    },
    {
      "epoch": 0.45451742213999385,
      "grad_norm": 0.2238868772983551,
      "learning_rate": 8.656023100155324e-05,
      "loss": 0.6955,
      "step": 2211
    },
    {
      "epoch": 0.4547229931133724,
      "grad_norm": 0.22464968264102936,
      "learning_rate": 8.655640001532704e-05,
      "loss": 0.6937,
      "step": 2212
    },
    {
      "epoch": 0.454928564086751,
      "grad_norm": 0.2210894376039505,
      "learning_rate": 8.655256698181125e-05,
      "loss": 0.7033,
      "step": 2213
    },
    {
      "epoch": 0.4551341350601295,
      "grad_norm": 0.2309311479330063,
      "learning_rate": 8.654873190119472e-05,
      "loss": 0.6877,
      "step": 2214
    },
    {
      "epoch": 0.4553397060335081,
      "grad_norm": 0.15510539710521698,
      "learning_rate": 8.654489477366635e-05,
      "loss": 0.6074,
      "step": 2215
    },
    {
      "epoch": 0.45554527700688663,
      "grad_norm": 0.1340515911579132,
      "learning_rate": 8.654105559941519e-05,
      "loss": 0.5916,
      "step": 2216
    },
    {
      "epoch": 0.45575084798026516,
      "grad_norm": 0.3258119225502014,
      "learning_rate": 8.653721437863041e-05,
      "loss": 0.6729,
      "step": 2217
    },
    {
      "epoch": 0.45595641895364375,
      "grad_norm": 0.24723531305789948,
      "learning_rate": 8.653337111150121e-05,
      "loss": 0.6963,
      "step": 2218
    },
    {
      "epoch": 0.4561619899270223,
      "grad_norm": 0.16881807148456573,
      "learning_rate": 8.652952579821693e-05,
      "loss": 0.5994,
      "step": 2219
    },
    {
      "epoch": 0.45636756090040087,
      "grad_norm": 0.16700582206249237,
      "learning_rate": 8.652567843896702e-05,
      "loss": 0.5822,
      "step": 2220
    },
    {
      "epoch": 0.4565731318737794,
      "grad_norm": 0.1435755044221878,
      "learning_rate": 8.652182903394105e-05,
      "loss": 0.5809,
      "step": 2221
    },
    {
      "epoch": 0.456778702847158,
      "grad_norm": 0.14672505855560303,
      "learning_rate": 8.651797758332862e-05,
      "loss": 0.5943,
      "step": 2222
    },
    {
      "epoch": 0.4569842738205365,
      "grad_norm": 0.3784264922142029,
      "learning_rate": 8.651412408731949e-05,
      "loss": 0.7184,
      "step": 2223
    },
    {
      "epoch": 0.4571898447939151,
      "grad_norm": 0.24264433979988098,
      "learning_rate": 8.651026854610348e-05,
      "loss": 0.6976,
      "step": 2224
    },
    {
      "epoch": 0.45739541576729364,
      "grad_norm": 0.26151180267333984,
      "learning_rate": 8.650641095987059e-05,
      "loss": 0.6998,
      "step": 2225
    },
    {
      "epoch": 0.45760098674067223,
      "grad_norm": 0.33650773763656616,
      "learning_rate": 8.650255132881082e-05,
      "loss": 0.7366,
      "step": 2226
    },
    {
      "epoch": 0.45780655771405077,
      "grad_norm": 0.27262553572654724,
      "learning_rate": 8.649868965311432e-05,
      "loss": 0.7319,
      "step": 2227
    },
    {
      "epoch": 0.45801212868742935,
      "grad_norm": 0.2205299288034439,
      "learning_rate": 8.649482593297135e-05,
      "loss": 0.6905,
      "step": 2228
    },
    {
      "epoch": 0.4582176996608079,
      "grad_norm": 0.2557431757450104,
      "learning_rate": 8.649096016857226e-05,
      "loss": 0.6974,
      "step": 2229
    },
    {
      "epoch": 0.4584232706341865,
      "grad_norm": 0.27587607502937317,
      "learning_rate": 8.648709236010749e-05,
      "loss": 0.7024,
      "step": 2230
    },
    {
      "epoch": 0.458628841607565,
      "grad_norm": 0.32615306973457336,
      "learning_rate": 8.64832225077676e-05,
      "loss": 0.6211,
      "step": 2231
    },
    {
      "epoch": 0.4588344125809436,
      "grad_norm": 0.24620257318019867,
      "learning_rate": 8.647935061174321e-05,
      "loss": 0.7277,
      "step": 2232
    },
    {
      "epoch": 0.45903998355432213,
      "grad_norm": 0.2339821755886078,
      "learning_rate": 8.647547667222509e-05,
      "loss": 0.7122,
      "step": 2233
    },
    {
      "epoch": 0.45924555452770066,
      "grad_norm": 0.21899057924747467,
      "learning_rate": 8.647160068940411e-05,
      "loss": 0.7294,
      "step": 2234
    },
    {
      "epoch": 0.45945112550107925,
      "grad_norm": 0.21356239914894104,
      "learning_rate": 8.646772266347119e-05,
      "loss": 0.7077,
      "step": 2235
    },
    {
      "epoch": 0.4596566964744578,
      "grad_norm": 0.21990163624286652,
      "learning_rate": 8.646384259461737e-05,
      "loss": 0.6991,
      "step": 2236
    },
    {
      "epoch": 0.45986226744783637,
      "grad_norm": 0.2190622240304947,
      "learning_rate": 8.645996048303385e-05,
      "loss": 0.7178,
      "step": 2237
    },
    {
      "epoch": 0.4600678384212149,
      "grad_norm": 0.20803511142730713,
      "learning_rate": 8.645607632891187e-05,
      "loss": 0.6785,
      "step": 2238
    },
    {
      "epoch": 0.4602734093945935,
      "grad_norm": 0.20758850872516632,
      "learning_rate": 8.645219013244277e-05,
      "loss": 0.6661,
      "step": 2239
    },
    {
      "epoch": 0.460478980367972,
      "grad_norm": 0.21537218987941742,
      "learning_rate": 8.6448301893818e-05,
      "loss": 0.7075,
      "step": 2240
    },
    {
      "epoch": 0.4606845513413506,
      "grad_norm": 0.2241329848766327,
      "learning_rate": 8.644441161322912e-05,
      "loss": 0.7014,
      "step": 2241
    },
    {
      "epoch": 0.46089012231472914,
      "grad_norm": 0.20497076213359833,
      "learning_rate": 8.64405192908678e-05,
      "loss": 0.6964,
      "step": 2242
    },
    {
      "epoch": 0.46109569328810773,
      "grad_norm": 0.20961910486221313,
      "learning_rate": 8.643662492692578e-05,
      "loss": 0.6976,
      "step": 2243
    },
    {
      "epoch": 0.46130126426148627,
      "grad_norm": 0.2163321077823639,
      "learning_rate": 8.643272852159493e-05,
      "loss": 0.7253,
      "step": 2244
    },
    {
      "epoch": 0.46150683523486485,
      "grad_norm": 0.21539649367332458,
      "learning_rate": 8.642883007506721e-05,
      "loss": 0.6848,
      "step": 2245
    },
    {
      "epoch": 0.4617124062082434,
      "grad_norm": 0.2067098766565323,
      "learning_rate": 8.642492958753465e-05,
      "loss": 0.7156,
      "step": 2246
    },
    {
      "epoch": 0.461917977181622,
      "grad_norm": 0.21964769065380096,
      "learning_rate": 8.642102705918945e-05,
      "loss": 0.6989,
      "step": 2247
    },
    {
      "epoch": 0.4621235481550005,
      "grad_norm": 0.2275928258895874,
      "learning_rate": 8.641712249022384e-05,
      "loss": 0.6847,
      "step": 2248
    },
    {
      "epoch": 0.4623291191283791,
      "grad_norm": 0.2040269672870636,
      "learning_rate": 8.641321588083018e-05,
      "loss": 0.6973,
      "step": 2249
    },
    {
      "epoch": 0.46253469010175763,
      "grad_norm": 0.23092588782310486,
      "learning_rate": 8.640930723120093e-05,
      "loss": 0.7266,
      "step": 2250
    },
    {
      "epoch": 0.4627402610751362,
      "grad_norm": 0.2156527191400528,
      "learning_rate": 8.640539654152868e-05,
      "loss": 0.7062,
      "step": 2251
    },
    {
      "epoch": 0.46294583204851475,
      "grad_norm": 0.2142401933670044,
      "learning_rate": 8.640148381200607e-05,
      "loss": 0.7047,
      "step": 2252
    },
    {
      "epoch": 0.4631514030218933,
      "grad_norm": 0.31457456946372986,
      "learning_rate": 8.639756904282586e-05,
      "loss": 0.6032,
      "step": 2253
    },
    {
      "epoch": 0.46335697399527187,
      "grad_norm": 0.23436057567596436,
      "learning_rate": 8.639365223418091e-05,
      "loss": 0.7436,
      "step": 2254
    },
    {
      "epoch": 0.4635625449686504,
      "grad_norm": 0.14833630621433258,
      "learning_rate": 8.638973338626418e-05,
      "loss": 0.588,
      "step": 2255
    },
    {
      "epoch": 0.463768115942029,
      "grad_norm": 0.24190352857112885,
      "learning_rate": 8.638581249926876e-05,
      "loss": 0.7079,
      "step": 2256
    },
    {
      "epoch": 0.4639736869154075,
      "grad_norm": 0.2287464588880539,
      "learning_rate": 8.638188957338778e-05,
      "loss": 0.6983,
      "step": 2257
    },
    {
      "epoch": 0.4641792578887861,
      "grad_norm": 0.24814251065254211,
      "learning_rate": 8.637796460881454e-05,
      "loss": 0.707,
      "step": 2258
    },
    {
      "epoch": 0.46438482886216464,
      "grad_norm": 0.22504420578479767,
      "learning_rate": 8.637403760574236e-05,
      "loss": 0.7045,
      "step": 2259
    },
    {
      "epoch": 0.46459039983554323,
      "grad_norm": 0.21358801424503326,
      "learning_rate": 8.637010856436475e-05,
      "loss": 0.7027,
      "step": 2260
    },
    {
      "epoch": 0.46479597080892177,
      "grad_norm": 0.21219758689403534,
      "learning_rate": 8.636617748487523e-05,
      "loss": 0.689,
      "step": 2261
    },
    {
      "epoch": 0.46500154178230035,
      "grad_norm": 0.21138092875480652,
      "learning_rate": 8.63622443674675e-05,
      "loss": 0.7208,
      "step": 2262
    },
    {
      "epoch": 0.4652071127556789,
      "grad_norm": 0.27241116762161255,
      "learning_rate": 8.635830921233532e-05,
      "loss": 0.5964,
      "step": 2263
    },
    {
      "epoch": 0.4654126837290575,
      "grad_norm": 0.2141522914171219,
      "learning_rate": 8.635437201967255e-05,
      "loss": 0.7362,
      "step": 2264
    },
    {
      "epoch": 0.465618254702436,
      "grad_norm": 0.2085803896188736,
      "learning_rate": 8.635043278967317e-05,
      "loss": 0.6859,
      "step": 2265
    },
    {
      "epoch": 0.4658238256758146,
      "grad_norm": 0.21698498725891113,
      "learning_rate": 8.634649152253123e-05,
      "loss": 0.7078,
      "step": 2266
    },
    {
      "epoch": 0.46602939664919313,
      "grad_norm": 0.19954286515712738,
      "learning_rate": 8.63425482184409e-05,
      "loss": 0.6877,
      "step": 2267
    },
    {
      "epoch": 0.4662349676225717,
      "grad_norm": 0.18924130499362946,
      "learning_rate": 8.633860287759646e-05,
      "loss": 0.6001,
      "step": 2268
    },
    {
      "epoch": 0.46644053859595025,
      "grad_norm": 0.15498289465904236,
      "learning_rate": 8.633465550019227e-05,
      "loss": 0.5894,
      "step": 2269
    },
    {
      "epoch": 0.46664610956932884,
      "grad_norm": 0.2448817938566208,
      "learning_rate": 8.633070608642282e-05,
      "loss": 0.6883,
      "step": 2270
    },
    {
      "epoch": 0.46685168054270737,
      "grad_norm": 0.24218863248825073,
      "learning_rate": 8.632675463648264e-05,
      "loss": 0.7305,
      "step": 2271
    },
    {
      "epoch": 0.4670572515160859,
      "grad_norm": 0.21386098861694336,
      "learning_rate": 8.632280115056642e-05,
      "loss": 0.703,
      "step": 2272
    },
    {
      "epoch": 0.4672628224894645,
      "grad_norm": 0.20794478058815002,
      "learning_rate": 8.631884562886894e-05,
      "loss": 0.7054,
      "step": 2273
    },
    {
      "epoch": 0.467468393462843,
      "grad_norm": 0.22331750392913818,
      "learning_rate": 8.631488807158505e-05,
      "loss": 0.7116,
      "step": 2274
    },
    {
      "epoch": 0.4676739644362216,
      "grad_norm": 0.22476287186145782,
      "learning_rate": 8.631092847890973e-05,
      "loss": 0.7001,
      "step": 2275
    },
    {
      "epoch": 0.46787953540960014,
      "grad_norm": 0.23165211081504822,
      "learning_rate": 8.630696685103806e-05,
      "loss": 0.5924,
      "step": 2276
    },
    {
      "epoch": 0.46808510638297873,
      "grad_norm": 0.17003892362117767,
      "learning_rate": 8.63030031881652e-05,
      "loss": 0.5951,
      "step": 2277
    },
    {
      "epoch": 0.46829067735635727,
      "grad_norm": 0.14959658682346344,
      "learning_rate": 8.629903749048642e-05,
      "loss": 0.5875,
      "step": 2278
    },
    {
      "epoch": 0.46849624832973585,
      "grad_norm": 0.28558462858200073,
      "learning_rate": 8.629506975819709e-05,
      "loss": 0.7339,
      "step": 2279
    },
    {
      "epoch": 0.4687018193031144,
      "grad_norm": 0.2474449872970581,
      "learning_rate": 8.629109999149268e-05,
      "loss": 0.7125,
      "step": 2280
    },
    {
      "epoch": 0.468907390276493,
      "grad_norm": 0.22551508247852325,
      "learning_rate": 8.628712819056878e-05,
      "loss": 0.7266,
      "step": 2281
    },
    {
      "epoch": 0.4691129612498715,
      "grad_norm": 0.23484089970588684,
      "learning_rate": 8.628315435562105e-05,
      "loss": 0.686,
      "step": 2282
    },
    {
      "epoch": 0.4693185322232501,
      "grad_norm": 0.2324771285057068,
      "learning_rate": 8.627917848684525e-05,
      "loss": 0.7387,
      "step": 2283
    },
    {
      "epoch": 0.46952410319662863,
      "grad_norm": 0.28548941016197205,
      "learning_rate": 8.627520058443727e-05,
      "loss": 0.6007,
      "step": 2284
    },
    {
      "epoch": 0.4697296741700072,
      "grad_norm": 0.1830257922410965,
      "learning_rate": 8.627122064859307e-05,
      "loss": 0.5817,
      "step": 2285
    },
    {
      "epoch": 0.46993524514338575,
      "grad_norm": 0.2828942835330963,
      "learning_rate": 8.626723867950875e-05,
      "loss": 0.6864,
      "step": 2286
    },
    {
      "epoch": 0.47014081611676434,
      "grad_norm": 0.20021386444568634,
      "learning_rate": 8.626325467738045e-05,
      "loss": 0.5965,
      "step": 2287
    },
    {
      "epoch": 0.47034638709014287,
      "grad_norm": 0.2412208914756775,
      "learning_rate": 8.625926864240445e-05,
      "loss": 0.7398,
      "step": 2288
    },
    {
      "epoch": 0.47055195806352146,
      "grad_norm": 0.2284758985042572,
      "learning_rate": 8.625528057477714e-05,
      "loss": 0.7037,
      "step": 2289
    },
    {
      "epoch": 0.4707575290369,
      "grad_norm": 0.22256653010845184,
      "learning_rate": 8.625129047469498e-05,
      "loss": 0.6852,
      "step": 2290
    },
    {
      "epoch": 0.4709631000102785,
      "grad_norm": 0.21506358683109283,
      "learning_rate": 8.624729834235455e-05,
      "loss": 0.6848,
      "step": 2291
    },
    {
      "epoch": 0.4711686709836571,
      "grad_norm": 0.2219688594341278,
      "learning_rate": 8.624330417795251e-05,
      "loss": 0.7025,
      "step": 2292
    },
    {
      "epoch": 0.47137424195703564,
      "grad_norm": 0.22017613053321838,
      "learning_rate": 8.623930798168564e-05,
      "loss": 0.6911,
      "step": 2293
    },
    {
      "epoch": 0.47157981293041423,
      "grad_norm": 0.2322702705860138,
      "learning_rate": 8.623530975375084e-05,
      "loss": 0.6266,
      "step": 2294
    },
    {
      "epoch": 0.47178538390379277,
      "grad_norm": 0.25697195529937744,
      "learning_rate": 8.623130949434505e-05,
      "loss": 0.7211,
      "step": 2295
    },
    {
      "epoch": 0.47199095487717135,
      "grad_norm": 0.16440944373607635,
      "learning_rate": 8.622730720366535e-05,
      "loss": 0.6019,
      "step": 2296
    },
    {
      "epoch": 0.4721965258505499,
      "grad_norm": 0.2459285408258438,
      "learning_rate": 8.622330288190893e-05,
      "loss": 0.6854,
      "step": 2297
    },
    {
      "epoch": 0.4724020968239285,
      "grad_norm": 0.25851428508758545,
      "learning_rate": 8.621929652927306e-05,
      "loss": 0.6919,
      "step": 2298
    },
    {
      "epoch": 0.472607667797307,
      "grad_norm": 0.17177143692970276,
      "learning_rate": 8.621528814595508e-05,
      "loss": 0.5922,
      "step": 2299
    },
    {
      "epoch": 0.4728132387706856,
      "grad_norm": 0.22151097655296326,
      "learning_rate": 8.621127773215252e-05,
      "loss": 0.6958,
      "step": 2300
    },
    {
      "epoch": 0.47301880974406413,
      "grad_norm": 0.2228916585445404,
      "learning_rate": 8.620726528806292e-05,
      "loss": 0.7062,
      "step": 2301
    },
    {
      "epoch": 0.4732243807174427,
      "grad_norm": 0.17388984560966492,
      "learning_rate": 8.620325081388396e-05,
      "loss": 0.5868,
      "step": 2302
    },
    {
      "epoch": 0.47342995169082125,
      "grad_norm": 0.22164839506149292,
      "learning_rate": 8.61992343098134e-05,
      "loss": 0.6753,
      "step": 2303
    },
    {
      "epoch": 0.47363552266419984,
      "grad_norm": 0.2175762802362442,
      "learning_rate": 8.619521577604915e-05,
      "loss": 0.7057,
      "step": 2304
    },
    {
      "epoch": 0.47384109363757837,
      "grad_norm": 0.21533454954624176,
      "learning_rate": 8.619119521278916e-05,
      "loss": 0.6798,
      "step": 2305
    },
    {
      "epoch": 0.47404666461095696,
      "grad_norm": 0.23147819936275482,
      "learning_rate": 8.618717262023151e-05,
      "loss": 0.7162,
      "step": 2306
    },
    {
      "epoch": 0.4742522355843355,
      "grad_norm": 0.21729323267936707,
      "learning_rate": 8.618314799857437e-05,
      "loss": 0.7169,
      "step": 2307
    },
    {
      "epoch": 0.474457806557714,
      "grad_norm": 0.19784866273403168,
      "learning_rate": 8.617912134801603e-05,
      "loss": 0.6863,
      "step": 2308
    },
    {
      "epoch": 0.4746633775310926,
      "grad_norm": 0.20950141549110413,
      "learning_rate": 8.617509266875484e-05,
      "loss": 0.6784,
      "step": 2309
    },
    {
      "epoch": 0.47486894850447114,
      "grad_norm": 0.2207701951265335,
      "learning_rate": 8.617106196098928e-05,
      "loss": 0.7182,
      "step": 2310
    },
    {
      "epoch": 0.47507451947784973,
      "grad_norm": 0.21060660481452942,
      "learning_rate": 8.616702922491794e-05,
      "loss": 0.7051,
      "step": 2311
    },
    {
      "epoch": 0.47528009045122827,
      "grad_norm": 0.21560098230838776,
      "learning_rate": 8.616299446073948e-05,
      "loss": 0.7186,
      "step": 2312
    },
    {
      "epoch": 0.47548566142460685,
      "grad_norm": 0.20710930228233337,
      "learning_rate": 8.615895766865268e-05,
      "loss": 0.6939,
      "step": 2313
    },
    {
      "epoch": 0.4756912323979854,
      "grad_norm": 0.20942838490009308,
      "learning_rate": 8.615491884885642e-05,
      "loss": 0.6854,
      "step": 2314
    },
    {
      "epoch": 0.475896803371364,
      "grad_norm": 0.21396920084953308,
      "learning_rate": 8.615087800154966e-05,
      "loss": 0.6919,
      "step": 2315
    },
    {
      "epoch": 0.4761023743447425,
      "grad_norm": 0.20860084891319275,
      "learning_rate": 8.614683512693147e-05,
      "loss": 0.715,
      "step": 2316
    },
    {
      "epoch": 0.4763079453181211,
      "grad_norm": 0.19696597754955292,
      "learning_rate": 8.614279022520105e-05,
      "loss": 0.7004,
      "step": 2317
    },
    {
      "epoch": 0.47651351629149963,
      "grad_norm": 0.214441180229187,
      "learning_rate": 8.613874329655765e-05,
      "loss": 0.695,
      "step": 2318
    },
    {
      "epoch": 0.4767190872648782,
      "grad_norm": 0.20082063972949982,
      "learning_rate": 8.613469434120065e-05,
      "loss": 0.69,
      "step": 2319
    },
    {
      "epoch": 0.47692465823825675,
      "grad_norm": 0.20159681141376495,
      "learning_rate": 8.613064335932952e-05,
      "loss": 0.6772,
      "step": 2320
    },
    {
      "epoch": 0.47713022921163534,
      "grad_norm": 0.20627199113368988,
      "learning_rate": 8.612659035114383e-05,
      "loss": 0.6884,
      "step": 2321
    },
    {
      "epoch": 0.47733580018501387,
      "grad_norm": 0.19715279340744019,
      "learning_rate": 8.612253531684328e-05,
      "loss": 0.5856,
      "step": 2322
    },
    {
      "epoch": 0.47754137115839246,
      "grad_norm": 0.21673934161663055,
      "learning_rate": 8.61184782566276e-05,
      "loss": 0.7141,
      "step": 2323
    },
    {
      "epoch": 0.477746942131771,
      "grad_norm": 0.21236567199230194,
      "learning_rate": 8.611441917069668e-05,
      "loss": 0.7081,
      "step": 2324
    },
    {
      "epoch": 0.4779525131051496,
      "grad_norm": 0.22194881737232208,
      "learning_rate": 8.61103580592505e-05,
      "loss": 0.725,
      "step": 2325
    },
    {
      "epoch": 0.4781580840785281,
      "grad_norm": 0.20836539566516876,
      "learning_rate": 8.610629492248915e-05,
      "loss": 0.6872,
      "step": 2326
    },
    {
      "epoch": 0.47836365505190664,
      "grad_norm": 0.20728257298469543,
      "learning_rate": 8.610222976061275e-05,
      "loss": 0.6898,
      "step": 2327
    },
    {
      "epoch": 0.47856922602528523,
      "grad_norm": 0.2103557288646698,
      "learning_rate": 8.609816257382162e-05,
      "loss": 0.6939,
      "step": 2328
    },
    {
      "epoch": 0.47877479699866377,
      "grad_norm": 0.18069760501384735,
      "learning_rate": 8.609409336231611e-05,
      "loss": 0.5892,
      "step": 2329
    },
    {
      "epoch": 0.47898036797204235,
      "grad_norm": 0.21599088609218597,
      "learning_rate": 8.609002212629668e-05,
      "loss": 0.7186,
      "step": 2330
    },
    {
      "epoch": 0.4791859389454209,
      "grad_norm": 0.22007983922958374,
      "learning_rate": 8.608594886596392e-05,
      "loss": 0.6984,
      "step": 2331
    },
    {
      "epoch": 0.4793915099187995,
      "grad_norm": 0.13403122127056122,
      "learning_rate": 8.608187358151852e-05,
      "loss": 0.5937,
      "step": 2332
    },
    {
      "epoch": 0.479597080892178,
      "grad_norm": 0.21932478249073029,
      "learning_rate": 8.607779627316119e-05,
      "loss": 0.6969,
      "step": 2333
    },
    {
      "epoch": 0.4798026518655566,
      "grad_norm": 0.22216017544269562,
      "learning_rate": 8.607371694109285e-05,
      "loss": 0.7011,
      "step": 2334
    },
    {
      "epoch": 0.48000822283893513,
      "grad_norm": 0.20484335720539093,
      "learning_rate": 8.606963558551445e-05,
      "loss": 0.6637,
      "step": 2335
    },
    {
      "epoch": 0.4802137938123137,
      "grad_norm": 0.22132568061351776,
      "learning_rate": 8.606555220662707e-05,
      "loss": 0.7098,
      "step": 2336
    },
    {
      "epoch": 0.48041936478569225,
      "grad_norm": 0.15403473377227783,
      "learning_rate": 8.606146680463187e-05,
      "loss": 0.5913,
      "step": 2337
    },
    {
      "epoch": 0.48062493575907084,
      "grad_norm": 0.21559444069862366,
      "learning_rate": 8.605737937973011e-05,
      "loss": 0.7038,
      "step": 2338
    },
    {
      "epoch": 0.48083050673244937,
      "grad_norm": 0.13026109337806702,
      "learning_rate": 8.605328993212317e-05,
      "loss": 0.5778,
      "step": 2339
    },
    {
      "epoch": 0.48103607770582796,
      "grad_norm": 0.2200099676847458,
      "learning_rate": 8.604919846201255e-05,
      "loss": 0.7091,
      "step": 2340
    },
    {
      "epoch": 0.4812416486792065,
      "grad_norm": 0.21221928298473358,
      "learning_rate": 8.604510496959975e-05,
      "loss": 0.7062,
      "step": 2341
    },
    {
      "epoch": 0.4814472196525851,
      "grad_norm": 0.20801213383674622,
      "learning_rate": 8.604100945508648e-05,
      "loss": 0.6884,
      "step": 2342
    },
    {
      "epoch": 0.4816527906259636,
      "grad_norm": 0.23321124911308289,
      "learning_rate": 8.603691191867451e-05,
      "loss": 0.6849,
      "step": 2343
    },
    {
      "epoch": 0.4818583615993422,
      "grad_norm": 0.1625455915927887,
      "learning_rate": 8.603281236056569e-05,
      "loss": 0.5854,
      "step": 2344
    },
    {
      "epoch": 0.48206393257272073,
      "grad_norm": 0.14913566410541534,
      "learning_rate": 8.602871078096198e-05,
      "loss": 0.5857,
      "step": 2345
    },
    {
      "epoch": 0.48226950354609927,
      "grad_norm": 0.23094283044338226,
      "learning_rate": 8.602460718006548e-05,
      "loss": 0.6814,
      "step": 2346
    },
    {
      "epoch": 0.48247507451947785,
      "grad_norm": 0.21578393876552582,
      "learning_rate": 8.602050155807832e-05,
      "loss": 0.6983,
      "step": 2347
    },
    {
      "epoch": 0.4826806454928564,
      "grad_norm": 0.21311207115650177,
      "learning_rate": 8.601639391520278e-05,
      "loss": 0.714,
      "step": 2348
    },
    {
      "epoch": 0.482886216466235,
      "grad_norm": 0.20807845890522003,
      "learning_rate": 8.601228425164123e-05,
      "loss": 0.6955,
      "step": 2349
    },
    {
      "epoch": 0.4830917874396135,
      "grad_norm": 0.2071390300989151,
      "learning_rate": 8.600817256759611e-05,
      "loss": 0.6911,
      "step": 2350
    },
    {
      "epoch": 0.4832973584129921,
      "grad_norm": 0.20365330576896667,
      "learning_rate": 8.600405886327001e-05,
      "loss": 0.5981,
      "step": 2351
    },
    {
      "epoch": 0.48350292938637063,
      "grad_norm": 0.21439498662948608,
      "learning_rate": 8.599994313886558e-05,
      "loss": 0.7061,
      "step": 2352
    },
    {
      "epoch": 0.4837085003597492,
      "grad_norm": 0.22116196155548096,
      "learning_rate": 8.599582539458558e-05,
      "loss": 0.719,
      "step": 2353
    },
    {
      "epoch": 0.48391407133312775,
      "grad_norm": 0.14612843096256256,
      "learning_rate": 8.599170563063289e-05,
      "loss": 0.5788,
      "step": 2354
    },
    {
      "epoch": 0.48411964230650634,
      "grad_norm": 0.20347650349140167,
      "learning_rate": 8.598758384721045e-05,
      "loss": 0.6891,
      "step": 2355
    },
    {
      "epoch": 0.48432521327988487,
      "grad_norm": 0.13734294474124908,
      "learning_rate": 8.598346004452132e-05,
      "loss": 0.5705,
      "step": 2356
    },
    {
      "epoch": 0.48453078425326346,
      "grad_norm": 0.21844719350337982,
      "learning_rate": 8.597933422276868e-05,
      "loss": 0.7261,
      "step": 2357
    },
    {
      "epoch": 0.484736355226642,
      "grad_norm": 0.20626910030841827,
      "learning_rate": 8.597520638215578e-05,
      "loss": 0.6712,
      "step": 2358
    },
    {
      "epoch": 0.4849419262000206,
      "grad_norm": 0.2096855491399765,
      "learning_rate": 8.597107652288598e-05,
      "loss": 0.6777,
      "step": 2359
    },
    {
      "epoch": 0.4851474971733991,
      "grad_norm": 0.20726048946380615,
      "learning_rate": 8.596694464516273e-05,
      "loss": 0.7194,
      "step": 2360
    },
    {
      "epoch": 0.4853530681467777,
      "grad_norm": 0.2092740535736084,
      "learning_rate": 8.59628107491896e-05,
      "loss": 0.6859,
      "step": 2361
    },
    {
      "epoch": 0.48555863912015623,
      "grad_norm": 0.20741955935955048,
      "learning_rate": 8.595867483517025e-05,
      "loss": 0.7095,
      "step": 2362
    },
    {
      "epoch": 0.4857642100935348,
      "grad_norm": 0.1959150731563568,
      "learning_rate": 8.595453690330843e-05,
      "loss": 0.7032,
      "step": 2363
    },
    {
      "epoch": 0.48596978106691335,
      "grad_norm": 0.20496924221515656,
      "learning_rate": 8.5950396953808e-05,
      "loss": 0.714,
      "step": 2364
    },
    {
      "epoch": 0.4861753520402919,
      "grad_norm": 0.1742028295993805,
      "learning_rate": 8.59462549868729e-05,
      "loss": 0.5882,
      "step": 2365
    },
    {
      "epoch": 0.4863809230136705,
      "grad_norm": 0.14946137368679047,
      "learning_rate": 8.59421110027072e-05,
      "loss": 0.5834,
      "step": 2366
    },
    {
      "epoch": 0.486586493987049,
      "grad_norm": 0.22946619987487793,
      "learning_rate": 8.593796500151507e-05,
      "loss": 0.6916,
      "step": 2367
    },
    {
      "epoch": 0.4867920649604276,
      "grad_norm": 0.2186809778213501,
      "learning_rate": 8.593381698350074e-05,
      "loss": 0.695,
      "step": 2368
    },
    {
      "epoch": 0.48699763593380613,
      "grad_norm": 0.21201607584953308,
      "learning_rate": 8.592966694886857e-05,
      "loss": 0.6895,
      "step": 2369
    },
    {
      "epoch": 0.4872032069071847,
      "grad_norm": 0.20772308111190796,
      "learning_rate": 8.592551489782302e-05,
      "loss": 0.6752,
      "step": 2370
    },
    {
      "epoch": 0.48740877788056325,
      "grad_norm": 0.2207845002412796,
      "learning_rate": 8.592136083056862e-05,
      "loss": 0.7037,
      "step": 2371
    },
    {
      "epoch": 0.48761434885394184,
      "grad_norm": 0.20530985295772552,
      "learning_rate": 8.591720474731006e-05,
      "loss": 0.6922,
      "step": 2372
    },
    {
      "epoch": 0.48781991982732037,
      "grad_norm": 0.2157611846923828,
      "learning_rate": 8.591304664825205e-05,
      "loss": 0.7053,
      "step": 2373
    },
    {
      "epoch": 0.48802549080069896,
      "grad_norm": 0.2080930769443512,
      "learning_rate": 8.590888653359947e-05,
      "loss": 0.6036,
      "step": 2374
    },
    {
      "epoch": 0.4882310617740775,
      "grad_norm": 0.22034066915512085,
      "learning_rate": 8.590472440355725e-05,
      "loss": 0.6732,
      "step": 2375
    },
    {
      "epoch": 0.4884366327474561,
      "grad_norm": 0.21666774153709412,
      "learning_rate": 8.590056025833045e-05,
      "loss": 0.6879,
      "step": 2376
    },
    {
      "epoch": 0.4886422037208346,
      "grad_norm": 0.21656173467636108,
      "learning_rate": 8.589639409812422e-05,
      "loss": 0.7001,
      "step": 2377
    },
    {
      "epoch": 0.4888477746942132,
      "grad_norm": 0.2207968384027481,
      "learning_rate": 8.589222592314381e-05,
      "loss": 0.6988,
      "step": 2378
    },
    {
      "epoch": 0.48905334566759173,
      "grad_norm": 0.21282252669334412,
      "learning_rate": 8.588805573359454e-05,
      "loss": 0.6686,
      "step": 2379
    },
    {
      "epoch": 0.4892589166409703,
      "grad_norm": 0.21024645864963531,
      "learning_rate": 8.588388352968188e-05,
      "loss": 0.6777,
      "step": 2380
    },
    {
      "epoch": 0.48946448761434885,
      "grad_norm": 0.21151992678642273,
      "learning_rate": 8.587970931161137e-05,
      "loss": 0.6922,
      "step": 2381
    },
    {
      "epoch": 0.4896700585877274,
      "grad_norm": 0.2125832885503769,
      "learning_rate": 8.587553307958865e-05,
      "loss": 0.6968,
      "step": 2382
    },
    {
      "epoch": 0.489875629561106,
      "grad_norm": 0.22030989825725555,
      "learning_rate": 8.587135483381948e-05,
      "loss": 0.6913,
      "step": 2383
    },
    {
      "epoch": 0.4900812005344845,
      "grad_norm": 0.2217807024717331,
      "learning_rate": 8.586717457450967e-05,
      "loss": 0.7198,
      "step": 2384
    },
    {
      "epoch": 0.4902867715078631,
      "grad_norm": 0.20852632820606232,
      "learning_rate": 8.586299230186519e-05,
      "loss": 0.6752,
      "step": 2385
    },
    {
      "epoch": 0.4904923424812416,
      "grad_norm": 0.20621474087238312,
      "learning_rate": 8.585880801609208e-05,
      "loss": 0.6783,
      "step": 2386
    },
    {
      "epoch": 0.4906979134546202,
      "grad_norm": 0.21134278178215027,
      "learning_rate": 8.585462171739647e-05,
      "loss": 0.5887,
      "step": 2387
    },
    {
      "epoch": 0.49090348442799875,
      "grad_norm": 0.2228272408246994,
      "learning_rate": 8.58504334059846e-05,
      "loss": 0.6875,
      "step": 2388
    },
    {
      "epoch": 0.49110905540137734,
      "grad_norm": 0.2240232229232788,
      "learning_rate": 8.584624308206281e-05,
      "loss": 0.6768,
      "step": 2389
    },
    {
      "epoch": 0.49131462637475587,
      "grad_norm": 0.21626600623130798,
      "learning_rate": 8.584205074583754e-05,
      "loss": 0.7107,
      "step": 2390
    },
    {
      "epoch": 0.49152019734813446,
      "grad_norm": 0.21161963045597076,
      "learning_rate": 8.583785639751532e-05,
      "loss": 0.6794,
      "step": 2391
    },
    {
      "epoch": 0.491725768321513,
      "grad_norm": 0.21978048980236053,
      "learning_rate": 8.583366003730278e-05,
      "loss": 0.6772,
      "step": 2392
    },
    {
      "epoch": 0.4919313392948916,
      "grad_norm": 0.20937666296958923,
      "learning_rate": 8.582946166540668e-05,
      "loss": 0.6825,
      "step": 2393
    },
    {
      "epoch": 0.4921369102682701,
      "grad_norm": 0.21978282928466797,
      "learning_rate": 8.582526128203385e-05,
      "loss": 0.7231,
      "step": 2394
    },
    {
      "epoch": 0.4923424812416487,
      "grad_norm": 0.21103829145431519,
      "learning_rate": 8.582105888739121e-05,
      "loss": 0.6941,
      "step": 2395
    },
    {
      "epoch": 0.49254805221502723,
      "grad_norm": 0.20812061429023743,
      "learning_rate": 8.581685448168579e-05,
      "loss": 0.6734,
      "step": 2396
    },
    {
      "epoch": 0.4927536231884058,
      "grad_norm": 0.2180771827697754,
      "learning_rate": 8.581264806512471e-05,
      "loss": 0.6817,
      "step": 2397
    },
    {
      "epoch": 0.49295919416178435,
      "grad_norm": 0.20335964858531952,
      "learning_rate": 8.580843963791524e-05,
      "loss": 0.7109,
      "step": 2398
    },
    {
      "epoch": 0.49316476513516294,
      "grad_norm": 0.22317105531692505,
      "learning_rate": 8.580422920026468e-05,
      "loss": 0.6899,
      "step": 2399
    },
    {
      "epoch": 0.4933703361085415,
      "grad_norm": 0.2043156623840332,
      "learning_rate": 8.580001675238047e-05,
      "loss": 0.7072,
      "step": 2400
    },
    {
      "epoch": 0.49357590708192,
      "grad_norm": 0.22758691012859344,
      "learning_rate": 8.579580229447013e-05,
      "loss": 0.5851,
      "step": 2401
    },
    {
      "epoch": 0.4937814780552986,
      "grad_norm": 0.21011817455291748,
      "learning_rate": 8.579158582674129e-05,
      "loss": 0.6755,
      "step": 2402
    },
    {
      "epoch": 0.4939870490286771,
      "grad_norm": 0.14406029880046844,
      "learning_rate": 8.578736734940168e-05,
      "loss": 0.5801,
      "step": 2403
    },
    {
      "epoch": 0.4941926200020557,
      "grad_norm": 0.21777774393558502,
      "learning_rate": 8.578314686265911e-05,
      "loss": 0.6707,
      "step": 2404
    },
    {
      "epoch": 0.49439819097543425,
      "grad_norm": 0.21820279955863953,
      "learning_rate": 8.577892436672152e-05,
      "loss": 0.6942,
      "step": 2405
    },
    {
      "epoch": 0.49460376194881284,
      "grad_norm": 0.2069522887468338,
      "learning_rate": 8.577469986179693e-05,
      "loss": 0.6923,
      "step": 2406
    },
    {
      "epoch": 0.49480933292219137,
      "grad_norm": 0.202153280377388,
      "learning_rate": 8.577047334809346e-05,
      "loss": 0.7045,
      "step": 2407
    },
    {
      "epoch": 0.49501490389556996,
      "grad_norm": 0.22939299046993256,
      "learning_rate": 8.576624482581932e-05,
      "loss": 0.6958,
      "step": 2408
    },
    {
      "epoch": 0.4952204748689485,
      "grad_norm": 0.19599145650863647,
      "learning_rate": 8.576201429518283e-05,
      "loss": 0.6101,
      "step": 2409
    },
    {
      "epoch": 0.4954260458423271,
      "grad_norm": 0.2155923992395401,
      "learning_rate": 8.575778175639245e-05,
      "loss": 0.7045,
      "step": 2410
    },
    {
      "epoch": 0.4956316168157056,
      "grad_norm": 0.13790921866893768,
      "learning_rate": 8.575354720965663e-05,
      "loss": 0.5729,
      "step": 2411
    },
    {
      "epoch": 0.4958371877890842,
      "grad_norm": 0.23278020322322845,
      "learning_rate": 8.574931065518403e-05,
      "loss": 0.7441,
      "step": 2412
    },
    {
      "epoch": 0.49604275876246273,
      "grad_norm": 0.15767961740493774,
      "learning_rate": 8.574507209318337e-05,
      "loss": 0.617,
      "step": 2413
    },
    {
      "epoch": 0.4962483297358413,
      "grad_norm": 0.21228386461734772,
      "learning_rate": 8.574083152386344e-05,
      "loss": 0.6849,
      "step": 2414
    },
    {
      "epoch": 0.49645390070921985,
      "grad_norm": 0.20901069045066833,
      "learning_rate": 8.573658894743316e-05,
      "loss": 0.6881,
      "step": 2415
    },
    {
      "epoch": 0.49665947168259844,
      "grad_norm": 0.20342102646827698,
      "learning_rate": 8.573234436410155e-05,
      "loss": 0.7173,
      "step": 2416
    },
    {
      "epoch": 0.496865042655977,
      "grad_norm": 0.22326229512691498,
      "learning_rate": 8.572809777407771e-05,
      "loss": 0.7265,
      "step": 2417
    },
    {
      "epoch": 0.49707061362935556,
      "grad_norm": 0.2064063847064972,
      "learning_rate": 8.572384917757086e-05,
      "loss": 0.6939,
      "step": 2418
    },
    {
      "epoch": 0.4972761846027341,
      "grad_norm": 0.2083250731229782,
      "learning_rate": 8.57195985747903e-05,
      "loss": 0.7009,
      "step": 2419
    },
    {
      "epoch": 0.4974817555761126,
      "grad_norm": 0.20397667586803436,
      "learning_rate": 8.571534596594544e-05,
      "loss": 0.6835,
      "step": 2420
    },
    {
      "epoch": 0.4976873265494912,
      "grad_norm": 0.2096882462501526,
      "learning_rate": 8.571109135124579e-05,
      "loss": 0.714,
      "step": 2421
    },
    {
      "epoch": 0.49789289752286975,
      "grad_norm": 0.2030659317970276,
      "learning_rate": 8.570683473090095e-05,
      "loss": 0.6971,
      "step": 2422
    },
    {
      "epoch": 0.49809846849624834,
      "grad_norm": 0.202758327126503,
      "learning_rate": 8.570257610512064e-05,
      "loss": 0.6856,
      "step": 2423
    },
    {
      "epoch": 0.49830403946962687,
      "grad_norm": 0.20229479670524597,
      "learning_rate": 8.569831547411464e-05,
      "loss": 0.7063,
      "step": 2424
    },
    {
      "epoch": 0.49850961044300546,
      "grad_norm": 0.2144801914691925,
      "learning_rate": 8.569405283809285e-05,
      "loss": 0.7056,
      "step": 2425
    },
    {
      "epoch": 0.498715181416384,
      "grad_norm": 0.19797521829605103,
      "learning_rate": 8.56897881972653e-05,
      "loss": 0.6035,
      "step": 2426
    },
    {
      "epoch": 0.4989207523897626,
      "grad_norm": 0.21914798021316528,
      "learning_rate": 8.568552155184204e-05,
      "loss": 0.6789,
      "step": 2427
    },
    {
      "epoch": 0.4991263233631411,
      "grad_norm": 0.2153196483850479,
      "learning_rate": 8.568125290203332e-05,
      "loss": 0.7026,
      "step": 2428
    },
    {
      "epoch": 0.4993318943365197,
      "grad_norm": 0.1549125760793686,
      "learning_rate": 8.567698224804941e-05,
      "loss": 0.5727,
      "step": 2429
    },
    {
      "epoch": 0.49953746530989823,
      "grad_norm": 0.2103041261434555,
      "learning_rate": 8.567270959010071e-05,
      "loss": 0.7001,
      "step": 2430
    },
    {
      "epoch": 0.4997430362832768,
      "grad_norm": 0.20346547663211823,
      "learning_rate": 8.566843492839769e-05,
      "loss": 0.6998,
      "step": 2431
    },
    {
      "epoch": 0.49994860725665535,
      "grad_norm": 0.16657423973083496,
      "learning_rate": 8.5664158263151e-05,
      "loss": 0.5893,
      "step": 2432
    },
    {
      "epoch": 0.5001541782300339,
      "grad_norm": 0.2198108732700348,
      "learning_rate": 8.565987959457128e-05,
      "loss": 0.692,
      "step": 2433
    },
    {
      "epoch": 0.5003597492034125,
      "grad_norm": 0.21006634831428528,
      "learning_rate": 8.565559892286934e-05,
      "loss": 0.7012,
      "step": 2434
    },
    {
      "epoch": 0.500565320176791,
      "grad_norm": 0.20093873143196106,
      "learning_rate": 8.565131624825605e-05,
      "loss": 0.6853,
      "step": 2435
    },
    {
      "epoch": 0.5007708911501696,
      "grad_norm": 0.21130932867527008,
      "learning_rate": 8.564703157094242e-05,
      "loss": 0.7092,
      "step": 2436
    },
    {
      "epoch": 0.5009764621235482,
      "grad_norm": 0.21420711278915405,
      "learning_rate": 8.564274489113954e-05,
      "loss": 0.7132,
      "step": 2437
    },
    {
      "epoch": 0.5011820330969267,
      "grad_norm": 0.2129506766796112,
      "learning_rate": 8.563845620905856e-05,
      "loss": 0.6958,
      "step": 2438
    },
    {
      "epoch": 0.5013876040703052,
      "grad_norm": 0.20229041576385498,
      "learning_rate": 8.563416552491081e-05,
      "loss": 0.6567,
      "step": 2439
    },
    {
      "epoch": 0.5015931750436838,
      "grad_norm": 0.21202024817466736,
      "learning_rate": 8.562987283890764e-05,
      "loss": 0.7095,
      "step": 2440
    },
    {
      "epoch": 0.5017987460170624,
      "grad_norm": 0.20876267552375793,
      "learning_rate": 8.562557815126053e-05,
      "loss": 0.6786,
      "step": 2441
    },
    {
      "epoch": 0.5020043169904409,
      "grad_norm": 0.20050349831581116,
      "learning_rate": 8.562128146218108e-05,
      "loss": 0.6929,
      "step": 2442
    },
    {
      "epoch": 0.5022098879638195,
      "grad_norm": 0.2047853022813797,
      "learning_rate": 8.561698277188095e-05,
      "loss": 0.6934,
      "step": 2443
    },
    {
      "epoch": 0.5024154589371981,
      "grad_norm": 0.18259146809577942,
      "learning_rate": 8.561268208057192e-05,
      "loss": 0.6199,
      "step": 2444
    },
    {
      "epoch": 0.5026210299105767,
      "grad_norm": 0.1506025195121765,
      "learning_rate": 8.560837938846587e-05,
      "loss": 0.6148,
      "step": 2445
    },
    {
      "epoch": 0.5028266008839551,
      "grad_norm": 0.22317710518836975,
      "learning_rate": 8.560407469577477e-05,
      "loss": 0.7029,
      "step": 2446
    },
    {
      "epoch": 0.5030321718573337,
      "grad_norm": 0.21875528991222382,
      "learning_rate": 8.55997680027107e-05,
      "loss": 0.7086,
      "step": 2447
    },
    {
      "epoch": 0.5032377428307123,
      "grad_norm": 0.2068042755126953,
      "learning_rate": 8.559545930948581e-05,
      "loss": 0.6979,
      "step": 2448
    },
    {
      "epoch": 0.5034433138040909,
      "grad_norm": 0.20604568719863892,
      "learning_rate": 8.559114861631239e-05,
      "loss": 0.6828,
      "step": 2449
    },
    {
      "epoch": 0.5036488847774694,
      "grad_norm": 0.20887784659862518,
      "learning_rate": 8.55868359234028e-05,
      "loss": 0.7186,
      "step": 2450
    },
    {
      "epoch": 0.503854455750848,
      "grad_norm": 0.23300114274024963,
      "learning_rate": 8.55825212309695e-05,
      "loss": 0.6772,
      "step": 2451
    },
    {
      "epoch": 0.5040600267242266,
      "grad_norm": 0.2133777141571045,
      "learning_rate": 8.557820453922507e-05,
      "loss": 0.5952,
      "step": 2452
    },
    {
      "epoch": 0.5042655976976052,
      "grad_norm": 0.23336206376552582,
      "learning_rate": 8.557388584838216e-05,
      "loss": 0.6794,
      "step": 2453
    },
    {
      "epoch": 0.5044711686709836,
      "grad_norm": 0.22460931539535522,
      "learning_rate": 8.556956515865353e-05,
      "loss": 0.6914,
      "step": 2454
    },
    {
      "epoch": 0.5046767396443622,
      "grad_norm": 0.21478697657585144,
      "learning_rate": 8.556524247025206e-05,
      "loss": 0.7215,
      "step": 2455
    },
    {
      "epoch": 0.5048823106177408,
      "grad_norm": 0.22004112601280212,
      "learning_rate": 8.556091778339068e-05,
      "loss": 0.6831,
      "step": 2456
    },
    {
      "epoch": 0.5050878815911193,
      "grad_norm": 0.21334481239318848,
      "learning_rate": 8.555659109828247e-05,
      "loss": 0.6868,
      "step": 2457
    },
    {
      "epoch": 0.5052934525644979,
      "grad_norm": 0.20527870953083038,
      "learning_rate": 8.555226241514059e-05,
      "loss": 0.7008,
      "step": 2458
    },
    {
      "epoch": 0.5054990235378765,
      "grad_norm": 0.2052440643310547,
      "learning_rate": 8.554793173417825e-05,
      "loss": 0.6851,
      "step": 2459
    },
    {
      "epoch": 0.505704594511255,
      "grad_norm": 0.20601294934749603,
      "learning_rate": 8.554359905560886e-05,
      "loss": 0.7074,
      "step": 2460
    },
    {
      "epoch": 0.5059101654846335,
      "grad_norm": 0.20732106268405914,
      "learning_rate": 8.553926437964584e-05,
      "loss": 0.7022,
      "step": 2461
    },
    {
      "epoch": 0.5061157364580121,
      "grad_norm": 0.20242151618003845,
      "learning_rate": 8.553492770650275e-05,
      "loss": 0.7151,
      "step": 2462
    },
    {
      "epoch": 0.5063213074313907,
      "grad_norm": 0.2136530876159668,
      "learning_rate": 8.553058903639322e-05,
      "loss": 0.6944,
      "step": 2463
    },
    {
      "epoch": 0.5065268784047693,
      "grad_norm": 0.20471519231796265,
      "learning_rate": 8.552624836953102e-05,
      "loss": 0.7044,
      "step": 2464
    },
    {
      "epoch": 0.5067324493781478,
      "grad_norm": 0.2073119431734085,
      "learning_rate": 8.552190570612998e-05,
      "loss": 0.7084,
      "step": 2465
    },
    {
      "epoch": 0.5069380203515264,
      "grad_norm": 0.20517416298389435,
      "learning_rate": 8.551756104640403e-05,
      "loss": 0.7044,
      "step": 2466
    },
    {
      "epoch": 0.5071435913249049,
      "grad_norm": 0.20278342068195343,
      "learning_rate": 8.551321439056722e-05,
      "loss": 0.724,
      "step": 2467
    },
    {
      "epoch": 0.5073491622982835,
      "grad_norm": 0.20847640931606293,
      "learning_rate": 8.550886573883371e-05,
      "loss": 0.6805,
      "step": 2468
    },
    {
      "epoch": 0.507554733271662,
      "grad_norm": 0.21068242192268372,
      "learning_rate": 8.550451509141772e-05,
      "loss": 0.6878,
      "step": 2469
    },
    {
      "epoch": 0.5077603042450406,
      "grad_norm": 0.19965562224388123,
      "learning_rate": 8.55001624485336e-05,
      "loss": 0.6728,
      "step": 2470
    },
    {
      "epoch": 0.5079658752184192,
      "grad_norm": 0.28934335708618164,
      "learning_rate": 8.549580781039576e-05,
      "loss": 0.6096,
      "step": 2471
    },
    {
      "epoch": 0.5081714461917978,
      "grad_norm": 0.21150463819503784,
      "learning_rate": 8.549145117721875e-05,
      "loss": 0.7202,
      "step": 2472
    },
    {
      "epoch": 0.5083770171651762,
      "grad_norm": 0.17131322622299194,
      "learning_rate": 8.548709254921721e-05,
      "loss": 0.5992,
      "step": 2473
    },
    {
      "epoch": 0.5085825881385548,
      "grad_norm": 0.1621021330356598,
      "learning_rate": 8.548273192660585e-05,
      "loss": 0.5971,
      "step": 2474
    },
    {
      "epoch": 0.5087881591119334,
      "grad_norm": 0.22314049303531647,
      "learning_rate": 8.547836930959949e-05,
      "loss": 0.7129,
      "step": 2475
    },
    {
      "epoch": 0.5089937300853119,
      "grad_norm": 0.21151074767112732,
      "learning_rate": 8.547400469841307e-05,
      "loss": 0.6885,
      "step": 2476
    },
    {
      "epoch": 0.5091993010586905,
      "grad_norm": 0.20470760762691498,
      "learning_rate": 8.546963809326162e-05,
      "loss": 0.7107,
      "step": 2477
    },
    {
      "epoch": 0.5094048720320691,
      "grad_norm": 0.20865213871002197,
      "learning_rate": 8.546526949436025e-05,
      "loss": 0.7328,
      "step": 2478
    },
    {
      "epoch": 0.5096104430054477,
      "grad_norm": 0.24143381416797638,
      "learning_rate": 8.546089890192422e-05,
      "loss": 0.5784,
      "step": 2479
    },
    {
      "epoch": 0.5098160139788261,
      "grad_norm": 0.21726645529270172,
      "learning_rate": 8.545652631616878e-05,
      "loss": 0.7009,
      "step": 2480
    },
    {
      "epoch": 0.5100215849522047,
      "grad_norm": 0.24358177185058594,
      "learning_rate": 8.545215173730938e-05,
      "loss": 0.7017,
      "step": 2481
    },
    {
      "epoch": 0.5102271559255833,
      "grad_norm": 0.21474173665046692,
      "learning_rate": 8.544777516556155e-05,
      "loss": 0.6889,
      "step": 2482
    },
    {
      "epoch": 0.5104327268989619,
      "grad_norm": 0.2038557231426239,
      "learning_rate": 8.54433966011409e-05,
      "loss": 0.7172,
      "step": 2483
    },
    {
      "epoch": 0.5106382978723404,
      "grad_norm": 0.22823157906532288,
      "learning_rate": 8.54390160442631e-05,
      "loss": 0.7173,
      "step": 2484
    },
    {
      "epoch": 0.510843868845719,
      "grad_norm": 0.20391489565372467,
      "learning_rate": 8.5434633495144e-05,
      "loss": 0.7198,
      "step": 2485
    },
    {
      "epoch": 0.5110494398190976,
      "grad_norm": 0.1981978565454483,
      "learning_rate": 8.543024895399953e-05,
      "loss": 0.6856,
      "step": 2486
    },
    {
      "epoch": 0.5112550107924761,
      "grad_norm": 0.2035714089870453,
      "learning_rate": 8.542586242104563e-05,
      "loss": 0.6885,
      "step": 2487
    },
    {
      "epoch": 0.5114605817658546,
      "grad_norm": 0.20313310623168945,
      "learning_rate": 8.542147389649847e-05,
      "loss": 0.7015,
      "step": 2488
    },
    {
      "epoch": 0.5116661527392332,
      "grad_norm": 0.20469297468662262,
      "learning_rate": 8.541708338057419e-05,
      "loss": 0.7098,
      "step": 2489
    },
    {
      "epoch": 0.5118717237126118,
      "grad_norm": 0.2113511860370636,
      "learning_rate": 8.541269087348913e-05,
      "loss": 0.7239,
      "step": 2490
    },
    {
      "epoch": 0.5120772946859904,
      "grad_norm": 0.20842553675174713,
      "learning_rate": 8.540829637545969e-05,
      "loss": 0.7047,
      "step": 2491
    },
    {
      "epoch": 0.5122828656593689,
      "grad_norm": 0.2060026377439499,
      "learning_rate": 8.540389988670234e-05,
      "loss": 0.6655,
      "step": 2492
    },
    {
      "epoch": 0.5124884366327475,
      "grad_norm": 0.21950404345989227,
      "learning_rate": 8.53995014074337e-05,
      "loss": 0.6143,
      "step": 2493
    },
    {
      "epoch": 0.512694007606126,
      "grad_norm": 0.21250604093074799,
      "learning_rate": 8.539510093787044e-05,
      "loss": 0.6995,
      "step": 2494
    },
    {
      "epoch": 0.5128995785795045,
      "grad_norm": 0.21519462764263153,
      "learning_rate": 8.539069847822938e-05,
      "loss": 0.6877,
      "step": 2495
    },
    {
      "epoch": 0.5131051495528831,
      "grad_norm": 0.21637707948684692,
      "learning_rate": 8.538629402872738e-05,
      "loss": 0.7088,
      "step": 2496
    },
    {
      "epoch": 0.5133107205262617,
      "grad_norm": 0.2197788506746292,
      "learning_rate": 8.538188758958144e-05,
      "loss": 0.6753,
      "step": 2497
    },
    {
      "epoch": 0.5135162914996403,
      "grad_norm": 0.22371014952659607,
      "learning_rate": 8.537747916100865e-05,
      "loss": 0.7074,
      "step": 2498
    },
    {
      "epoch": 0.5137218624730188,
      "grad_norm": 0.16387100517749786,
      "learning_rate": 8.537306874322618e-05,
      "loss": 0.5846,
      "step": 2499
    },
    {
      "epoch": 0.5139274334463974,
      "grad_norm": 0.24268200993537903,
      "learning_rate": 8.536865633645132e-05,
      "loss": 0.6932,
      "step": 2500
    },
    {
      "epoch": 0.5141330044197759,
      "grad_norm": 0.23605839908123016,
      "learning_rate": 8.536424194090144e-05,
      "loss": 0.6874,
      "step": 2501
    },
    {
      "epoch": 0.5143385753931545,
      "grad_norm": 0.20614401996135712,
      "learning_rate": 8.535982555679402e-05,
      "loss": 0.6704,
      "step": 2502
    },
    {
      "epoch": 0.514544146366533,
      "grad_norm": 0.20825539529323578,
      "learning_rate": 8.535540718434665e-05,
      "loss": 0.7012,
      "step": 2503
    },
    {
      "epoch": 0.5147497173399116,
      "grad_norm": 0.2111969292163849,
      "learning_rate": 8.535098682377698e-05,
      "loss": 0.6834,
      "step": 2504
    },
    {
      "epoch": 0.5149552883132902,
      "grad_norm": 0.21059072017669678,
      "learning_rate": 8.534656447530278e-05,
      "loss": 0.7163,
      "step": 2505
    },
    {
      "epoch": 0.5151608592866688,
      "grad_norm": 0.20956206321716309,
      "learning_rate": 8.534214013914193e-05,
      "loss": 0.6897,
      "step": 2506
    },
    {
      "epoch": 0.5153664302600472,
      "grad_norm": 0.16276654601097107,
      "learning_rate": 8.53377138155124e-05,
      "loss": 0.5806,
      "step": 2507
    },
    {
      "epoch": 0.5155720012334258,
      "grad_norm": 0.14373748004436493,
      "learning_rate": 8.533328550463226e-05,
      "loss": 0.5802,
      "step": 2508
    },
    {
      "epoch": 0.5157775722068044,
      "grad_norm": 0.14410528540611267,
      "learning_rate": 8.532885520671963e-05,
      "loss": 0.5905,
      "step": 2509
    },
    {
      "epoch": 0.515983143180183,
      "grad_norm": 0.25100046396255493,
      "learning_rate": 8.532442292199283e-05,
      "loss": 0.7222,
      "step": 2510
    },
    {
      "epoch": 0.5161887141535615,
      "grad_norm": 0.1554838865995407,
      "learning_rate": 8.531998865067017e-05,
      "loss": 0.5799,
      "step": 2511
    },
    {
      "epoch": 0.5163942851269401,
      "grad_norm": 0.21566714346408844,
      "learning_rate": 8.531555239297013e-05,
      "loss": 0.7103,
      "step": 2512
    },
    {
      "epoch": 0.5165998561003187,
      "grad_norm": 0.1622397005558014,
      "learning_rate": 8.531111414911126e-05,
      "loss": 0.5907,
      "step": 2513
    },
    {
      "epoch": 0.5168054270736971,
      "grad_norm": 0.2527947723865509,
      "learning_rate": 8.530667391931221e-05,
      "loss": 0.6972,
      "step": 2514
    },
    {
      "epoch": 0.5170109980470757,
      "grad_norm": 0.14436852931976318,
      "learning_rate": 8.530223170379174e-05,
      "loss": 0.5834,
      "step": 2515
    },
    {
      "epoch": 0.5172165690204543,
      "grad_norm": 0.22850194573402405,
      "learning_rate": 8.529778750276866e-05,
      "loss": 0.7095,
      "step": 2516
    },
    {
      "epoch": 0.5174221399938329,
      "grad_norm": 0.21069450676441193,
      "learning_rate": 8.529334131646196e-05,
      "loss": 0.6754,
      "step": 2517
    },
    {
      "epoch": 0.5176277109672114,
      "grad_norm": 0.16173620522022247,
      "learning_rate": 8.528889314509066e-05,
      "loss": 0.6033,
      "step": 2518
    },
    {
      "epoch": 0.51783328194059,
      "grad_norm": 0.23078560829162598,
      "learning_rate": 8.528444298887391e-05,
      "loss": 0.6971,
      "step": 2519
    },
    {
      "epoch": 0.5180388529139686,
      "grad_norm": 0.21634352207183838,
      "learning_rate": 8.527999084803092e-05,
      "loss": 0.6821,
      "step": 2520
    },
    {
      "epoch": 0.5182444238873471,
      "grad_norm": 0.20838621258735657,
      "learning_rate": 8.527553672278107e-05,
      "loss": 0.7123,
      "step": 2521
    },
    {
      "epoch": 0.5184499948607256,
      "grad_norm": 0.20532085001468658,
      "learning_rate": 8.527108061334378e-05,
      "loss": 0.7199,
      "step": 2522
    },
    {
      "epoch": 0.5186555658341042,
      "grad_norm": 0.20181244611740112,
      "learning_rate": 8.526662251993856e-05,
      "loss": 0.6995,
      "step": 2523
    },
    {
      "epoch": 0.5188611368074828,
      "grad_norm": 0.1562027484178543,
      "learning_rate": 8.526216244278505e-05,
      "loss": 0.5845,
      "step": 2524
    },
    {
      "epoch": 0.5190667077808614,
      "grad_norm": 0.22398139536380768,
      "learning_rate": 8.5257700382103e-05,
      "loss": 0.7083,
      "step": 2525
    },
    {
      "epoch": 0.5192722787542399,
      "grad_norm": 0.206566721200943,
      "learning_rate": 8.52532363381122e-05,
      "loss": 0.7012,
      "step": 2526
    },
    {
      "epoch": 0.5194778497276185,
      "grad_norm": 0.20333848893642426,
      "learning_rate": 8.524877031103259e-05,
      "loss": 0.7052,
      "step": 2527
    },
    {
      "epoch": 0.519683420700997,
      "grad_norm": 0.1408892273902893,
      "learning_rate": 8.524430230108419e-05,
      "loss": 0.5717,
      "step": 2528
    },
    {
      "epoch": 0.5198889916743756,
      "grad_norm": 0.21199721097946167,
      "learning_rate": 8.523983230848712e-05,
      "loss": 0.6796,
      "step": 2529
    },
    {
      "epoch": 0.5200945626477541,
      "grad_norm": 0.21294069290161133,
      "learning_rate": 8.523536033346159e-05,
      "loss": 0.6961,
      "step": 2530
    },
    {
      "epoch": 0.5203001336211327,
      "grad_norm": 0.2040695995092392,
      "learning_rate": 8.523088637622793e-05,
      "loss": 0.7192,
      "step": 2531
    },
    {
      "epoch": 0.5205057045945113,
      "grad_norm": 0.13950461149215698,
      "learning_rate": 8.522641043700653e-05,
      "loss": 0.5966,
      "step": 2532
    },
    {
      "epoch": 0.5207112755678898,
      "grad_norm": 0.22141605615615845,
      "learning_rate": 8.52219325160179e-05,
      "loss": 0.7104,
      "step": 2533
    },
    {
      "epoch": 0.5209168465412684,
      "grad_norm": 0.13655850291252136,
      "learning_rate": 8.521745261348264e-05,
      "loss": 0.5766,
      "step": 2534
    },
    {
      "epoch": 0.5211224175146469,
      "grad_norm": 0.21564966440200806,
      "learning_rate": 8.521297072962148e-05,
      "loss": 0.7378,
      "step": 2535
    },
    {
      "epoch": 0.5213279884880255,
      "grad_norm": 0.13964693248271942,
      "learning_rate": 8.520848686465521e-05,
      "loss": 0.5763,
      "step": 2536
    },
    {
      "epoch": 0.521533559461404,
      "grad_norm": 0.20813791453838348,
      "learning_rate": 8.520400101880472e-05,
      "loss": 0.6768,
      "step": 2537
    },
    {
      "epoch": 0.5217391304347826,
      "grad_norm": 0.20774829387664795,
      "learning_rate": 8.519951319229101e-05,
      "loss": 0.7078,
      "step": 2538
    },
    {
      "epoch": 0.5219447014081612,
      "grad_norm": 0.14507782459259033,
      "learning_rate": 8.519502338533519e-05,
      "loss": 0.6009,
      "step": 2539
    },
    {
      "epoch": 0.5221502723815398,
      "grad_norm": 0.21281610429286957,
      "learning_rate": 8.519053159815843e-05,
      "loss": 0.6951,
      "step": 2540
    },
    {
      "epoch": 0.5223558433549182,
      "grad_norm": 0.21360744535923004,
      "learning_rate": 8.518603783098203e-05,
      "loss": 0.7098,
      "step": 2541
    },
    {
      "epoch": 0.5225614143282968,
      "grad_norm": 0.20327754318714142,
      "learning_rate": 8.518154208402736e-05,
      "loss": 0.7009,
      "step": 2542
    },
    {
      "epoch": 0.5227669853016754,
      "grad_norm": 0.200285404920578,
      "learning_rate": 8.517704435751594e-05,
      "loss": 0.6858,
      "step": 2543
    },
    {
      "epoch": 0.522972556275054,
      "grad_norm": 0.13732387125492096,
      "learning_rate": 8.517254465166932e-05,
      "loss": 0.5735,
      "step": 2544
    },
    {
      "epoch": 0.5231781272484325,
      "grad_norm": 0.21144580841064453,
      "learning_rate": 8.516804296670919e-05,
      "loss": 0.7217,
      "step": 2545
    },
    {
      "epoch": 0.5233836982218111,
      "grad_norm": 0.20281550288200378,
      "learning_rate": 8.516353930285735e-05,
      "loss": 0.7018,
      "step": 2546
    },
    {
      "epoch": 0.5235892691951897,
      "grad_norm": 0.1997842639684677,
      "learning_rate": 8.515903366033563e-05,
      "loss": 0.6991,
      "step": 2547
    },
    {
      "epoch": 0.5237948401685681,
      "grad_norm": 0.13998793065547943,
      "learning_rate": 8.515452603936603e-05,
      "loss": 0.5788,
      "step": 2548
    },
    {
      "epoch": 0.5240004111419467,
      "grad_norm": 0.2052655965089798,
      "learning_rate": 8.51500164401706e-05,
      "loss": 0.7221,
      "step": 2549
    },
    {
      "epoch": 0.5242059821153253,
      "grad_norm": 0.21158649027347565,
      "learning_rate": 8.514550486297155e-05,
      "loss": 0.7077,
      "step": 2550
    },
    {
      "epoch": 0.5244115530887039,
      "grad_norm": 0.2046501189470291,
      "learning_rate": 8.51409913079911e-05,
      "loss": 0.6898,
      "step": 2551
    },
    {
      "epoch": 0.5246171240620824,
      "grad_norm": 0.13471710681915283,
      "learning_rate": 8.513647577545163e-05,
      "loss": 0.5809,
      "step": 2552
    },
    {
      "epoch": 0.524822695035461,
      "grad_norm": 0.21416522562503815,
      "learning_rate": 8.51319582655756e-05,
      "loss": 0.6954,
      "step": 2553
    },
    {
      "epoch": 0.5250282660088396,
      "grad_norm": 0.21434451639652252,
      "learning_rate": 8.512743877858554e-05,
      "loss": 0.6864,
      "step": 2554
    },
    {
      "epoch": 0.5252338369822181,
      "grad_norm": 0.2164076715707779,
      "learning_rate": 8.512291731470415e-05,
      "loss": 0.7236,
      "step": 2555
    },
    {
      "epoch": 0.5254394079555966,
      "grad_norm": 0.2215905487537384,
      "learning_rate": 8.511839387415415e-05,
      "loss": 0.6808,
      "step": 2556
    },
    {
      "epoch": 0.5256449789289752,
      "grad_norm": 0.212999165058136,
      "learning_rate": 8.51138684571584e-05,
      "loss": 0.6986,
      "step": 2557
    },
    {
      "epoch": 0.5258505499023538,
      "grad_norm": 0.20863129198551178,
      "learning_rate": 8.510934106393983e-05,
      "loss": 0.708,
      "step": 2558
    },
    {
      "epoch": 0.5260561208757324,
      "grad_norm": 0.14516817033290863,
      "learning_rate": 8.51048116947215e-05,
      "loss": 0.574,
      "step": 2559
    },
    {
      "epoch": 0.5262616918491109,
      "grad_norm": 0.2149210274219513,
      "learning_rate": 8.510028034972656e-05,
      "loss": 0.6872,
      "step": 2560
    },
    {
      "epoch": 0.5264672628224895,
      "grad_norm": 0.21908272802829742,
      "learning_rate": 8.509574702917823e-05,
      "loss": 0.6847,
      "step": 2561
    },
    {
      "epoch": 0.526672833795868,
      "grad_norm": 0.1989137828350067,
      "learning_rate": 8.509121173329985e-05,
      "loss": 0.6807,
      "step": 2562
    },
    {
      "epoch": 0.5268784047692466,
      "grad_norm": 0.14854271709918976,
      "learning_rate": 8.508667446231486e-05,
      "loss": 0.5931,
      "step": 2563
    },
    {
      "epoch": 0.5270839757426251,
      "grad_norm": 0.21540796756744385,
      "learning_rate": 8.508213521644677e-05,
      "loss": 0.6948,
      "step": 2564
    },
    {
      "epoch": 0.5272895467160037,
      "grad_norm": 0.21465127170085907,
      "learning_rate": 8.507759399591922e-05,
      "loss": 0.7256,
      "step": 2565
    },
    {
      "epoch": 0.5274951176893823,
      "grad_norm": 0.2020212709903717,
      "learning_rate": 8.507305080095595e-05,
      "loss": 0.6946,
      "step": 2566
    },
    {
      "epoch": 0.5277006886627608,
      "grad_norm": 0.21125240623950958,
      "learning_rate": 8.506850563178077e-05,
      "loss": 0.6756,
      "step": 2567
    },
    {
      "epoch": 0.5279062596361394,
      "grad_norm": 0.17571476101875305,
      "learning_rate": 8.506395848861759e-05,
      "loss": 0.5914,
      "step": 2568
    },
    {
      "epoch": 0.5281118306095179,
      "grad_norm": 0.22128242254257202,
      "learning_rate": 8.505940937169044e-05,
      "loss": 0.6772,
      "step": 2569
    },
    {
      "epoch": 0.5283174015828965,
      "grad_norm": 0.13210316002368927,
      "learning_rate": 8.505485828122341e-05,
      "loss": 0.5798,
      "step": 2570
    },
    {
      "epoch": 0.528522972556275,
      "grad_norm": 0.22432683408260345,
      "learning_rate": 8.505030521744074e-05,
      "loss": 0.693,
      "step": 2571
    },
    {
      "epoch": 0.5287285435296536,
      "grad_norm": 0.15919888019561768,
      "learning_rate": 8.504575018056672e-05,
      "loss": 0.5888,
      "step": 2572
    },
    {
      "epoch": 0.5289341145030322,
      "grad_norm": 0.21992851793766022,
      "learning_rate": 8.504119317082577e-05,
      "loss": 0.6978,
      "step": 2573
    },
    {
      "epoch": 0.5291396854764108,
      "grad_norm": 0.2072344422340393,
      "learning_rate": 8.503663418844238e-05,
      "loss": 0.7253,
      "step": 2574
    },
    {
      "epoch": 0.5293452564497892,
      "grad_norm": 0.14406660199165344,
      "learning_rate": 8.503207323364117e-05,
      "loss": 0.5729,
      "step": 2575
    },
    {
      "epoch": 0.5295508274231678,
      "grad_norm": 0.21171186864376068,
      "learning_rate": 8.50275103066468e-05,
      "loss": 0.7078,
      "step": 2576
    },
    {
      "epoch": 0.5297563983965464,
      "grad_norm": 0.22379416227340698,
      "learning_rate": 8.502294540768409e-05,
      "loss": 0.6871,
      "step": 2577
    },
    {
      "epoch": 0.529961969369925,
      "grad_norm": 0.2064572423696518,
      "learning_rate": 8.501837853697792e-05,
      "loss": 0.7041,
      "step": 2578
    },
    {
      "epoch": 0.5301675403433035,
      "grad_norm": 0.20695674419403076,
      "learning_rate": 8.501380969475331e-05,
      "loss": 0.7138,
      "step": 2579
    },
    {
      "epoch": 0.5303731113166821,
      "grad_norm": 0.21721471846103668,
      "learning_rate": 8.50092388812353e-05,
      "loss": 0.7119,
      "step": 2580
    },
    {
      "epoch": 0.5305786822900607,
      "grad_norm": 0.20023848116397858,
      "learning_rate": 8.50046660966491e-05,
      "loss": 0.6828,
      "step": 2581
    },
    {
      "epoch": 0.5307842532634393,
      "grad_norm": 0.22572509944438934,
      "learning_rate": 8.500009134121998e-05,
      "loss": 0.7025,
      "step": 2582
    },
    {
      "epoch": 0.5309898242368177,
      "grad_norm": 0.20377467572689056,
      "learning_rate": 8.499551461517332e-05,
      "loss": 0.6907,
      "step": 2583
    },
    {
      "epoch": 0.5311953952101963,
      "grad_norm": 0.2061266154050827,
      "learning_rate": 8.499093591873459e-05,
      "loss": 0.7025,
      "step": 2584
    },
    {
      "epoch": 0.5314009661835749,
      "grad_norm": 0.20886844396591187,
      "learning_rate": 8.498635525212937e-05,
      "loss": 0.689,
      "step": 2585
    },
    {
      "epoch": 0.5316065371569534,
      "grad_norm": 0.21331052482128143,
      "learning_rate": 8.498177261558332e-05,
      "loss": 0.7088,
      "step": 2586
    },
    {
      "epoch": 0.531812108130332,
      "grad_norm": 0.2123933583498001,
      "learning_rate": 8.49771880093222e-05,
      "loss": 0.6907,
      "step": 2587
    },
    {
      "epoch": 0.5320176791037106,
      "grad_norm": 0.20878660678863525,
      "learning_rate": 8.49726014335719e-05,
      "loss": 0.724,
      "step": 2588
    },
    {
      "epoch": 0.5322232500770891,
      "grad_norm": 0.1978175789117813,
      "learning_rate": 8.496801288855835e-05,
      "loss": 0.6824,
      "step": 2589
    },
    {
      "epoch": 0.5324288210504676,
      "grad_norm": 0.21396887302398682,
      "learning_rate": 8.496342237450761e-05,
      "loss": 0.712,
      "step": 2590
    },
    {
      "epoch": 0.5326343920238462,
      "grad_norm": 0.21784614026546478,
      "learning_rate": 8.495882989164584e-05,
      "loss": 0.6793,
      "step": 2591
    },
    {
      "epoch": 0.5328399629972248,
      "grad_norm": 0.20604658126831055,
      "learning_rate": 8.495423544019928e-05,
      "loss": 0.7158,
      "step": 2592
    },
    {
      "epoch": 0.5330455339706034,
      "grad_norm": 0.21813294291496277,
      "learning_rate": 8.49496390203943e-05,
      "loss": 0.6887,
      "step": 2593
    },
    {
      "epoch": 0.5332511049439819,
      "grad_norm": 0.1722048819065094,
      "learning_rate": 8.494504063245733e-05,
      "loss": 0.6013,
      "step": 2594
    },
    {
      "epoch": 0.5334566759173605,
      "grad_norm": 0.2043728232383728,
      "learning_rate": 8.49404402766149e-05,
      "loss": 0.684,
      "step": 2595
    },
    {
      "epoch": 0.533662246890739,
      "grad_norm": 0.20982548594474792,
      "learning_rate": 8.493583795309364e-05,
      "loss": 0.6776,
      "step": 2596
    },
    {
      "epoch": 0.5338678178641176,
      "grad_norm": 0.20805718004703522,
      "learning_rate": 8.493123366212034e-05,
      "loss": 0.7061,
      "step": 2597
    },
    {
      "epoch": 0.5340733888374961,
      "grad_norm": 0.1766945868730545,
      "learning_rate": 8.492662740392178e-05,
      "loss": 0.595,
      "step": 2598
    },
    {
      "epoch": 0.5342789598108747,
      "grad_norm": 0.22322477400302887,
      "learning_rate": 8.49220191787249e-05,
      "loss": 0.665,
      "step": 2599
    },
    {
      "epoch": 0.5344845307842533,
      "grad_norm": 0.22785376012325287,
      "learning_rate": 8.491740898675675e-05,
      "loss": 0.7141,
      "step": 2600
    },
    {
      "epoch": 0.5346901017576319,
      "grad_norm": 0.2232331484556198,
      "learning_rate": 8.491279682824441e-05,
      "loss": 0.7175,
      "step": 2601
    },
    {
      "epoch": 0.5348956727310104,
      "grad_norm": 0.2167566865682602,
      "learning_rate": 8.490818270341514e-05,
      "loss": 0.6922,
      "step": 2602
    },
    {
      "epoch": 0.5351012437043889,
      "grad_norm": 0.20170411467552185,
      "learning_rate": 8.490356661249623e-05,
      "loss": 0.6809,
      "step": 2603
    },
    {
      "epoch": 0.5353068146777675,
      "grad_norm": 0.21896955370903015,
      "learning_rate": 8.48989485557151e-05,
      "loss": 0.6952,
      "step": 2604
    },
    {
      "epoch": 0.535512385651146,
      "grad_norm": 0.17013712227344513,
      "learning_rate": 8.489432853329927e-05,
      "loss": 0.5891,
      "step": 2605
    },
    {
      "epoch": 0.5357179566245246,
      "grad_norm": 0.23494184017181396,
      "learning_rate": 8.488970654547632e-05,
      "loss": 0.6739,
      "step": 2606
    },
    {
      "epoch": 0.5359235275979032,
      "grad_norm": 0.21912021934986115,
      "learning_rate": 8.4885082592474e-05,
      "loss": 0.7035,
      "step": 2607
    },
    {
      "epoch": 0.5361290985712818,
      "grad_norm": 0.14512377977371216,
      "learning_rate": 8.488045667452006e-05,
      "loss": 0.569,
      "step": 2608
    },
    {
      "epoch": 0.5363346695446602,
      "grad_norm": 0.14050711691379547,
      "learning_rate": 8.487582879184242e-05,
      "loss": 0.5772,
      "step": 2609
    },
    {
      "epoch": 0.5365402405180388,
      "grad_norm": 0.25031203031539917,
      "learning_rate": 8.48711989446691e-05,
      "loss": 0.6868,
      "step": 2610
    },
    {
      "epoch": 0.5367458114914174,
      "grad_norm": 0.2108568251132965,
      "learning_rate": 8.486656713322814e-05,
      "loss": 0.6894,
      "step": 2611
    },
    {
      "epoch": 0.536951382464796,
      "grad_norm": 0.22467973828315735,
      "learning_rate": 8.486193335774777e-05,
      "loss": 0.692,
      "step": 2612
    },
    {
      "epoch": 0.5371569534381745,
      "grad_norm": 0.2571062743663788,
      "learning_rate": 8.485729761845625e-05,
      "loss": 0.705,
      "step": 2613
    },
    {
      "epoch": 0.5373625244115531,
      "grad_norm": 0.21951597929000854,
      "learning_rate": 8.485265991558196e-05,
      "loss": 0.6824,
      "step": 2614
    },
    {
      "epoch": 0.5375680953849317,
      "grad_norm": 0.22675755620002747,
      "learning_rate": 8.48480202493534e-05,
      "loss": 0.7114,
      "step": 2615
    },
    {
      "epoch": 0.5377736663583103,
      "grad_norm": 0.2269049733877182,
      "learning_rate": 8.484337861999912e-05,
      "loss": 0.6641,
      "step": 2616
    },
    {
      "epoch": 0.5379792373316887,
      "grad_norm": 0.21990883350372314,
      "learning_rate": 8.48387350277478e-05,
      "loss": 0.7275,
      "step": 2617
    },
    {
      "epoch": 0.5381848083050673,
      "grad_norm": 0.21468190848827362,
      "learning_rate": 8.483408947282823e-05,
      "loss": 0.7202,
      "step": 2618
    },
    {
      "epoch": 0.5383903792784459,
      "grad_norm": 0.21018457412719727,
      "learning_rate": 8.482944195546925e-05,
      "loss": 0.6831,
      "step": 2619
    },
    {
      "epoch": 0.5385959502518245,
      "grad_norm": 0.2128850817680359,
      "learning_rate": 8.482479247589982e-05,
      "loss": 0.6809,
      "step": 2620
    },
    {
      "epoch": 0.538801521225203,
      "grad_norm": 0.23084747791290283,
      "learning_rate": 8.4820141034349e-05,
      "loss": 0.6099,
      "step": 2621
    },
    {
      "epoch": 0.5390070921985816,
      "grad_norm": 0.22527490556240082,
      "learning_rate": 8.481548763104597e-05,
      "loss": 0.7123,
      "step": 2622
    },
    {
      "epoch": 0.5392126631719601,
      "grad_norm": 0.22562628984451294,
      "learning_rate": 8.481083226621994e-05,
      "loss": 0.707,
      "step": 2623
    },
    {
      "epoch": 0.5394182341453386,
      "grad_norm": 0.21400360763072968,
      "learning_rate": 8.48061749401003e-05,
      "loss": 0.7019,
      "step": 2624
    },
    {
      "epoch": 0.5396238051187172,
      "grad_norm": 0.20809048414230347,
      "learning_rate": 8.480151565291646e-05,
      "loss": 0.7188,
      "step": 2625
    },
    {
      "epoch": 0.5398293760920958,
      "grad_norm": 0.21414582431316376,
      "learning_rate": 8.479685440489798e-05,
      "loss": 0.6698,
      "step": 2626
    },
    {
      "epoch": 0.5400349470654744,
      "grad_norm": 0.19604355096817017,
      "learning_rate": 8.47921911962745e-05,
      "loss": 0.6728,
      "step": 2627
    },
    {
      "epoch": 0.5402405180388529,
      "grad_norm": 0.2081209272146225,
      "learning_rate": 8.478752602727573e-05,
      "loss": 0.6839,
      "step": 2628
    },
    {
      "epoch": 0.5404460890122315,
      "grad_norm": 0.21594710648059845,
      "learning_rate": 8.478285889813153e-05,
      "loss": 0.6845,
      "step": 2629
    },
    {
      "epoch": 0.54065165998561,
      "grad_norm": 0.21320217847824097,
      "learning_rate": 8.477818980907183e-05,
      "loss": 0.7046,
      "step": 2630
    },
    {
      "epoch": 0.5408572309589886,
      "grad_norm": 0.20672303438186646,
      "learning_rate": 8.477351876032662e-05,
      "loss": 0.7343,
      "step": 2631
    },
    {
      "epoch": 0.5410628019323671,
      "grad_norm": 0.1888507753610611,
      "learning_rate": 8.476884575212606e-05,
      "loss": 0.6666,
      "step": 2632
    },
    {
      "epoch": 0.5412683729057457,
      "grad_norm": 0.19607265293598175,
      "learning_rate": 8.476417078470032e-05,
      "loss": 0.6881,
      "step": 2633
    },
    {
      "epoch": 0.5414739438791243,
      "grad_norm": 0.20374587178230286,
      "learning_rate": 8.475949385827977e-05,
      "loss": 0.6748,
      "step": 2634
    },
    {
      "epoch": 0.5416795148525029,
      "grad_norm": 0.2075163573026657,
      "learning_rate": 8.475481497309478e-05,
      "loss": 0.7178,
      "step": 2635
    },
    {
      "epoch": 0.5418850858258814,
      "grad_norm": 0.20457369089126587,
      "learning_rate": 8.475013412937587e-05,
      "loss": 0.6713,
      "step": 2636
    },
    {
      "epoch": 0.5420906567992599,
      "grad_norm": 0.22288042306900024,
      "learning_rate": 8.474545132735365e-05,
      "loss": 0.593,
      "step": 2637
    },
    {
      "epoch": 0.5422962277726385,
      "grad_norm": 0.2154739946126938,
      "learning_rate": 8.474076656725881e-05,
      "loss": 0.6944,
      "step": 2638
    },
    {
      "epoch": 0.5425017987460171,
      "grad_norm": 0.21423187851905823,
      "learning_rate": 8.473607984932215e-05,
      "loss": 0.6635,
      "step": 2639
    },
    {
      "epoch": 0.5427073697193956,
      "grad_norm": 0.24016740918159485,
      "learning_rate": 8.473139117377456e-05,
      "loss": 0.7088,
      "step": 2640
    },
    {
      "epoch": 0.5429129406927742,
      "grad_norm": 0.2100851833820343,
      "learning_rate": 8.472670054084704e-05,
      "loss": 0.6737,
      "step": 2641
    },
    {
      "epoch": 0.5431185116661528,
      "grad_norm": 0.20590589940547943,
      "learning_rate": 8.472200795077065e-05,
      "loss": 0.7015,
      "step": 2642
    },
    {
      "epoch": 0.5433240826395312,
      "grad_norm": 0.20215122401714325,
      "learning_rate": 8.47173134037766e-05,
      "loss": 0.6834,
      "step": 2643
    },
    {
      "epoch": 0.5435296536129098,
      "grad_norm": 0.17897242307662964,
      "learning_rate": 8.471261690009615e-05,
      "loss": 0.5736,
      "step": 2644
    },
    {
      "epoch": 0.5437352245862884,
      "grad_norm": 0.1412929892539978,
      "learning_rate": 8.470791843996068e-05,
      "loss": 0.5684,
      "step": 2645
    },
    {
      "epoch": 0.543940795559667,
      "grad_norm": 0.23520296812057495,
      "learning_rate": 8.470321802360167e-05,
      "loss": 0.6979,
      "step": 2646
    },
    {
      "epoch": 0.5441463665330455,
      "grad_norm": 0.22806185483932495,
      "learning_rate": 8.469851565125068e-05,
      "loss": 0.6768,
      "step": 2647
    },
    {
      "epoch": 0.5443519375064241,
      "grad_norm": 0.20918670296669006,
      "learning_rate": 8.469381132313938e-05,
      "loss": 0.669,
      "step": 2648
    },
    {
      "epoch": 0.5445575084798027,
      "grad_norm": 0.21143250167369843,
      "learning_rate": 8.468910503949951e-05,
      "loss": 0.7044,
      "step": 2649
    },
    {
      "epoch": 0.5447630794531813,
      "grad_norm": 0.21474787592887878,
      "learning_rate": 8.468439680056295e-05,
      "loss": 0.7171,
      "step": 2650
    },
    {
      "epoch": 0.5449686504265597,
      "grad_norm": 0.20778292417526245,
      "learning_rate": 8.467968660656164e-05,
      "loss": 0.6719,
      "step": 2651
    },
    {
      "epoch": 0.5451742213999383,
      "grad_norm": 0.20223721861839294,
      "learning_rate": 8.467497445772764e-05,
      "loss": 0.5761,
      "step": 2652
    },
    {
      "epoch": 0.5453797923733169,
      "grad_norm": 0.16389262676239014,
      "learning_rate": 8.467026035429308e-05,
      "loss": 0.6203,
      "step": 2653
    },
    {
      "epoch": 0.5455853633466955,
      "grad_norm": 0.23996488749980927,
      "learning_rate": 8.466554429649022e-05,
      "loss": 0.7091,
      "step": 2654
    },
    {
      "epoch": 0.545790934320074,
      "grad_norm": 0.22990204393863678,
      "learning_rate": 8.466082628455138e-05,
      "loss": 0.6889,
      "step": 2655
    },
    {
      "epoch": 0.5459965052934526,
      "grad_norm": 0.20042270421981812,
      "learning_rate": 8.4656106318709e-05,
      "loss": 0.6864,
      "step": 2656
    },
    {
      "epoch": 0.5462020762668311,
      "grad_norm": 0.2556054890155792,
      "learning_rate": 8.465138439919563e-05,
      "loss": 0.6858,
      "step": 2657
    },
    {
      "epoch": 0.5464076472402097,
      "grad_norm": 0.20988969504833221,
      "learning_rate": 8.464666052624386e-05,
      "loss": 0.6907,
      "step": 2658
    },
    {
      "epoch": 0.5466132182135882,
      "grad_norm": 0.21028688549995422,
      "learning_rate": 8.464193470008646e-05,
      "loss": 0.7199,
      "step": 2659
    },
    {
      "epoch": 0.5468187891869668,
      "grad_norm": 0.20908872783184052,
      "learning_rate": 8.463720692095621e-05,
      "loss": 0.6965,
      "step": 2660
    },
    {
      "epoch": 0.5470243601603454,
      "grad_norm": 0.20974692702293396,
      "learning_rate": 8.463247718908604e-05,
      "loss": 0.6913,
      "step": 2661
    },
    {
      "epoch": 0.5472299311337239,
      "grad_norm": 0.3178030550479889,
      "learning_rate": 8.462774550470894e-05,
      "loss": 0.5966,
      "step": 2662
    },
    {
      "epoch": 0.5474355021071025,
      "grad_norm": 0.23371629416942596,
      "learning_rate": 8.462301186805807e-05,
      "loss": 0.6999,
      "step": 2663
    },
    {
      "epoch": 0.547641073080481,
      "grad_norm": 0.2393561601638794,
      "learning_rate": 8.461827627936658e-05,
      "loss": 0.6981,
      "step": 2664
    },
    {
      "epoch": 0.5478466440538596,
      "grad_norm": 0.21029163897037506,
      "learning_rate": 8.46135387388678e-05,
      "loss": 0.6925,
      "step": 2665
    },
    {
      "epoch": 0.5480522150272381,
      "grad_norm": 0.20427922904491425,
      "learning_rate": 8.460879924679513e-05,
      "loss": 0.648,
      "step": 2666
    },
    {
      "epoch": 0.5482577860006167,
      "grad_norm": 0.20650714635849,
      "learning_rate": 8.460405780338205e-05,
      "loss": 0.5918,
      "step": 2667
    },
    {
      "epoch": 0.5484633569739953,
      "grad_norm": 0.24088306725025177,
      "learning_rate": 8.459931440886214e-05,
      "loss": 0.7039,
      "step": 2668
    },
    {
      "epoch": 0.5486689279473739,
      "grad_norm": 0.22175416350364685,
      "learning_rate": 8.45945690634691e-05,
      "loss": 0.7038,
      "step": 2669
    },
    {
      "epoch": 0.5488744989207524,
      "grad_norm": 0.21606440842151642,
      "learning_rate": 8.45898217674367e-05,
      "loss": 0.6745,
      "step": 2670
    },
    {
      "epoch": 0.5490800698941309,
      "grad_norm": 0.22006148099899292,
      "learning_rate": 8.458507252099884e-05,
      "loss": 0.7169,
      "step": 2671
    },
    {
      "epoch": 0.5492856408675095,
      "grad_norm": 0.2132798433303833,
      "learning_rate": 8.458032132438947e-05,
      "loss": 0.6769,
      "step": 2672
    },
    {
      "epoch": 0.5494912118408881,
      "grad_norm": 0.2083420604467392,
      "learning_rate": 8.457556817784266e-05,
      "loss": 0.6845,
      "step": 2673
    },
    {
      "epoch": 0.5496967828142666,
      "grad_norm": 0.16094450652599335,
      "learning_rate": 8.457081308159259e-05,
      "loss": 0.573,
      "step": 2674
    },
    {
      "epoch": 0.5499023537876452,
      "grad_norm": 0.23418548703193665,
      "learning_rate": 8.456605603587351e-05,
      "loss": 0.6743,
      "step": 2675
    },
    {
      "epoch": 0.5501079247610238,
      "grad_norm": 0.2129811942577362,
      "learning_rate": 8.456129704091978e-05,
      "loss": 0.6956,
      "step": 2676
    },
    {
      "epoch": 0.5503134957344022,
      "grad_norm": 0.14898192882537842,
      "learning_rate": 8.455653609696585e-05,
      "loss": 0.5923,
      "step": 2677
    },
    {
      "epoch": 0.5505190667077808,
      "grad_norm": 0.22483858466148376,
      "learning_rate": 8.455177320424627e-05,
      "loss": 0.6918,
      "step": 2678
    },
    {
      "epoch": 0.5507246376811594,
      "grad_norm": 0.22401611506938934,
      "learning_rate": 8.454700836299571e-05,
      "loss": 0.6985,
      "step": 2679
    },
    {
      "epoch": 0.550930208654538,
      "grad_norm": 0.19923460483551025,
      "learning_rate": 8.454224157344887e-05,
      "loss": 0.729,
      "step": 2680
    },
    {
      "epoch": 0.5511357796279165,
      "grad_norm": 0.21183621883392334,
      "learning_rate": 8.453747283584061e-05,
      "loss": 0.677,
      "step": 2681
    },
    {
      "epoch": 0.5513413506012951,
      "grad_norm": 0.16109618544578552,
      "learning_rate": 8.453270215040588e-05,
      "loss": 0.5949,
      "step": 2682
    },
    {
      "epoch": 0.5515469215746737,
      "grad_norm": 0.21456550061702728,
      "learning_rate": 8.452792951737966e-05,
      "loss": 0.7069,
      "step": 2683
    },
    {
      "epoch": 0.5517524925480523,
      "grad_norm": 0.19927652180194855,
      "learning_rate": 8.452315493699713e-05,
      "loss": 0.6762,
      "step": 2684
    },
    {
      "epoch": 0.5519580635214307,
      "grad_norm": 0.19462721049785614,
      "learning_rate": 8.451837840949347e-05,
      "loss": 0.701,
      "step": 2685
    },
    {
      "epoch": 0.5521636344948093,
      "grad_norm": 0.22193773090839386,
      "learning_rate": 8.451359993510403e-05,
      "loss": 0.6949,
      "step": 2686
    },
    {
      "epoch": 0.5523692054681879,
      "grad_norm": 0.22146186232566833,
      "learning_rate": 8.450881951406419e-05,
      "loss": 0.7208,
      "step": 2687
    },
    {
      "epoch": 0.5525747764415665,
      "grad_norm": 0.19484825432300568,
      "learning_rate": 8.45040371466095e-05,
      "loss": 0.6823,
      "step": 2688
    },
    {
      "epoch": 0.552780347414945,
      "grad_norm": 0.20109498500823975,
      "learning_rate": 8.449925283297551e-05,
      "loss": 0.7008,
      "step": 2689
    },
    {
      "epoch": 0.5529859183883236,
      "grad_norm": 0.1965745985507965,
      "learning_rate": 8.449446657339798e-05,
      "loss": 0.7047,
      "step": 2690
    },
    {
      "epoch": 0.5531914893617021,
      "grad_norm": 0.19609376788139343,
      "learning_rate": 8.448967836811266e-05,
      "loss": 0.6856,
      "step": 2691
    },
    {
      "epoch": 0.5533970603350807,
      "grad_norm": 0.19566380977630615,
      "learning_rate": 8.448488821735546e-05,
      "loss": 0.6883,
      "step": 2692
    },
    {
      "epoch": 0.5536026313084592,
      "grad_norm": 0.18993543088436127,
      "learning_rate": 8.448009612136238e-05,
      "loss": 0.5882,
      "step": 2693
    },
    {
      "epoch": 0.5538082022818378,
      "grad_norm": 0.22677689790725708,
      "learning_rate": 8.44753020803695e-05,
      "loss": 0.695,
      "step": 2694
    },
    {
      "epoch": 0.5540137732552164,
      "grad_norm": 0.21654780209064484,
      "learning_rate": 8.447050609461299e-05,
      "loss": 0.7006,
      "step": 2695
    },
    {
      "epoch": 0.5542193442285949,
      "grad_norm": 0.1987585723400116,
      "learning_rate": 8.446570816432911e-05,
      "loss": 0.6786,
      "step": 2696
    },
    {
      "epoch": 0.5544249152019735,
      "grad_norm": 0.21320489048957825,
      "learning_rate": 8.446090828975427e-05,
      "loss": 0.7029,
      "step": 2697
    },
    {
      "epoch": 0.554630486175352,
      "grad_norm": 0.16352033615112305,
      "learning_rate": 8.445610647112492e-05,
      "loss": 0.5938,
      "step": 2698
    },
    {
      "epoch": 0.5548360571487306,
      "grad_norm": 0.21454685926437378,
      "learning_rate": 8.44513027086776e-05,
      "loss": 0.6759,
      "step": 2699
    },
    {
      "epoch": 0.5550416281221091,
      "grad_norm": 0.20842206478118896,
      "learning_rate": 8.444649700264902e-05,
      "loss": 0.6922,
      "step": 2700
    },
    {
      "epoch": 0.5552471990954877,
      "grad_norm": 0.1389513611793518,
      "learning_rate": 8.444168935327589e-05,
      "loss": 0.5826,
      "step": 2701
    },
    {
      "epoch": 0.5554527700688663,
      "grad_norm": 0.20907482504844666,
      "learning_rate": 8.443687976079507e-05,
      "loss": 0.6838,
      "step": 2702
    },
    {
      "epoch": 0.5556583410422449,
      "grad_norm": 0.21713374555110931,
      "learning_rate": 8.443206822544352e-05,
      "loss": 0.7058,
      "step": 2703
    },
    {
      "epoch": 0.5558639120156234,
      "grad_norm": 0.1558568924665451,
      "learning_rate": 8.442725474745827e-05,
      "loss": 0.5847,
      "step": 2704
    },
    {
      "epoch": 0.5560694829890019,
      "grad_norm": 0.20640867948532104,
      "learning_rate": 8.442243932707647e-05,
      "loss": 0.7049,
      "step": 2705
    },
    {
      "epoch": 0.5562750539623805,
      "grad_norm": 0.12573988735675812,
      "learning_rate": 8.441762196453534e-05,
      "loss": 0.5863,
      "step": 2706
    },
    {
      "epoch": 0.5564806249357591,
      "grad_norm": 0.21294710040092468,
      "learning_rate": 8.441280266007221e-05,
      "loss": 0.6913,
      "step": 2707
    },
    {
      "epoch": 0.5566861959091376,
      "grad_norm": 0.2014019787311554,
      "learning_rate": 8.44079814139245e-05,
      "loss": 0.6954,
      "step": 2708
    },
    {
      "epoch": 0.5568917668825162,
      "grad_norm": 0.2047373652458191,
      "learning_rate": 8.440315822632974e-05,
      "loss": 0.6976,
      "step": 2709
    },
    {
      "epoch": 0.5570973378558948,
      "grad_norm": 0.21064162254333496,
      "learning_rate": 8.439833309752556e-05,
      "loss": 0.6994,
      "step": 2710
    },
    {
      "epoch": 0.5573029088292734,
      "grad_norm": 0.21300119161605835,
      "learning_rate": 8.439350602774964e-05,
      "loss": 0.6748,
      "step": 2711
    },
    {
      "epoch": 0.5575084798026518,
      "grad_norm": 0.17572659254074097,
      "learning_rate": 8.438867701723982e-05,
      "loss": 0.5906,
      "step": 2712
    },
    {
      "epoch": 0.5577140507760304,
      "grad_norm": 0.13898785412311554,
      "learning_rate": 8.438384606623397e-05,
      "loss": 0.5679,
      "step": 2713
    },
    {
      "epoch": 0.557919621749409,
      "grad_norm": 0.24983015656471252,
      "learning_rate": 8.437901317497011e-05,
      "loss": 0.6696,
      "step": 2714
    },
    {
      "epoch": 0.5581251927227875,
      "grad_norm": 0.21426579356193542,
      "learning_rate": 8.437417834368632e-05,
      "loss": 0.6824,
      "step": 2715
    },
    {
      "epoch": 0.5583307636961661,
      "grad_norm": 0.20514623820781708,
      "learning_rate": 8.436934157262082e-05,
      "loss": 0.708,
      "step": 2716
    },
    {
      "epoch": 0.5585363346695447,
      "grad_norm": 0.21398288011550903,
      "learning_rate": 8.436450286201184e-05,
      "loss": 0.7051,
      "step": 2717
    },
    {
      "epoch": 0.5587419056429233,
      "grad_norm": 0.2091488540172577,
      "learning_rate": 8.435966221209782e-05,
      "loss": 0.6671,
      "step": 2718
    },
    {
      "epoch": 0.5589474766163017,
      "grad_norm": 0.21767988801002502,
      "learning_rate": 8.43548196231172e-05,
      "loss": 0.724,
      "step": 2719
    },
    {
      "epoch": 0.5591530475896803,
      "grad_norm": 0.2218277007341385,
      "learning_rate": 8.434997509530855e-05,
      "loss": 0.6924,
      "step": 2720
    },
    {
      "epoch": 0.5593586185630589,
      "grad_norm": 0.2099279761314392,
      "learning_rate": 8.434512862891058e-05,
      "loss": 0.6847,
      "step": 2721
    },
    {
      "epoch": 0.5595641895364375,
      "grad_norm": 0.2063916176557541,
      "learning_rate": 8.434028022416199e-05,
      "loss": 0.669,
      "step": 2722
    },
    {
      "epoch": 0.559769760509816,
      "grad_norm": 0.2331087738275528,
      "learning_rate": 8.433542988130168e-05,
      "loss": 0.6039,
      "step": 2723
    },
    {
      "epoch": 0.5599753314831946,
      "grad_norm": 0.22927048802375793,
      "learning_rate": 8.433057760056858e-05,
      "loss": 0.6982,
      "step": 2724
    },
    {
      "epoch": 0.5601809024565731,
      "grad_norm": 0.22356915473937988,
      "learning_rate": 8.432572338220177e-05,
      "loss": 0.6676,
      "step": 2725
    },
    {
      "epoch": 0.5603864734299517,
      "grad_norm": 0.21038733422756195,
      "learning_rate": 8.432086722644038e-05,
      "loss": 0.6922,
      "step": 2726
    },
    {
      "epoch": 0.5605920444033302,
      "grad_norm": 0.21845050156116486,
      "learning_rate": 8.431600913352363e-05,
      "loss": 0.6809,
      "step": 2727
    },
    {
      "epoch": 0.5607976153767088,
      "grad_norm": 0.20335665345191956,
      "learning_rate": 8.431114910369087e-05,
      "loss": 0.6561,
      "step": 2728
    },
    {
      "epoch": 0.5610031863500874,
      "grad_norm": 0.20789889991283417,
      "learning_rate": 8.430628713718156e-05,
      "loss": 0.7282,
      "step": 2729
    },
    {
      "epoch": 0.561208757323466,
      "grad_norm": 0.21542754769325256,
      "learning_rate": 8.430142323423518e-05,
      "loss": 0.6794,
      "step": 2730
    },
    {
      "epoch": 0.5614143282968445,
      "grad_norm": 0.19883479177951813,
      "learning_rate": 8.429655739509137e-05,
      "loss": 0.7022,
      "step": 2731
    },
    {
      "epoch": 0.561619899270223,
      "grad_norm": 0.2027217149734497,
      "learning_rate": 8.429168961998987e-05,
      "loss": 0.7122,
      "step": 2732
    },
    {
      "epoch": 0.5618254702436016,
      "grad_norm": 0.20962925255298615,
      "learning_rate": 8.428681990917045e-05,
      "loss": 0.702,
      "step": 2733
    },
    {
      "epoch": 0.5620310412169801,
      "grad_norm": 0.2032438963651657,
      "learning_rate": 8.428194826287304e-05,
      "loss": 0.6828,
      "step": 2734
    },
    {
      "epoch": 0.5622366121903587,
      "grad_norm": 0.19384074211120605,
      "learning_rate": 8.427707468133766e-05,
      "loss": 0.6693,
      "step": 2735
    },
    {
      "epoch": 0.5624421831637373,
      "grad_norm": 0.20118926465511322,
      "learning_rate": 8.427219916480437e-05,
      "loss": 0.7003,
      "step": 2736
    },
    {
      "epoch": 0.5626477541371159,
      "grad_norm": 0.21019205451011658,
      "learning_rate": 8.426732171351338e-05,
      "loss": 0.7088,
      "step": 2737
    },
    {
      "epoch": 0.5628533251104944,
      "grad_norm": 0.19624383747577667,
      "learning_rate": 8.426244232770501e-05,
      "loss": 0.6929,
      "step": 2738
    },
    {
      "epoch": 0.5630588960838729,
      "grad_norm": 0.20001311600208282,
      "learning_rate": 8.425756100761961e-05,
      "loss": 0.6641,
      "step": 2739
    },
    {
      "epoch": 0.5632644670572515,
      "grad_norm": 0.20031724870204926,
      "learning_rate": 8.425267775349766e-05,
      "loss": 0.7202,
      "step": 2740
    },
    {
      "epoch": 0.5634700380306301,
      "grad_norm": 0.20123572647571564,
      "learning_rate": 8.424779256557976e-05,
      "loss": 0.6924,
      "step": 2741
    },
    {
      "epoch": 0.5636756090040086,
      "grad_norm": 0.20444491505622864,
      "learning_rate": 8.424290544410654e-05,
      "loss": 0.6893,
      "step": 2742
    },
    {
      "epoch": 0.5638811799773872,
      "grad_norm": 0.1976771205663681,
      "learning_rate": 8.42380163893188e-05,
      "loss": 0.6709,
      "step": 2743
    },
    {
      "epoch": 0.5640867509507658,
      "grad_norm": 0.222488135099411,
      "learning_rate": 8.42331254014574e-05,
      "loss": 0.5918,
      "step": 2744
    },
    {
      "epoch": 0.5642923219241444,
      "grad_norm": 0.21417805552482605,
      "learning_rate": 8.422823248076329e-05,
      "loss": 0.6833,
      "step": 2745
    },
    {
      "epoch": 0.5644978928975228,
      "grad_norm": 0.21681103110313416,
      "learning_rate": 8.42233376274775e-05,
      "loss": 0.7288,
      "step": 2746
    },
    {
      "epoch": 0.5647034638709014,
      "grad_norm": 0.20778658986091614,
      "learning_rate": 8.42184408418412e-05,
      "loss": 0.6749,
      "step": 2747
    },
    {
      "epoch": 0.56490903484428,
      "grad_norm": 0.20677468180656433,
      "learning_rate": 8.421354212409563e-05,
      "loss": 0.7008,
      "step": 2748
    },
    {
      "epoch": 0.5651146058176586,
      "grad_norm": 0.15667958557605743,
      "learning_rate": 8.420864147448213e-05,
      "loss": 0.5793,
      "step": 2749
    },
    {
      "epoch": 0.5653201767910371,
      "grad_norm": 0.22153092920780182,
      "learning_rate": 8.42037388932421e-05,
      "loss": 0.6865,
      "step": 2750
    },
    {
      "epoch": 0.5655257477644157,
      "grad_norm": 0.22236353158950806,
      "learning_rate": 8.419883438061711e-05,
      "loss": 0.6672,
      "step": 2751
    },
    {
      "epoch": 0.5657313187377943,
      "grad_norm": 0.2081800103187561,
      "learning_rate": 8.419392793684878e-05,
      "loss": 0.7169,
      "step": 2752
    },
    {
      "epoch": 0.5659368897111727,
      "grad_norm": 0.16220282018184662,
      "learning_rate": 8.418901956217878e-05,
      "loss": 0.5878,
      "step": 2753
    },
    {
      "epoch": 0.5661424606845513,
      "grad_norm": 0.21759817004203796,
      "learning_rate": 8.418410925684898e-05,
      "loss": 0.7273,
      "step": 2754
    },
    {
      "epoch": 0.5663480316579299,
      "grad_norm": 0.22539561986923218,
      "learning_rate": 8.417919702110125e-05,
      "loss": 0.7,
      "step": 2755
    },
    {
      "epoch": 0.5665536026313085,
      "grad_norm": 0.196711003780365,
      "learning_rate": 8.41742828551776e-05,
      "loss": 0.7179,
      "step": 2756
    },
    {
      "epoch": 0.566759173604687,
      "grad_norm": 0.210893914103508,
      "learning_rate": 8.416936675932015e-05,
      "loss": 0.708,
      "step": 2757
    },
    {
      "epoch": 0.5669647445780656,
      "grad_norm": 0.19233620166778564,
      "learning_rate": 8.416444873377108e-05,
      "loss": 0.5911,
      "step": 2758
    },
    {
      "epoch": 0.5671703155514441,
      "grad_norm": 0.21840979158878326,
      "learning_rate": 8.415952877877266e-05,
      "loss": 0.6871,
      "step": 2759
    },
    {
      "epoch": 0.5673758865248227,
      "grad_norm": 0.216123566031456,
      "learning_rate": 8.41546068945673e-05,
      "loss": 0.7381,
      "step": 2760
    },
    {
      "epoch": 0.5675814574982012,
      "grad_norm": 0.14728981256484985,
      "learning_rate": 8.414968308139747e-05,
      "loss": 0.5818,
      "step": 2761
    },
    {
      "epoch": 0.5677870284715798,
      "grad_norm": 0.16224178671836853,
      "learning_rate": 8.414475733950572e-05,
      "loss": 0.5819,
      "step": 2762
    },
    {
      "epoch": 0.5679925994449584,
      "grad_norm": 0.23816072940826416,
      "learning_rate": 8.413982966913475e-05,
      "loss": 0.7021,
      "step": 2763
    },
    {
      "epoch": 0.568198170418337,
      "grad_norm": 0.2145988643169403,
      "learning_rate": 8.413490007052731e-05,
      "loss": 0.712,
      "step": 2764
    },
    {
      "epoch": 0.5684037413917155,
      "grad_norm": 0.1928829550743103,
      "learning_rate": 8.412996854392625e-05,
      "loss": 0.6792,
      "step": 2765
    },
    {
      "epoch": 0.568609312365094,
      "grad_norm": 0.22511503100395203,
      "learning_rate": 8.412503508957455e-05,
      "loss": 0.6914,
      "step": 2766
    },
    {
      "epoch": 0.5688148833384726,
      "grad_norm": 0.23448607325553894,
      "learning_rate": 8.412009970771524e-05,
      "loss": 0.7113,
      "step": 2767
    },
    {
      "epoch": 0.5690204543118512,
      "grad_norm": 0.21442458033561707,
      "learning_rate": 8.411516239859146e-05,
      "loss": 0.7,
      "step": 2768
    },
    {
      "epoch": 0.5692260252852297,
      "grad_norm": 0.18232490122318268,
      "learning_rate": 8.411022316244645e-05,
      "loss": 0.5882,
      "step": 2769
    },
    {
      "epoch": 0.5694315962586083,
      "grad_norm": 0.1396799087524414,
      "learning_rate": 8.410528199952354e-05,
      "loss": 0.5754,
      "step": 2770
    },
    {
      "epoch": 0.5696371672319869,
      "grad_norm": 0.2816780209541321,
      "learning_rate": 8.410033891006617e-05,
      "loss": 0.6885,
      "step": 2771
    },
    {
      "epoch": 0.5698427382053654,
      "grad_norm": 0.26476380228996277,
      "learning_rate": 8.409539389431785e-05,
      "loss": 0.6791,
      "step": 2772
    },
    {
      "epoch": 0.5700483091787439,
      "grad_norm": 0.2113625705242157,
      "learning_rate": 8.409044695252221e-05,
      "loss": 0.7115,
      "step": 2773
    },
    {
      "epoch": 0.5702538801521225,
      "grad_norm": 0.21605044603347778,
      "learning_rate": 8.408549808492296e-05,
      "loss": 0.7098,
      "step": 2774
    },
    {
      "epoch": 0.5704594511255011,
      "grad_norm": 0.23488545417785645,
      "learning_rate": 8.40805472917639e-05,
      "loss": 0.6791,
      "step": 2775
    },
    {
      "epoch": 0.5706650220988796,
      "grad_norm": 0.23377586901187897,
      "learning_rate": 8.407559457328894e-05,
      "loss": 0.7159,
      "step": 2776
    },
    {
      "epoch": 0.5708705930722582,
      "grad_norm": 0.2001940906047821,
      "learning_rate": 8.407063992974208e-05,
      "loss": 0.6831,
      "step": 2777
    },
    {
      "epoch": 0.5710761640456368,
      "grad_norm": 0.20575560629367828,
      "learning_rate": 8.40656833613674e-05,
      "loss": 0.6893,
      "step": 2778
    },
    {
      "epoch": 0.5712817350190154,
      "grad_norm": 0.21755361557006836,
      "learning_rate": 8.406072486840909e-05,
      "loss": 0.6912,
      "step": 2779
    },
    {
      "epoch": 0.5714873059923938,
      "grad_norm": 0.21302054822444916,
      "learning_rate": 8.405576445111144e-05,
      "loss": 0.5823,
      "step": 2780
    },
    {
      "epoch": 0.5716928769657724,
      "grad_norm": 0.2074202299118042,
      "learning_rate": 8.405080210971882e-05,
      "loss": 0.6948,
      "step": 2781
    },
    {
      "epoch": 0.571898447939151,
      "grad_norm": 0.2045622020959854,
      "learning_rate": 8.40458378444757e-05,
      "loss": 0.6982,
      "step": 2782
    },
    {
      "epoch": 0.5721040189125296,
      "grad_norm": 0.20877763628959656,
      "learning_rate": 8.404087165562664e-05,
      "loss": 0.696,
      "step": 2783
    },
    {
      "epoch": 0.5723095898859081,
      "grad_norm": 0.21138116717338562,
      "learning_rate": 8.403590354341632e-05,
      "loss": 0.6767,
      "step": 2784
    },
    {
      "epoch": 0.5725151608592867,
      "grad_norm": 0.20857292413711548,
      "learning_rate": 8.40309335080895e-05,
      "loss": 0.6847,
      "step": 2785
    },
    {
      "epoch": 0.5727207318326653,
      "grad_norm": 0.20251955091953278,
      "learning_rate": 8.4025961549891e-05,
      "loss": 0.7044,
      "step": 2786
    },
    {
      "epoch": 0.5729263028060438,
      "grad_norm": 0.23925918340682983,
      "learning_rate": 8.40209876690658e-05,
      "loss": 0.6971,
      "step": 2787
    },
    {
      "epoch": 0.5731318737794223,
      "grad_norm": 0.19959931075572968,
      "learning_rate": 8.401601186585888e-05,
      "loss": 0.5827,
      "step": 2788
    },
    {
      "epoch": 0.5733374447528009,
      "grad_norm": 0.22731555998325348,
      "learning_rate": 8.401103414051545e-05,
      "loss": 0.6834,
      "step": 2789
    },
    {
      "epoch": 0.5735430157261795,
      "grad_norm": 0.13042806088924408,
      "learning_rate": 8.400605449328069e-05,
      "loss": 0.584,
      "step": 2790
    },
    {
      "epoch": 0.573748586699558,
      "grad_norm": 0.22589558362960815,
      "learning_rate": 8.400107292439996e-05,
      "loss": 0.6953,
      "step": 2791
    },
    {
      "epoch": 0.5739541576729366,
      "grad_norm": 0.2052125185728073,
      "learning_rate": 8.399608943411864e-05,
      "loss": 0.6918,
      "step": 2792
    },
    {
      "epoch": 0.5741597286463151,
      "grad_norm": 0.2042934000492096,
      "learning_rate": 8.399110402268226e-05,
      "loss": 0.7068,
      "step": 2793
    },
    {
      "epoch": 0.5743652996196937,
      "grad_norm": 0.20587709546089172,
      "learning_rate": 8.398611669033642e-05,
      "loss": 0.6933,
      "step": 2794
    },
    {
      "epoch": 0.5745708705930722,
      "grad_norm": 0.1982177048921585,
      "learning_rate": 8.398112743732685e-05,
      "loss": 0.6884,
      "step": 2795
    },
    {
      "epoch": 0.5747764415664508,
      "grad_norm": 0.19220708310604095,
      "learning_rate": 8.397613626389933e-05,
      "loss": 0.5803,
      "step": 2796
    },
    {
      "epoch": 0.5749820125398294,
      "grad_norm": 0.20522017776966095,
      "learning_rate": 8.397114317029975e-05,
      "loss": 0.6739,
      "step": 2797
    },
    {
      "epoch": 0.575187583513208,
      "grad_norm": 0.20296591520309448,
      "learning_rate": 8.396614815677408e-05,
      "loss": 0.6968,
      "step": 2798
    },
    {
      "epoch": 0.5753931544865865,
      "grad_norm": 0.21436072885990143,
      "learning_rate": 8.396115122356844e-05,
      "loss": 0.7124,
      "step": 2799
    },
    {
      "epoch": 0.575598725459965,
      "grad_norm": 0.1649683117866516,
      "learning_rate": 8.395615237092896e-05,
      "loss": 0.5981,
      "step": 2800
    },
    {
      "epoch": 0.5758042964333436,
      "grad_norm": 0.20267538726329803,
      "learning_rate": 8.395115159910193e-05,
      "loss": 0.6791,
      "step": 2801
    },
    {
      "epoch": 0.5760098674067222,
      "grad_norm": 0.2140885293483734,
      "learning_rate": 8.394614890833374e-05,
      "loss": 0.7054,
      "step": 2802
    },
    {
      "epoch": 0.5762154383801007,
      "grad_norm": 0.20777259767055511,
      "learning_rate": 8.394114429887083e-05,
      "loss": 0.68,
      "step": 2803
    },
    {
      "epoch": 0.5764210093534793,
      "grad_norm": 0.2137485295534134,
      "learning_rate": 8.393613777095974e-05,
      "loss": 0.7086,
      "step": 2804
    },
    {
      "epoch": 0.5766265803268579,
      "grad_norm": 0.20304176211357117,
      "learning_rate": 8.393112932484713e-05,
      "loss": 0.6617,
      "step": 2805
    },
    {
      "epoch": 0.5768321513002365,
      "grad_norm": 0.21544432640075684,
      "learning_rate": 8.392611896077973e-05,
      "loss": 0.7053,
      "step": 2806
    },
    {
      "epoch": 0.5770377222736149,
      "grad_norm": 0.21482408046722412,
      "learning_rate": 8.39211066790044e-05,
      "loss": 0.6994,
      "step": 2807
    },
    {
      "epoch": 0.5772432932469935,
      "grad_norm": 0.15521647036075592,
      "learning_rate": 8.391609247976805e-05,
      "loss": 0.5946,
      "step": 2808
    },
    {
      "epoch": 0.5774488642203721,
      "grad_norm": 0.19584627449512482,
      "learning_rate": 8.391107636331775e-05,
      "loss": 0.6638,
      "step": 2809
    },
    {
      "epoch": 0.5776544351937506,
      "grad_norm": 0.2126510590314865,
      "learning_rate": 8.390605832990055e-05,
      "loss": 0.7362,
      "step": 2810
    },
    {
      "epoch": 0.5778600061671292,
      "grad_norm": 0.1384701430797577,
      "learning_rate": 8.390103837976373e-05,
      "loss": 0.5919,
      "step": 2811
    },
    {
      "epoch": 0.5780655771405078,
      "grad_norm": 0.20149080455303192,
      "learning_rate": 8.389601651315454e-05,
      "loss": 0.6609,
      "step": 2812
    },
    {
      "epoch": 0.5782711481138864,
      "grad_norm": 0.13342009484767914,
      "learning_rate": 8.389099273032045e-05,
      "loss": 0.5691,
      "step": 2813
    },
    {
      "epoch": 0.5784767190872648,
      "grad_norm": 0.20240166783332825,
      "learning_rate": 8.38859670315089e-05,
      "loss": 0.6667,
      "step": 2814
    },
    {
      "epoch": 0.5786822900606434,
      "grad_norm": 0.14066733419895172,
      "learning_rate": 8.388093941696752e-05,
      "loss": 0.5841,
      "step": 2815
    },
    {
      "epoch": 0.578887861034022,
      "grad_norm": 0.20561981201171875,
      "learning_rate": 8.387590988694398e-05,
      "loss": 0.6808,
      "step": 2816
    },
    {
      "epoch": 0.5790934320074006,
      "grad_norm": 0.19909094274044037,
      "learning_rate": 8.387087844168607e-05,
      "loss": 0.6827,
      "step": 2817
    },
    {
      "epoch": 0.5792990029807791,
      "grad_norm": 0.19748428463935852,
      "learning_rate": 8.386584508144166e-05,
      "loss": 0.6952,
      "step": 2818
    },
    {
      "epoch": 0.5795045739541577,
      "grad_norm": 0.203225240111351,
      "learning_rate": 8.386080980645872e-05,
      "loss": 0.711,
      "step": 2819
    },
    {
      "epoch": 0.5797101449275363,
      "grad_norm": 0.20350880920886993,
      "learning_rate": 8.385577261698531e-05,
      "loss": 0.6672,
      "step": 2820
    },
    {
      "epoch": 0.5799157159009148,
      "grad_norm": 0.19929729402065277,
      "learning_rate": 8.385073351326959e-05,
      "loss": 0.6749,
      "step": 2821
    },
    {
      "epoch": 0.5801212868742933,
      "grad_norm": 0.20175184309482574,
      "learning_rate": 8.384569249555983e-05,
      "loss": 0.6931,
      "step": 2822
    },
    {
      "epoch": 0.5803268578476719,
      "grad_norm": 0.18173432350158691,
      "learning_rate": 8.384064956410437e-05,
      "loss": 0.5901,
      "step": 2823
    },
    {
      "epoch": 0.5805324288210505,
      "grad_norm": 0.21010646224021912,
      "learning_rate": 8.383560471915162e-05,
      "loss": 0.6967,
      "step": 2824
    },
    {
      "epoch": 0.580737999794429,
      "grad_norm": 0.2225627601146698,
      "learning_rate": 8.383055796095018e-05,
      "loss": 0.7137,
      "step": 2825
    },
    {
      "epoch": 0.5809435707678076,
      "grad_norm": 0.19758129119873047,
      "learning_rate": 8.382550928974862e-05,
      "loss": 0.6991,
      "step": 2826
    },
    {
      "epoch": 0.5811491417411861,
      "grad_norm": 0.19794224202632904,
      "learning_rate": 8.382045870579569e-05,
      "loss": 0.6759,
      "step": 2827
    },
    {
      "epoch": 0.5813547127145647,
      "grad_norm": 0.20339448750019073,
      "learning_rate": 8.38154062093402e-05,
      "loss": 0.6621,
      "step": 2828
    },
    {
      "epoch": 0.5815602836879432,
      "grad_norm": 0.19173693656921387,
      "learning_rate": 8.381035180063107e-05,
      "loss": 0.6821,
      "step": 2829
    },
    {
      "epoch": 0.5817658546613218,
      "grad_norm": 0.1988253891468048,
      "learning_rate": 8.380529547991732e-05,
      "loss": 0.6803,
      "step": 2830
    },
    {
      "epoch": 0.5819714256347004,
      "grad_norm": 0.2126402109861374,
      "learning_rate": 8.380023724744802e-05,
      "loss": 0.6765,
      "step": 2831
    },
    {
      "epoch": 0.582176996608079,
      "grad_norm": 0.20873717963695526,
      "learning_rate": 8.379517710347238e-05,
      "loss": 0.6801,
      "step": 2832
    },
    {
      "epoch": 0.5823825675814575,
      "grad_norm": 0.1995771825313568,
      "learning_rate": 8.379011504823973e-05,
      "loss": 0.6837,
      "step": 2833
    },
    {
      "epoch": 0.582588138554836,
      "grad_norm": 0.9173756241798401,
      "learning_rate": 8.378505108199937e-05,
      "loss": 0.7294,
      "step": 2834
    },
    {
      "epoch": 0.5827937095282146,
      "grad_norm": 0.20103541016578674,
      "learning_rate": 8.377998520500086e-05,
      "loss": 0.6703,
      "step": 2835
    },
    {
      "epoch": 0.5829992805015932,
      "grad_norm": 0.20115043222904205,
      "learning_rate": 8.377491741749371e-05,
      "loss": 0.6794,
      "step": 2836
    },
    {
      "epoch": 0.5832048514749717,
      "grad_norm": 0.2085791677236557,
      "learning_rate": 8.376984771972763e-05,
      "loss": 0.6799,
      "step": 2837
    },
    {
      "epoch": 0.5834104224483503,
      "grad_norm": 0.2213800698518753,
      "learning_rate": 8.376477611195234e-05,
      "loss": 0.7313,
      "step": 2838
    },
    {
      "epoch": 0.5836159934217289,
      "grad_norm": 0.2140512466430664,
      "learning_rate": 8.375970259441773e-05,
      "loss": 0.693,
      "step": 2839
    },
    {
      "epoch": 0.5838215643951075,
      "grad_norm": 0.20790469646453857,
      "learning_rate": 8.375462716737375e-05,
      "loss": 0.6993,
      "step": 2840
    },
    {
      "epoch": 0.5840271353684859,
      "grad_norm": 0.2115468680858612,
      "learning_rate": 8.374954983107042e-05,
      "loss": 0.687,
      "step": 2841
    },
    {
      "epoch": 0.5842327063418645,
      "grad_norm": 0.21003267168998718,
      "learning_rate": 8.374447058575786e-05,
      "loss": 0.7148,
      "step": 2842
    },
    {
      "epoch": 0.5844382773152431,
      "grad_norm": 0.21963387727737427,
      "learning_rate": 8.373938943168635e-05,
      "loss": 0.6821,
      "step": 2843
    },
    {
      "epoch": 0.5846438482886216,
      "grad_norm": 0.20493534207344055,
      "learning_rate": 8.373430636910619e-05,
      "loss": 0.6842,
      "step": 2844
    },
    {
      "epoch": 0.5848494192620002,
      "grad_norm": 0.20353847742080688,
      "learning_rate": 8.37292213982678e-05,
      "loss": 0.6853,
      "step": 2845
    },
    {
      "epoch": 0.5850549902353788,
      "grad_norm": 0.17759917676448822,
      "learning_rate": 8.372413451942168e-05,
      "loss": 0.581,
      "step": 2846
    },
    {
      "epoch": 0.5852605612087574,
      "grad_norm": 0.14481404423713684,
      "learning_rate": 8.371904573281845e-05,
      "loss": 0.5929,
      "step": 2847
    },
    {
      "epoch": 0.5854661321821358,
      "grad_norm": 0.1454802304506302,
      "learning_rate": 8.371395503870882e-05,
      "loss": 0.5616,
      "step": 2848
    },
    {
      "epoch": 0.5856717031555144,
      "grad_norm": 0.24941618740558624,
      "learning_rate": 8.370886243734358e-05,
      "loss": 0.6982,
      "step": 2849
    },
    {
      "epoch": 0.585877274128893,
      "grad_norm": 0.21928314864635468,
      "learning_rate": 8.370376792897359e-05,
      "loss": 0.6931,
      "step": 2850
    },
    {
      "epoch": 0.5860828451022716,
      "grad_norm": 0.20207005739212036,
      "learning_rate": 8.369867151384987e-05,
      "loss": 0.6671,
      "step": 2851
    },
    {
      "epoch": 0.5862884160756501,
      "grad_norm": 0.22684946656227112,
      "learning_rate": 8.369357319222348e-05,
      "loss": 0.6684,
      "step": 2852
    },
    {
      "epoch": 0.5864939870490287,
      "grad_norm": 0.21584348380565643,
      "learning_rate": 8.368847296434557e-05,
      "loss": 0.7032,
      "step": 2853
    },
    {
      "epoch": 0.5866995580224073,
      "grad_norm": 0.209476038813591,
      "learning_rate": 8.368337083046747e-05,
      "loss": 0.6804,
      "step": 2854
    },
    {
      "epoch": 0.5869051289957858,
      "grad_norm": 0.22032958269119263,
      "learning_rate": 8.367826679084046e-05,
      "loss": 0.6868,
      "step": 2855
    },
    {
      "epoch": 0.5871106999691643,
      "grad_norm": 0.21995702385902405,
      "learning_rate": 8.367316084571603e-05,
      "loss": 0.6975,
      "step": 2856
    },
    {
      "epoch": 0.5873162709425429,
      "grad_norm": 0.20626819133758545,
      "learning_rate": 8.366805299534574e-05,
      "loss": 0.7272,
      "step": 2857
    },
    {
      "epoch": 0.5875218419159215,
      "grad_norm": 0.2072131335735321,
      "learning_rate": 8.36629432399812e-05,
      "loss": 0.6957,
      "step": 2858
    },
    {
      "epoch": 0.5877274128893001,
      "grad_norm": 0.21286934614181519,
      "learning_rate": 8.365783157987416e-05,
      "loss": 0.7193,
      "step": 2859
    },
    {
      "epoch": 0.5879329838626786,
      "grad_norm": 0.20594240725040436,
      "learning_rate": 8.365271801527644e-05,
      "loss": 0.6996,
      "step": 2860
    },
    {
      "epoch": 0.5881385548360571,
      "grad_norm": 0.20829501748085022,
      "learning_rate": 8.364760254643997e-05,
      "loss": 0.6832,
      "step": 2861
    },
    {
      "epoch": 0.5883441258094357,
      "grad_norm": 0.2092822641134262,
      "learning_rate": 8.364248517361676e-05,
      "loss": 0.7114,
      "step": 2862
    },
    {
      "epoch": 0.5885496967828142,
      "grad_norm": 0.19926267862319946,
      "learning_rate": 8.363736589705892e-05,
      "loss": 0.6744,
      "step": 2863
    },
    {
      "epoch": 0.5887552677561928,
      "grad_norm": 0.20233862102031708,
      "learning_rate": 8.363224471701866e-05,
      "loss": 0.69,
      "step": 2864
    },
    {
      "epoch": 0.5889608387295714,
      "grad_norm": 0.2081189900636673,
      "learning_rate": 8.362712163374826e-05,
      "loss": 0.7025,
      "step": 2865
    },
    {
      "epoch": 0.58916640970295,
      "grad_norm": 0.19669431447982788,
      "learning_rate": 8.362199664750012e-05,
      "loss": 0.6796,
      "step": 2866
    },
    {
      "epoch": 0.5893719806763285,
      "grad_norm": 0.20693160593509674,
      "learning_rate": 8.361686975852672e-05,
      "loss": 0.6996,
      "step": 2867
    },
    {
      "epoch": 0.589577551649707,
      "grad_norm": 0.20690032839775085,
      "learning_rate": 8.361174096708066e-05,
      "loss": 0.6977,
      "step": 2868
    },
    {
      "epoch": 0.5897831226230856,
      "grad_norm": 0.19090650975704193,
      "learning_rate": 8.360661027341459e-05,
      "loss": 0.6905,
      "step": 2869
    },
    {
      "epoch": 0.5899886935964642,
      "grad_norm": 0.1915200799703598,
      "learning_rate": 8.360147767778126e-05,
      "loss": 0.6921,
      "step": 2870
    },
    {
      "epoch": 0.5901942645698427,
      "grad_norm": 0.20431163907051086,
      "learning_rate": 8.359634318043356e-05,
      "loss": 0.6816,
      "step": 2871
    },
    {
      "epoch": 0.5903998355432213,
      "grad_norm": 0.20922903716564178,
      "learning_rate": 8.359120678162442e-05,
      "loss": 0.7141,
      "step": 2872
    },
    {
      "epoch": 0.5906054065165999,
      "grad_norm": 0.20200544595718384,
      "learning_rate": 8.358606848160692e-05,
      "loss": 0.6883,
      "step": 2873
    },
    {
      "epoch": 0.5908109774899785,
      "grad_norm": 0.22084182500839233,
      "learning_rate": 8.358092828063416e-05,
      "loss": 0.5962,
      "step": 2874
    },
    {
      "epoch": 0.5910165484633569,
      "grad_norm": 0.19920572638511658,
      "learning_rate": 8.357578617895939e-05,
      "loss": 0.6921,
      "step": 2875
    },
    {
      "epoch": 0.5912221194367355,
      "grad_norm": 0.21406704187393188,
      "learning_rate": 8.357064217683593e-05,
      "loss": 0.6809,
      "step": 2876
    },
    {
      "epoch": 0.5914276904101141,
      "grad_norm": 0.20186960697174072,
      "learning_rate": 8.356549627451723e-05,
      "loss": 0.7273,
      "step": 2877
    },
    {
      "epoch": 0.5916332613834927,
      "grad_norm": 0.20613306760787964,
      "learning_rate": 8.356034847225677e-05,
      "loss": 0.6998,
      "step": 2878
    },
    {
      "epoch": 0.5918388323568712,
      "grad_norm": 0.19980058073997498,
      "learning_rate": 8.355519877030818e-05,
      "loss": 0.6707,
      "step": 2879
    },
    {
      "epoch": 0.5920444033302498,
      "grad_norm": 0.17572249472141266,
      "learning_rate": 8.355004716892514e-05,
      "loss": 0.5905,
      "step": 2880
    },
    {
      "epoch": 0.5922499743036284,
      "grad_norm": 0.14615419507026672,
      "learning_rate": 8.354489366836147e-05,
      "loss": 0.5936,
      "step": 2881
    },
    {
      "epoch": 0.5924555452770068,
      "grad_norm": 0.265011191368103,
      "learning_rate": 8.353973826887105e-05,
      "loss": 0.7195,
      "step": 2882
    },
    {
      "epoch": 0.5926611162503854,
      "grad_norm": 0.22780616581439972,
      "learning_rate": 8.353458097070784e-05,
      "loss": 0.7003,
      "step": 2883
    },
    {
      "epoch": 0.592866687223764,
      "grad_norm": 0.2108001857995987,
      "learning_rate": 8.352942177412594e-05,
      "loss": 0.6791,
      "step": 2884
    },
    {
      "epoch": 0.5930722581971426,
      "grad_norm": 0.23062892258167267,
      "learning_rate": 8.352426067937953e-05,
      "loss": 0.7012,
      "step": 2885
    },
    {
      "epoch": 0.5932778291705211,
      "grad_norm": 0.22096315026283264,
      "learning_rate": 8.351909768672286e-05,
      "loss": 0.6848,
      "step": 2886
    },
    {
      "epoch": 0.5934834001438997,
      "grad_norm": 0.19417156279087067,
      "learning_rate": 8.351393279641026e-05,
      "loss": 0.6041,
      "step": 2887
    },
    {
      "epoch": 0.5936889711172783,
      "grad_norm": 0.21793076395988464,
      "learning_rate": 8.350876600869624e-05,
      "loss": 0.6832,
      "step": 2888
    },
    {
      "epoch": 0.5938945420906568,
      "grad_norm": 0.21608784794807434,
      "learning_rate": 8.350359732383528e-05,
      "loss": 0.693,
      "step": 2889
    },
    {
      "epoch": 0.5941001130640353,
      "grad_norm": 0.1427665799856186,
      "learning_rate": 8.349842674208205e-05,
      "loss": 0.6014,
      "step": 2890
    },
    {
      "epoch": 0.5943056840374139,
      "grad_norm": 0.21171724796295166,
      "learning_rate": 8.349325426369129e-05,
      "loss": 0.7155,
      "step": 2891
    },
    {
      "epoch": 0.5945112550107925,
      "grad_norm": 0.20547601580619812,
      "learning_rate": 8.348807988891778e-05,
      "loss": 0.6879,
      "step": 2892
    },
    {
      "epoch": 0.5947168259841711,
      "grad_norm": 0.20329566299915314,
      "learning_rate": 8.34829036180165e-05,
      "loss": 0.6956,
      "step": 2893
    },
    {
      "epoch": 0.5949223969575496,
      "grad_norm": 0.19427530467510223,
      "learning_rate": 8.347772545124241e-05,
      "loss": 0.6853,
      "step": 2894
    },
    {
      "epoch": 0.5951279679309281,
      "grad_norm": 0.19844532012939453,
      "learning_rate": 8.347254538885063e-05,
      "loss": 0.6805,
      "step": 2895
    },
    {
      "epoch": 0.5953335389043067,
      "grad_norm": 0.20042115449905396,
      "learning_rate": 8.346736343109637e-05,
      "loss": 0.6648,
      "step": 2896
    },
    {
      "epoch": 0.5955391098776853,
      "grad_norm": 0.1955205500125885,
      "learning_rate": 8.34621795782349e-05,
      "loss": 0.6676,
      "step": 2897
    },
    {
      "epoch": 0.5957446808510638,
      "grad_norm": 0.19705745577812195,
      "learning_rate": 8.345699383052162e-05,
      "loss": 0.6857,
      "step": 2898
    },
    {
      "epoch": 0.5959502518244424,
      "grad_norm": 0.19771529734134674,
      "learning_rate": 8.3451806188212e-05,
      "loss": 0.6992,
      "step": 2899
    },
    {
      "epoch": 0.596155822797821,
      "grad_norm": 0.1999768763780594,
      "learning_rate": 8.344661665156161e-05,
      "loss": 0.7006,
      "step": 2900
    },
    {
      "epoch": 0.5963613937711995,
      "grad_norm": 0.2035917341709137,
      "learning_rate": 8.344142522082612e-05,
      "loss": 0.7032,
      "step": 2901
    },
    {
      "epoch": 0.596566964744578,
      "grad_norm": 0.20297078788280487,
      "learning_rate": 8.343623189626129e-05,
      "loss": 0.681,
      "step": 2902
    },
    {
      "epoch": 0.5967725357179566,
      "grad_norm": 0.17843900620937347,
      "learning_rate": 8.343103667812295e-05,
      "loss": 0.5906,
      "step": 2903
    },
    {
      "epoch": 0.5969781066913352,
      "grad_norm": 0.2069201022386551,
      "learning_rate": 8.342583956666706e-05,
      "loss": 0.7137,
      "step": 2904
    },
    {
      "epoch": 0.5971836776647137,
      "grad_norm": 0.20919117331504822,
      "learning_rate": 8.342064056214967e-05,
      "loss": 0.6923,
      "step": 2905
    },
    {
      "epoch": 0.5973892486380923,
      "grad_norm": 0.1899642050266266,
      "learning_rate": 8.34154396648269e-05,
      "loss": 0.668,
      "step": 2906
    },
    {
      "epoch": 0.5975948196114709,
      "grad_norm": 0.1988193541765213,
      "learning_rate": 8.341023687495494e-05,
      "loss": 0.676,
      "step": 2907
    },
    {
      "epoch": 0.5978003905848495,
      "grad_norm": 0.21733912825584412,
      "learning_rate": 8.340503219279017e-05,
      "loss": 0.6999,
      "step": 2908
    },
    {
      "epoch": 0.5980059615582279,
      "grad_norm": 0.20647762715816498,
      "learning_rate": 8.339982561858896e-05,
      "loss": 0.694,
      "step": 2909
    },
    {
      "epoch": 0.5982115325316065,
      "grad_norm": 0.19566026329994202,
      "learning_rate": 8.339461715260781e-05,
      "loss": 0.6716,
      "step": 2910
    },
    {
      "epoch": 0.5984171035049851,
      "grad_norm": 0.2015964686870575,
      "learning_rate": 8.338940679510334e-05,
      "loss": 0.6869,
      "step": 2911
    },
    {
      "epoch": 0.5986226744783637,
      "grad_norm": 0.1712951958179474,
      "learning_rate": 8.338419454633224e-05,
      "loss": 0.5902,
      "step": 2912
    },
    {
      "epoch": 0.5988282454517422,
      "grad_norm": 0.13849389553070068,
      "learning_rate": 8.337898040655126e-05,
      "loss": 0.5992,
      "step": 2913
    },
    {
      "epoch": 0.5990338164251208,
      "grad_norm": 0.2373506873846054,
      "learning_rate": 8.33737643760173e-05,
      "loss": 0.6881,
      "step": 2914
    },
    {
      "epoch": 0.5992393873984994,
      "grad_norm": 0.2165384441614151,
      "learning_rate": 8.336854645498734e-05,
      "loss": 0.6805,
      "step": 2915
    },
    {
      "epoch": 0.599444958371878,
      "grad_norm": 0.21156401932239532,
      "learning_rate": 8.336332664371843e-05,
      "loss": 0.6781,
      "step": 2916
    },
    {
      "epoch": 0.5996505293452564,
      "grad_norm": 0.22182904183864594,
      "learning_rate": 8.335810494246772e-05,
      "loss": 0.7046,
      "step": 2917
    },
    {
      "epoch": 0.599856100318635,
      "grad_norm": 0.21610800921916962,
      "learning_rate": 8.335288135149246e-05,
      "loss": 0.7223,
      "step": 2918
    },
    {
      "epoch": 0.6000616712920136,
      "grad_norm": 0.21809829771518707,
      "learning_rate": 8.334765587105002e-05,
      "loss": 0.6088,
      "step": 2919
    },
    {
      "epoch": 0.6002672422653921,
      "grad_norm": 0.22887369990348816,
      "learning_rate": 8.334242850139779e-05,
      "loss": 0.6901,
      "step": 2920
    },
    {
      "epoch": 0.6004728132387707,
      "grad_norm": 0.22057749330997467,
      "learning_rate": 8.333719924279332e-05,
      "loss": 0.5969,
      "step": 2921
    },
    {
      "epoch": 0.6006783842121493,
      "grad_norm": 0.2292318344116211,
      "learning_rate": 8.333196809549422e-05,
      "loss": 0.6893,
      "step": 2922
    },
    {
      "epoch": 0.6008839551855278,
      "grad_norm": 0.15525048971176147,
      "learning_rate": 8.332673505975825e-05,
      "loss": 0.5925,
      "step": 2923
    },
    {
      "epoch": 0.6010895261589063,
      "grad_norm": 0.21504151821136475,
      "learning_rate": 8.332150013584315e-05,
      "loss": 0.678,
      "step": 2924
    },
    {
      "epoch": 0.6012950971322849,
      "grad_norm": 0.21480882167816162,
      "learning_rate": 8.331626332400689e-05,
      "loss": 0.6897,
      "step": 2925
    },
    {
      "epoch": 0.6015006681056635,
      "grad_norm": 0.14146551489830017,
      "learning_rate": 8.331102462450738e-05,
      "loss": 0.5684,
      "step": 2926
    },
    {
      "epoch": 0.6017062390790421,
      "grad_norm": 0.23041875660419464,
      "learning_rate": 8.330578403760277e-05,
      "loss": 0.6994,
      "step": 2927
    },
    {
      "epoch": 0.6019118100524206,
      "grad_norm": 0.20731528103351593,
      "learning_rate": 8.330054156355124e-05,
      "loss": 0.6792,
      "step": 2928
    },
    {
      "epoch": 0.6021173810257991,
      "grad_norm": 0.19797998666763306,
      "learning_rate": 8.329529720261103e-05,
      "loss": 0.6951,
      "step": 2929
    },
    {
      "epoch": 0.6023229519991777,
      "grad_norm": 0.2016698569059372,
      "learning_rate": 8.32900509550405e-05,
      "loss": 0.6833,
      "step": 2930
    },
    {
      "epoch": 0.6025285229725563,
      "grad_norm": 0.20054802298545837,
      "learning_rate": 8.328480282109816e-05,
      "loss": 0.6842,
      "step": 2931
    },
    {
      "epoch": 0.6027340939459348,
      "grad_norm": 0.19949203729629517,
      "learning_rate": 8.32795528010425e-05,
      "loss": 0.691,
      "step": 2932
    },
    {
      "epoch": 0.6029396649193134,
      "grad_norm": 0.17907802760601044,
      "learning_rate": 8.32743008951322e-05,
      "loss": 0.5825,
      "step": 2933
    },
    {
      "epoch": 0.603145235892692,
      "grad_norm": 0.2004586011171341,
      "learning_rate": 8.326904710362599e-05,
      "loss": 0.6639,
      "step": 2934
    },
    {
      "epoch": 0.6033508068660706,
      "grad_norm": 0.21539311110973358,
      "learning_rate": 8.32637914267827e-05,
      "loss": 0.6948,
      "step": 2935
    },
    {
      "epoch": 0.603556377839449,
      "grad_norm": 0.20301540195941925,
      "learning_rate": 8.325853386486126e-05,
      "loss": 0.7028,
      "step": 2936
    },
    {
      "epoch": 0.6037619488128276,
      "grad_norm": 0.19219626486301422,
      "learning_rate": 8.325327441812067e-05,
      "loss": 0.6727,
      "step": 2937
    },
    {
      "epoch": 0.6039675197862062,
      "grad_norm": 0.20149052143096924,
      "learning_rate": 8.324801308682004e-05,
      "loss": 0.6887,
      "step": 2938
    },
    {
      "epoch": 0.6041730907595847,
      "grad_norm": 0.20644250512123108,
      "learning_rate": 8.324274987121857e-05,
      "loss": 0.6764,
      "step": 2939
    },
    {
      "epoch": 0.6043786617329633,
      "grad_norm": 0.20564045011997223,
      "learning_rate": 8.323748477157557e-05,
      "loss": 0.6912,
      "step": 2940
    },
    {
      "epoch": 0.6045842327063419,
      "grad_norm": 0.18564823269844055,
      "learning_rate": 8.323221778815042e-05,
      "loss": 0.564,
      "step": 2941
    },
    {
      "epoch": 0.6047898036797205,
      "grad_norm": 0.2087641954421997,
      "learning_rate": 8.32269489212026e-05,
      "loss": 0.6865,
      "step": 2942
    },
    {
      "epoch": 0.6049953746530989,
      "grad_norm": 0.13221989572048187,
      "learning_rate": 8.322167817099166e-05,
      "loss": 0.5906,
      "step": 2943
    },
    {
      "epoch": 0.6052009456264775,
      "grad_norm": 0.13168349862098694,
      "learning_rate": 8.32164055377773e-05,
      "loss": 0.6099,
      "step": 2944
    },
    {
      "epoch": 0.6054065165998561,
      "grad_norm": 0.21939873695373535,
      "learning_rate": 8.321113102181925e-05,
      "loss": 0.6936,
      "step": 2945
    },
    {
      "epoch": 0.6056120875732347,
      "grad_norm": 0.21064333617687225,
      "learning_rate": 8.320585462337738e-05,
      "loss": 0.6805,
      "step": 2946
    },
    {
      "epoch": 0.6058176585466132,
      "grad_norm": 0.21517851948738098,
      "learning_rate": 8.320057634271162e-05,
      "loss": 0.6941,
      "step": 2947
    },
    {
      "epoch": 0.6060232295199918,
      "grad_norm": 0.19427655637264252,
      "learning_rate": 8.319529618008203e-05,
      "loss": 0.6989,
      "step": 2948
    },
    {
      "epoch": 0.6062288004933704,
      "grad_norm": 0.20321017503738403,
      "learning_rate": 8.31900141357487e-05,
      "loss": 0.6775,
      "step": 2949
    },
    {
      "epoch": 0.606434371466749,
      "grad_norm": 0.2060307115316391,
      "learning_rate": 8.318473020997188e-05,
      "loss": 0.712,
      "step": 2950
    },
    {
      "epoch": 0.6066399424401274,
      "grad_norm": 0.16920985281467438,
      "learning_rate": 8.317944440301188e-05,
      "loss": 0.5975,
      "step": 2951
    },
    {
      "epoch": 0.606845513413506,
      "grad_norm": 0.2233453392982483,
      "learning_rate": 8.31741567151291e-05,
      "loss": 0.6985,
      "step": 2952
    },
    {
      "epoch": 0.6070510843868846,
      "grad_norm": 0.21463671326637268,
      "learning_rate": 8.316886714658406e-05,
      "loss": 0.6661,
      "step": 2953
    },
    {
      "epoch": 0.6072566553602631,
      "grad_norm": 0.1969480812549591,
      "learning_rate": 8.316357569763732e-05,
      "loss": 0.7273,
      "step": 2954
    },
    {
      "epoch": 0.6074622263336417,
      "grad_norm": 0.17153163254261017,
      "learning_rate": 8.315828236854958e-05,
      "loss": 0.6041,
      "step": 2955
    },
    {
      "epoch": 0.6076677973070203,
      "grad_norm": 0.21503044664859772,
      "learning_rate": 8.315298715958165e-05,
      "loss": 0.6841,
      "step": 2956
    },
    {
      "epoch": 0.6078733682803988,
      "grad_norm": 0.2050783485174179,
      "learning_rate": 8.314769007099433e-05,
      "loss": 0.6952,
      "step": 2957
    },
    {
      "epoch": 0.6080789392537773,
      "grad_norm": 0.20447179675102234,
      "learning_rate": 8.314239110304864e-05,
      "loss": 0.7027,
      "step": 2958
    },
    {
      "epoch": 0.6082845102271559,
      "grad_norm": 0.20713284611701965,
      "learning_rate": 8.313709025600562e-05,
      "loss": 0.7172,
      "step": 2959
    },
    {
      "epoch": 0.6084900812005345,
      "grad_norm": 0.20058241486549377,
      "learning_rate": 8.31317875301264e-05,
      "loss": 0.6904,
      "step": 2960
    },
    {
      "epoch": 0.6086956521739131,
      "grad_norm": 0.19999080896377563,
      "learning_rate": 8.312648292567226e-05,
      "loss": 0.7054,
      "step": 2961
    },
    {
      "epoch": 0.6089012231472916,
      "grad_norm": 0.20129017531871796,
      "learning_rate": 8.31211764429045e-05,
      "loss": 0.6781,
      "step": 2962
    },
    {
      "epoch": 0.6091067941206701,
      "grad_norm": 0.2048570066690445,
      "learning_rate": 8.311586808208453e-05,
      "loss": 0.6995,
      "step": 2963
    },
    {
      "epoch": 0.6093123650940487,
      "grad_norm": 0.20518624782562256,
      "learning_rate": 8.311055784347392e-05,
      "loss": 0.6856,
      "step": 2964
    },
    {
      "epoch": 0.6095179360674273,
      "grad_norm": 0.14647917449474335,
      "learning_rate": 8.310524572733424e-05,
      "loss": 0.6034,
      "step": 2965
    },
    {
      "epoch": 0.6097235070408058,
      "grad_norm": 0.2090081423521042,
      "learning_rate": 8.309993173392722e-05,
      "loss": 0.6738,
      "step": 2966
    },
    {
      "epoch": 0.6099290780141844,
      "grad_norm": 0.13404381275177002,
      "learning_rate": 8.309461586351463e-05,
      "loss": 0.59,
      "step": 2967
    },
    {
      "epoch": 0.610134648987563,
      "grad_norm": 0.20760053396224976,
      "learning_rate": 8.308929811635837e-05,
      "loss": 0.7076,
      "step": 2968
    },
    {
      "epoch": 0.6103402199609416,
      "grad_norm": 0.2022329717874527,
      "learning_rate": 8.308397849272043e-05,
      "loss": 0.6992,
      "step": 2969
    },
    {
      "epoch": 0.61054579093432,
      "grad_norm": 0.20392966270446777,
      "learning_rate": 8.307865699286287e-05,
      "loss": 0.7017,
      "step": 2970
    },
    {
      "epoch": 0.6107513619076986,
      "grad_norm": 0.14375483989715576,
      "learning_rate": 8.307333361704786e-05,
      "loss": 0.5902,
      "step": 2971
    },
    {
      "epoch": 0.6109569328810772,
      "grad_norm": 0.20196297764778137,
      "learning_rate": 8.306800836553766e-05,
      "loss": 0.686,
      "step": 2972
    },
    {
      "epoch": 0.6111625038544557,
      "grad_norm": 0.23178908228874207,
      "learning_rate": 8.306268123859461e-05,
      "loss": 0.7128,
      "step": 2973
    },
    {
      "epoch": 0.6113680748278343,
      "grad_norm": 0.14498086273670197,
      "learning_rate": 8.305735223648117e-05,
      "loss": 0.5783,
      "step": 2974
    },
    {
      "epoch": 0.6115736458012129,
      "grad_norm": 0.21291960775852203,
      "learning_rate": 8.305202135945985e-05,
      "loss": 0.6836,
      "step": 2975
    },
    {
      "epoch": 0.6117792167745915,
      "grad_norm": 0.20154601335525513,
      "learning_rate": 8.30466886077933e-05,
      "loss": 0.6775,
      "step": 2976
    },
    {
      "epoch": 0.6119847877479699,
      "grad_norm": 0.1371108442544937,
      "learning_rate": 8.304135398174423e-05,
      "loss": 0.6029,
      "step": 2977
    },
    {
      "epoch": 0.6121903587213485,
      "grad_norm": 0.20939522981643677,
      "learning_rate": 8.303601748157545e-05,
      "loss": 0.7016,
      "step": 2978
    },
    {
      "epoch": 0.6123959296947271,
      "grad_norm": 0.1982061266899109,
      "learning_rate": 8.303067910754988e-05,
      "loss": 0.6724,
      "step": 2979
    },
    {
      "epoch": 0.6126015006681057,
      "grad_norm": 0.19184644520282745,
      "learning_rate": 8.302533885993051e-05,
      "loss": 0.6766,
      "step": 2980
    },
    {
      "epoch": 0.6128070716414842,
      "grad_norm": 0.1973457783460617,
      "learning_rate": 8.30199967389804e-05,
      "loss": 0.701,
      "step": 2981
    },
    {
      "epoch": 0.6130126426148628,
      "grad_norm": 0.23462116718292236,
      "learning_rate": 8.301465274496278e-05,
      "loss": 0.7119,
      "step": 2982
    },
    {
      "epoch": 0.6132182135882414,
      "grad_norm": 0.1940578669309616,
      "learning_rate": 8.300930687814089e-05,
      "loss": 0.6935,
      "step": 2983
    },
    {
      "epoch": 0.61342378456162,
      "grad_norm": 0.20462383329868317,
      "learning_rate": 8.30039591387781e-05,
      "loss": 0.7066,
      "step": 2984
    },
    {
      "epoch": 0.6136293555349984,
      "grad_norm": 0.1943095475435257,
      "learning_rate": 8.299860952713788e-05,
      "loss": 0.6764,
      "step": 2985
    },
    {
      "epoch": 0.613834926508377,
      "grad_norm": 0.18959608674049377,
      "learning_rate": 8.299325804348377e-05,
      "loss": 0.6501,
      "step": 2986
    },
    {
      "epoch": 0.6140404974817556,
      "grad_norm": 0.2010001540184021,
      "learning_rate": 8.298790468807941e-05,
      "loss": 0.6819,
      "step": 2987
    },
    {
      "epoch": 0.6142460684551342,
      "grad_norm": 0.20373772084712982,
      "learning_rate": 8.298254946118856e-05,
      "loss": 0.6776,
      "step": 2988
    },
    {
      "epoch": 0.6144516394285127,
      "grad_norm": 0.19308720529079437,
      "learning_rate": 8.2977192363075e-05,
      "loss": 0.6825,
      "step": 2989
    },
    {
      "epoch": 0.6146572104018913,
      "grad_norm": 0.19244827330112457,
      "learning_rate": 8.297183339400271e-05,
      "loss": 0.6819,
      "step": 2990
    },
    {
      "epoch": 0.6148627813752698,
      "grad_norm": 0.19886994361877441,
      "learning_rate": 8.296647255423566e-05,
      "loss": 0.6907,
      "step": 2991
    },
    {
      "epoch": 0.6150683523486483,
      "grad_norm": 0.194062277674675,
      "learning_rate": 8.296110984403794e-05,
      "loss": 0.6725,
      "step": 2992
    },
    {
      "epoch": 0.6152739233220269,
      "grad_norm": 0.19105246663093567,
      "learning_rate": 8.295574526367379e-05,
      "loss": 0.6895,
      "step": 2993
    },
    {
      "epoch": 0.6154794942954055,
      "grad_norm": 0.20439203083515167,
      "learning_rate": 8.295037881340746e-05,
      "loss": 0.6997,
      "step": 2994
    },
    {
      "epoch": 0.6156850652687841,
      "grad_norm": 0.2035692036151886,
      "learning_rate": 8.294501049350335e-05,
      "loss": 0.6797,
      "step": 2995
    },
    {
      "epoch": 0.6158906362421626,
      "grad_norm": 0.2011076956987381,
      "learning_rate": 8.293964030422593e-05,
      "loss": 0.6948,
      "step": 2996
    },
    {
      "epoch": 0.6160962072155411,
      "grad_norm": 0.1979755461215973,
      "learning_rate": 8.293426824583977e-05,
      "loss": 0.6984,
      "step": 2997
    },
    {
      "epoch": 0.6163017781889197,
      "grad_norm": 0.20361703634262085,
      "learning_rate": 8.29288943186095e-05,
      "loss": 0.6804,
      "step": 2998
    },
    {
      "epoch": 0.6165073491622983,
      "grad_norm": 0.19313938915729523,
      "learning_rate": 8.29235185227999e-05,
      "loss": 0.7105,
      "step": 2999
    },
    {
      "epoch": 0.6167129201356768,
      "grad_norm": 0.19516946375370026,
      "learning_rate": 8.291814085867579e-05,
      "loss": 0.7015,
      "step": 3000
    },
    {
      "epoch": 0.6169184911090554,
      "grad_norm": 0.19444262981414795,
      "learning_rate": 8.291276132650212e-05,
      "loss": 0.7028,
      "step": 3001
    },
    {
      "epoch": 0.617124062082434,
      "grad_norm": 0.19477610290050507,
      "learning_rate": 8.290737992654389e-05,
      "loss": 0.683,
      "step": 3002
    },
    {
      "epoch": 0.6173296330558126,
      "grad_norm": 0.20169807970523834,
      "learning_rate": 8.290199665906624e-05,
      "loss": 0.6816,
      "step": 3003
    },
    {
      "epoch": 0.617535204029191,
      "grad_norm": 0.1933300644159317,
      "learning_rate": 8.289661152433436e-05,
      "loss": 0.7073,
      "step": 3004
    },
    {
      "epoch": 0.6177407750025696,
      "grad_norm": 0.16266535222530365,
      "learning_rate": 8.289122452261356e-05,
      "loss": 0.5968,
      "step": 3005
    },
    {
      "epoch": 0.6179463459759482,
      "grad_norm": 0.19945891201496124,
      "learning_rate": 8.288583565416924e-05,
      "loss": 0.6826,
      "step": 3006
    },
    {
      "epoch": 0.6181519169493268,
      "grad_norm": 0.1400868445634842,
      "learning_rate": 8.288044491926687e-05,
      "loss": 0.6002,
      "step": 3007
    },
    {
      "epoch": 0.6183574879227053,
      "grad_norm": 0.12712964415550232,
      "learning_rate": 8.287505231817202e-05,
      "loss": 0.5836,
      "step": 3008
    },
    {
      "epoch": 0.6185630588960839,
      "grad_norm": 0.20722496509552002,
      "learning_rate": 8.286965785115038e-05,
      "loss": 0.6821,
      "step": 3009
    },
    {
      "epoch": 0.6187686298694625,
      "grad_norm": 0.1368006467819214,
      "learning_rate": 8.28642615184677e-05,
      "loss": 0.5909,
      "step": 3010
    },
    {
      "epoch": 0.6189742008428409,
      "grad_norm": 0.1366155594587326,
      "learning_rate": 8.285886332038983e-05,
      "loss": 0.5806,
      "step": 3011
    },
    {
      "epoch": 0.6191797718162195,
      "grad_norm": 0.20801199972629547,
      "learning_rate": 8.285346325718272e-05,
      "loss": 0.7111,
      "step": 3012
    },
    {
      "epoch": 0.6193853427895981,
      "grad_norm": 0.19898487627506256,
      "learning_rate": 8.28480613291124e-05,
      "loss": 0.6832,
      "step": 3013
    },
    {
      "epoch": 0.6195909137629767,
      "grad_norm": 0.19258826971054077,
      "learning_rate": 8.284265753644499e-05,
      "loss": 0.6962,
      "step": 3014
    },
    {
      "epoch": 0.6197964847363552,
      "grad_norm": 0.18354789912700653,
      "learning_rate": 8.283725187944674e-05,
      "loss": 0.6807,
      "step": 3015
    },
    {
      "epoch": 0.6200020557097338,
      "grad_norm": 0.15917901694774628,
      "learning_rate": 8.283184435838392e-05,
      "loss": 0.5927,
      "step": 3016
    },
    {
      "epoch": 0.6202076266831124,
      "grad_norm": 0.1983378827571869,
      "learning_rate": 8.282643497352296e-05,
      "loss": 0.6791,
      "step": 3017
    },
    {
      "epoch": 0.620413197656491,
      "grad_norm": 0.20160548388957977,
      "learning_rate": 8.282102372513035e-05,
      "loss": 0.6951,
      "step": 3018
    },
    {
      "epoch": 0.6206187686298694,
      "grad_norm": 0.19742833077907562,
      "learning_rate": 8.281561061347268e-05,
      "loss": 0.6848,
      "step": 3019
    },
    {
      "epoch": 0.620824339603248,
      "grad_norm": 0.19700521230697632,
      "learning_rate": 8.281019563881663e-05,
      "loss": 0.6975,
      "step": 3020
    },
    {
      "epoch": 0.6210299105766266,
      "grad_norm": 0.20055337250232697,
      "learning_rate": 8.280477880142895e-05,
      "loss": 0.6769,
      "step": 3021
    },
    {
      "epoch": 0.6212354815500052,
      "grad_norm": 0.23085735738277435,
      "learning_rate": 8.279936010157653e-05,
      "loss": 0.67,
      "step": 3022
    },
    {
      "epoch": 0.6214410525233837,
      "grad_norm": 0.20529572665691376,
      "learning_rate": 8.279393953952632e-05,
      "loss": 0.6962,
      "step": 3023
    },
    {
      "epoch": 0.6216466234967623,
      "grad_norm": 0.19554628431797028,
      "learning_rate": 8.278851711554532e-05,
      "loss": 0.6853,
      "step": 3024
    },
    {
      "epoch": 0.6218521944701408,
      "grad_norm": 0.1940753012895584,
      "learning_rate": 8.278309282990073e-05,
      "loss": 0.6549,
      "step": 3025
    },
    {
      "epoch": 0.6220577654435194,
      "grad_norm": 0.19746670126914978,
      "learning_rate": 8.277766668285977e-05,
      "loss": 0.6544,
      "step": 3026
    },
    {
      "epoch": 0.6222633364168979,
      "grad_norm": 0.19035373628139496,
      "learning_rate": 8.277223867468971e-05,
      "loss": 0.6773,
      "step": 3027
    },
    {
      "epoch": 0.6224689073902765,
      "grad_norm": 0.19404295086860657,
      "learning_rate": 8.276680880565803e-05,
      "loss": 0.6931,
      "step": 3028
    },
    {
      "epoch": 0.6226744783636551,
      "grad_norm": 0.1988229602575302,
      "learning_rate": 8.276137707603219e-05,
      "loss": 0.6812,
      "step": 3029
    },
    {
      "epoch": 0.6228800493370336,
      "grad_norm": 0.19786033034324646,
      "learning_rate": 8.27559434860798e-05,
      "loss": 0.6733,
      "step": 3030
    },
    {
      "epoch": 0.6230856203104121,
      "grad_norm": 0.19254696369171143,
      "learning_rate": 8.275050803606853e-05,
      "loss": 0.7066,
      "step": 3031
    },
    {
      "epoch": 0.6232911912837907,
      "grad_norm": 0.19956709444522858,
      "learning_rate": 8.274507072626619e-05,
      "loss": 0.681,
      "step": 3032
    },
    {
      "epoch": 0.6234967622571693,
      "grad_norm": 0.19668106734752655,
      "learning_rate": 8.273963155694062e-05,
      "loss": 0.676,
      "step": 3033
    },
    {
      "epoch": 0.6237023332305478,
      "grad_norm": 0.21287435293197632,
      "learning_rate": 8.273419052835981e-05,
      "loss": 0.704,
      "step": 3034
    },
    {
      "epoch": 0.6239079042039264,
      "grad_norm": 2.4127197265625,
      "learning_rate": 8.27287476407918e-05,
      "loss": 0.7001,
      "step": 3035
    },
    {
      "epoch": 0.624113475177305,
      "grad_norm": 0.20844995975494385,
      "learning_rate": 8.272330289450473e-05,
      "loss": 0.6808,
      "step": 3036
    },
    {
      "epoch": 0.6243190461506836,
      "grad_norm": 0.19834044575691223,
      "learning_rate": 8.271785628976686e-05,
      "loss": 0.5957,
      "step": 3037
    },
    {
      "epoch": 0.624524617124062,
      "grad_norm": 0.25713658332824707,
      "learning_rate": 8.271240782684649e-05,
      "loss": 0.6067,
      "step": 3038
    },
    {
      "epoch": 0.6247301880974406,
      "grad_norm": 0.755788266658783,
      "learning_rate": 8.270695750601206e-05,
      "loss": 0.7165,
      "step": 3039
    },
    {
      "epoch": 0.6249357590708192,
      "grad_norm": 0.23070074617862701,
      "learning_rate": 8.270150532753208e-05,
      "loss": 0.7086,
      "step": 3040
    },
    {
      "epoch": 0.6251413300441978,
      "grad_norm": 0.20264309644699097,
      "learning_rate": 8.269605129167514e-05,
      "loss": 0.5804,
      "step": 3041
    },
    {
      "epoch": 0.6253469010175763,
      "grad_norm": 0.25147226452827454,
      "learning_rate": 8.269059539870996e-05,
      "loss": 0.6841,
      "step": 3042
    },
    {
      "epoch": 0.6255524719909549,
      "grad_norm": 0.23628079891204834,
      "learning_rate": 8.268513764890528e-05,
      "loss": 0.7055,
      "step": 3043
    },
    {
      "epoch": 0.6257580429643335,
      "grad_norm": 0.2399078607559204,
      "learning_rate": 8.267967804253003e-05,
      "loss": 0.7238,
      "step": 3044
    },
    {
      "epoch": 0.625963613937712,
      "grad_norm": 0.2208731472492218,
      "learning_rate": 8.267421657985316e-05,
      "loss": 0.6938,
      "step": 3045
    },
    {
      "epoch": 0.6261691849110905,
      "grad_norm": 0.21366935968399048,
      "learning_rate": 8.266875326114372e-05,
      "loss": 0.5907,
      "step": 3046
    },
    {
      "epoch": 0.6263747558844691,
      "grad_norm": 0.22604869306087494,
      "learning_rate": 8.266328808667086e-05,
      "loss": 0.6977,
      "step": 3047
    },
    {
      "epoch": 0.6265803268578477,
      "grad_norm": 0.20610669255256653,
      "learning_rate": 8.265782105670385e-05,
      "loss": 0.6953,
      "step": 3048
    },
    {
      "epoch": 0.6267858978312262,
      "grad_norm": 0.2094089388847351,
      "learning_rate": 8.2652352171512e-05,
      "loss": 0.7114,
      "step": 3049
    },
    {
      "epoch": 0.6269914688046048,
      "grad_norm": 0.20464326441287994,
      "learning_rate": 8.264688143136474e-05,
      "loss": 0.6828,
      "step": 3050
    },
    {
      "epoch": 0.6271970397779834,
      "grad_norm": 0.20458531379699707,
      "learning_rate": 8.26414088365316e-05,
      "loss": 0.7172,
      "step": 3051
    },
    {
      "epoch": 0.6274026107513619,
      "grad_norm": 0.20255166292190552,
      "learning_rate": 8.26359343872822e-05,
      "loss": 0.7034,
      "step": 3052
    },
    {
      "epoch": 0.6276081817247404,
      "grad_norm": 0.20339445769786835,
      "learning_rate": 8.26304580838862e-05,
      "loss": 0.7053,
      "step": 3053
    },
    {
      "epoch": 0.627813752698119,
      "grad_norm": 0.20055994391441345,
      "learning_rate": 8.262497992661342e-05,
      "loss": 0.6917,
      "step": 3054
    },
    {
      "epoch": 0.6280193236714976,
      "grad_norm": 0.17087921500205994,
      "learning_rate": 8.261949991573374e-05,
      "loss": 0.6037,
      "step": 3055
    },
    {
      "epoch": 0.6282248946448762,
      "grad_norm": 0.2011025846004486,
      "learning_rate": 8.261401805151711e-05,
      "loss": 0.6748,
      "step": 3056
    },
    {
      "epoch": 0.6284304656182547,
      "grad_norm": 0.21176697313785553,
      "learning_rate": 8.260853433423366e-05,
      "loss": 0.6784,
      "step": 3057
    },
    {
      "epoch": 0.6286360365916333,
      "grad_norm": 0.2133779078722,
      "learning_rate": 8.260304876415348e-05,
      "loss": 0.7074,
      "step": 3058
    },
    {
      "epoch": 0.6288416075650118,
      "grad_norm": 0.21225228905677795,
      "learning_rate": 8.259756134154685e-05,
      "loss": 0.7336,
      "step": 3059
    },
    {
      "epoch": 0.6290471785383904,
      "grad_norm": 0.16129277646541595,
      "learning_rate": 8.25920720666841e-05,
      "loss": 0.5877,
      "step": 3060
    },
    {
      "epoch": 0.6292527495117689,
      "grad_norm": 0.2276839166879654,
      "learning_rate": 8.258658093983566e-05,
      "loss": 0.6943,
      "step": 3061
    },
    {
      "epoch": 0.6294583204851475,
      "grad_norm": 0.20884232223033905,
      "learning_rate": 8.258108796127206e-05,
      "loss": 0.6802,
      "step": 3062
    },
    {
      "epoch": 0.6296638914585261,
      "grad_norm": 0.21469639241695404,
      "learning_rate": 8.257559313126391e-05,
      "loss": 0.7264,
      "step": 3063
    },
    {
      "epoch": 0.6298694624319047,
      "grad_norm": 0.20983977615833282,
      "learning_rate": 8.257009645008191e-05,
      "loss": 0.7146,
      "step": 3064
    },
    {
      "epoch": 0.6300750334052831,
      "grad_norm": 0.20303663611412048,
      "learning_rate": 8.256459791799687e-05,
      "loss": 0.6593,
      "step": 3065
    },
    {
      "epoch": 0.6302806043786617,
      "grad_norm": 0.20967082679271698,
      "learning_rate": 8.255909753527968e-05,
      "loss": 0.6983,
      "step": 3066
    },
    {
      "epoch": 0.6304861753520403,
      "grad_norm": 0.15247072279453278,
      "learning_rate": 8.255359530220127e-05,
      "loss": 0.6055,
      "step": 3067
    },
    {
      "epoch": 0.6306917463254188,
      "grad_norm": 0.2263472080230713,
      "learning_rate": 8.254809121903276e-05,
      "loss": 0.6934,
      "step": 3068
    },
    {
      "epoch": 0.6308973172987974,
      "grad_norm": 0.22391130030155182,
      "learning_rate": 8.25425852860453e-05,
      "loss": 0.6984,
      "step": 3069
    },
    {
      "epoch": 0.631102888272176,
      "grad_norm": 0.19726432859897614,
      "learning_rate": 8.253707750351013e-05,
      "loss": 0.6938,
      "step": 3070
    },
    {
      "epoch": 0.6313084592455546,
      "grad_norm": 0.2162100374698639,
      "learning_rate": 8.25315678716986e-05,
      "loss": 0.675,
      "step": 3071
    },
    {
      "epoch": 0.631514030218933,
      "grad_norm": 0.2201918661594391,
      "learning_rate": 8.252605639088215e-05,
      "loss": 0.6931,
      "step": 3072
    },
    {
      "epoch": 0.6317196011923116,
      "grad_norm": 0.20799918472766876,
      "learning_rate": 8.25205430613323e-05,
      "loss": 0.6911,
      "step": 3073
    },
    {
      "epoch": 0.6319251721656902,
      "grad_norm": 0.19582496583461761,
      "learning_rate": 8.251502788332066e-05,
      "loss": 0.6763,
      "step": 3074
    },
    {
      "epoch": 0.6321307431390688,
      "grad_norm": 0.2054242044687271,
      "learning_rate": 8.250951085711894e-05,
      "loss": 0.6907,
      "step": 3075
    },
    {
      "epoch": 0.6323363141124473,
      "grad_norm": 0.15331074595451355,
      "learning_rate": 8.250399198299894e-05,
      "loss": 0.5903,
      "step": 3076
    },
    {
      "epoch": 0.6325418850858259,
      "grad_norm": 0.22686253488063812,
      "learning_rate": 8.249847126123253e-05,
      "loss": 0.6944,
      "step": 3077
    },
    {
      "epoch": 0.6327474560592045,
      "grad_norm": 0.2104145586490631,
      "learning_rate": 8.249294869209172e-05,
      "loss": 0.678,
      "step": 3078
    },
    {
      "epoch": 0.632953027032583,
      "grad_norm": 0.14177118241786957,
      "learning_rate": 8.248742427584858e-05,
      "loss": 0.5831,
      "step": 3079
    },
    {
      "epoch": 0.6331585980059615,
      "grad_norm": 0.2042471021413803,
      "learning_rate": 8.248189801277526e-05,
      "loss": 0.6831,
      "step": 3080
    },
    {
      "epoch": 0.6333641689793401,
      "grad_norm": 0.13382332026958466,
      "learning_rate": 8.2476369903144e-05,
      "loss": 0.5932,
      "step": 3081
    },
    {
      "epoch": 0.6335697399527187,
      "grad_norm": 0.21314536035060883,
      "learning_rate": 8.247083994722717e-05,
      "loss": 0.7024,
      "step": 3082
    },
    {
      "epoch": 0.6337753109260973,
      "grad_norm": 0.2022118717432022,
      "learning_rate": 8.24653081452972e-05,
      "loss": 0.6778,
      "step": 3083
    },
    {
      "epoch": 0.6339808818994758,
      "grad_norm": 0.1986151486635208,
      "learning_rate": 8.24597744976266e-05,
      "loss": 0.6955,
      "step": 3084
    },
    {
      "epoch": 0.6341864528728544,
      "grad_norm": 0.1944025456905365,
      "learning_rate": 8.245423900448802e-05,
      "loss": 0.6761,
      "step": 3085
    },
    {
      "epoch": 0.6343920238462329,
      "grad_norm": 0.1960417479276657,
      "learning_rate": 8.244870166615411e-05,
      "loss": 0.6694,
      "step": 3086
    },
    {
      "epoch": 0.6345975948196114,
      "grad_norm": 0.19537580013275146,
      "learning_rate": 8.244316248289771e-05,
      "loss": 0.7057,
      "step": 3087
    },
    {
      "epoch": 0.63480316579299,
      "grad_norm": 0.25191953778266907,
      "learning_rate": 8.243762145499173e-05,
      "loss": 0.7093,
      "step": 3088
    },
    {
      "epoch": 0.6350087367663686,
      "grad_norm": 0.21354857087135315,
      "learning_rate": 8.24320785827091e-05,
      "loss": 0.6912,
      "step": 3089
    },
    {
      "epoch": 0.6352143077397472,
      "grad_norm": 0.2095470279455185,
      "learning_rate": 8.242653386632292e-05,
      "loss": 0.6966,
      "step": 3090
    },
    {
      "epoch": 0.6354198787131257,
      "grad_norm": 0.19135965406894684,
      "learning_rate": 8.242098730610636e-05,
      "loss": 0.6868,
      "step": 3091
    },
    {
      "epoch": 0.6356254496865043,
      "grad_norm": 0.19568754732608795,
      "learning_rate": 8.241543890233263e-05,
      "loss": 0.6741,
      "step": 3092
    },
    {
      "epoch": 0.6358310206598828,
      "grad_norm": 0.19776469469070435,
      "learning_rate": 8.240988865527513e-05,
      "loss": 0.7092,
      "step": 3093
    },
    {
      "epoch": 0.6360365916332614,
      "grad_norm": 0.18224585056304932,
      "learning_rate": 8.240433656520727e-05,
      "loss": 0.6031,
      "step": 3094
    },
    {
      "epoch": 0.6362421626066399,
      "grad_norm": 0.203841432929039,
      "learning_rate": 8.239878263240256e-05,
      "loss": 0.6995,
      "step": 3095
    },
    {
      "epoch": 0.6364477335800185,
      "grad_norm": 0.13863101601600647,
      "learning_rate": 8.239322685713465e-05,
      "loss": 0.5863,
      "step": 3096
    },
    {
      "epoch": 0.6366533045533971,
      "grad_norm": 0.21603704988956451,
      "learning_rate": 8.238766923967722e-05,
      "loss": 0.7092,
      "step": 3097
    },
    {
      "epoch": 0.6368588755267757,
      "grad_norm": 0.20999345183372498,
      "learning_rate": 8.238210978030407e-05,
      "loss": 0.6738,
      "step": 3098
    },
    {
      "epoch": 0.6370644465001541,
      "grad_norm": 0.1540490984916687,
      "learning_rate": 8.23765484792891e-05,
      "loss": 0.589,
      "step": 3099
    },
    {
      "epoch": 0.6372700174735327,
      "grad_norm": 0.21293634176254272,
      "learning_rate": 8.237098533690628e-05,
      "loss": 0.6747,
      "step": 3100
    },
    {
      "epoch": 0.6374755884469113,
      "grad_norm": 0.23176319897174835,
      "learning_rate": 8.236542035342969e-05,
      "loss": 0.679,
      "step": 3101
    },
    {
      "epoch": 0.6376811594202898,
      "grad_norm": 0.19695045053958893,
      "learning_rate": 8.235985352913348e-05,
      "loss": 0.6856,
      "step": 3102
    },
    {
      "epoch": 0.6378867303936684,
      "grad_norm": 0.19714051485061646,
      "learning_rate": 8.235428486429191e-05,
      "loss": 0.697,
      "step": 3103
    },
    {
      "epoch": 0.638092301367047,
      "grad_norm": 0.21369072794914246,
      "learning_rate": 8.23487143591793e-05,
      "loss": 0.6986,
      "step": 3104
    },
    {
      "epoch": 0.6382978723404256,
      "grad_norm": 0.19707739353179932,
      "learning_rate": 8.234314201407012e-05,
      "loss": 0.7098,
      "step": 3105
    },
    {
      "epoch": 0.638503443313804,
      "grad_norm": 0.1957058161497116,
      "learning_rate": 8.233756782923888e-05,
      "loss": 0.6754,
      "step": 3106
    },
    {
      "epoch": 0.6387090142871826,
      "grad_norm": 0.19346770644187927,
      "learning_rate": 8.233199180496019e-05,
      "loss": 0.6703,
      "step": 3107
    },
    {
      "epoch": 0.6389145852605612,
      "grad_norm": 0.2065419703722,
      "learning_rate": 8.232641394150873e-05,
      "loss": 0.6961,
      "step": 3108
    },
    {
      "epoch": 0.6391201562339398,
      "grad_norm": 0.20303097367286682,
      "learning_rate": 8.232083423915932e-05,
      "loss": 0.6764,
      "step": 3109
    },
    {
      "epoch": 0.6393257272073183,
      "grad_norm": 0.19711004197597504,
      "learning_rate": 8.231525269818688e-05,
      "loss": 0.6965,
      "step": 3110
    },
    {
      "epoch": 0.6395312981806969,
      "grad_norm": 0.19637802243232727,
      "learning_rate": 8.230966931886631e-05,
      "loss": 0.7109,
      "step": 3111
    },
    {
      "epoch": 0.6397368691540755,
      "grad_norm": 0.20301949977874756,
      "learning_rate": 8.230408410147274e-05,
      "loss": 0.6824,
      "step": 3112
    },
    {
      "epoch": 0.639942440127454,
      "grad_norm": 1.2079687118530273,
      "learning_rate": 8.229849704628131e-05,
      "loss": 0.6643,
      "step": 3113
    },
    {
      "epoch": 0.6401480111008325,
      "grad_norm": 0.17537331581115723,
      "learning_rate": 8.229290815356723e-05,
      "loss": 0.5969,
      "step": 3114
    },
    {
      "epoch": 0.6403535820742111,
      "grad_norm": 0.2206054925918579,
      "learning_rate": 8.22873174236059e-05,
      "loss": 0.6856,
      "step": 3115
    },
    {
      "epoch": 0.6405591530475897,
      "grad_norm": 0.20161283016204834,
      "learning_rate": 8.228172485667273e-05,
      "loss": 0.6803,
      "step": 3116
    },
    {
      "epoch": 0.6407647240209683,
      "grad_norm": 0.5840950012207031,
      "learning_rate": 8.227613045304321e-05,
      "loss": 0.688,
      "step": 3117
    },
    {
      "epoch": 0.6409702949943468,
      "grad_norm": 0.19631561636924744,
      "learning_rate": 8.227053421299297e-05,
      "loss": 0.5931,
      "step": 3118
    },
    {
      "epoch": 0.6411758659677254,
      "grad_norm": 0.23822426795959473,
      "learning_rate": 8.226493613679772e-05,
      "loss": 0.5962,
      "step": 3119
    },
    {
      "epoch": 0.6413814369411039,
      "grad_norm": 0.15889045596122742,
      "learning_rate": 8.225933622473322e-05,
      "loss": 0.5809,
      "step": 3120
    },
    {
      "epoch": 0.6415870079144824,
      "grad_norm": 0.24698416888713837,
      "learning_rate": 8.22537344770754e-05,
      "loss": 0.6965,
      "step": 3121
    },
    {
      "epoch": 0.641792578887861,
      "grad_norm": 0.2314760684967041,
      "learning_rate": 8.224813089410021e-05,
      "loss": 0.6989,
      "step": 3122
    },
    {
      "epoch": 0.6419981498612396,
      "grad_norm": 0.20642580091953278,
      "learning_rate": 8.22425254760837e-05,
      "loss": 0.7141,
      "step": 3123
    },
    {
      "epoch": 0.6422037208346182,
      "grad_norm": 0.209413081407547,
      "learning_rate": 8.223691822330203e-05,
      "loss": 0.7117,
      "step": 3124
    },
    {
      "epoch": 0.6424092918079967,
      "grad_norm": 0.21780717372894287,
      "learning_rate": 8.223130913603144e-05,
      "loss": 0.6902,
      "step": 3125
    },
    {
      "epoch": 0.6426148627813753,
      "grad_norm": 0.21011175215244293,
      "learning_rate": 8.222569821454826e-05,
      "loss": 0.6963,
      "step": 3126
    },
    {
      "epoch": 0.6428204337547538,
      "grad_norm": 0.2518548369407654,
      "learning_rate": 8.222008545912895e-05,
      "loss": 0.6005,
      "step": 3127
    },
    {
      "epoch": 0.6430260047281324,
      "grad_norm": 0.21928563714027405,
      "learning_rate": 8.221447087004996e-05,
      "loss": 0.6957,
      "step": 3128
    },
    {
      "epoch": 0.6432315757015109,
      "grad_norm": 0.21237944066524506,
      "learning_rate": 8.220885444758796e-05,
      "loss": 0.6559,
      "step": 3129
    },
    {
      "epoch": 0.6434371466748895,
      "grad_norm": 0.22411003708839417,
      "learning_rate": 8.220323619201958e-05,
      "loss": 0.7081,
      "step": 3130
    },
    {
      "epoch": 0.6436427176482681,
      "grad_norm": 0.19972927868366241,
      "learning_rate": 8.219761610362168e-05,
      "loss": 0.6792,
      "step": 3131
    },
    {
      "epoch": 0.6438482886216467,
      "grad_norm": 0.24267856776714325,
      "learning_rate": 8.219199418267107e-05,
      "loss": 0.7113,
      "step": 3132
    },
    {
      "epoch": 0.6440538595950251,
      "grad_norm": 0.20243190228939056,
      "learning_rate": 8.218637042944476e-05,
      "loss": 0.6826,
      "step": 3133
    },
    {
      "epoch": 0.6442594305684037,
      "grad_norm": 0.19848772883415222,
      "learning_rate": 8.218074484421978e-05,
      "loss": 0.6965,
      "step": 3134
    },
    {
      "epoch": 0.6444650015417823,
      "grad_norm": 0.20293201506137848,
      "learning_rate": 8.217511742727327e-05,
      "loss": 0.6646,
      "step": 3135
    },
    {
      "epoch": 0.6446705725151609,
      "grad_norm": 0.20322081446647644,
      "learning_rate": 8.21694881788825e-05,
      "loss": 0.699,
      "step": 3136
    },
    {
      "epoch": 0.6448761434885394,
      "grad_norm": 0.20811443030834198,
      "learning_rate": 8.216385709932476e-05,
      "loss": 0.6561,
      "step": 3137
    },
    {
      "epoch": 0.645081714461918,
      "grad_norm": 0.21710549294948578,
      "learning_rate": 8.21582241888775e-05,
      "loss": 0.6903,
      "step": 3138
    },
    {
      "epoch": 0.6452872854352966,
      "grad_norm": 0.2017020285129547,
      "learning_rate": 8.21525894478182e-05,
      "loss": 0.6837,
      "step": 3139
    },
    {
      "epoch": 0.645492856408675,
      "grad_norm": 0.21228978037834167,
      "learning_rate": 8.214695287642448e-05,
      "loss": 0.7046,
      "step": 3140
    },
    {
      "epoch": 0.6456984273820536,
      "grad_norm": 0.19248290359973907,
      "learning_rate": 8.214131447497401e-05,
      "loss": 0.6838,
      "step": 3141
    },
    {
      "epoch": 0.6459039983554322,
      "grad_norm": 0.20567071437835693,
      "learning_rate": 8.213567424374458e-05,
      "loss": 0.6728,
      "step": 3142
    },
    {
      "epoch": 0.6461095693288108,
      "grad_norm": 0.19881267845630646,
      "learning_rate": 8.213003218301404e-05,
      "loss": 0.6937,
      "step": 3143
    },
    {
      "epoch": 0.6463151403021893,
      "grad_norm": 0.20884251594543457,
      "learning_rate": 8.212438829306037e-05,
      "loss": 0.6889,
      "step": 3144
    },
    {
      "epoch": 0.6465207112755679,
      "grad_norm": 0.196677565574646,
      "learning_rate": 8.21187425741616e-05,
      "loss": 0.6586,
      "step": 3145
    },
    {
      "epoch": 0.6467262822489465,
      "grad_norm": 0.19286644458770752,
      "learning_rate": 8.211309502659588e-05,
      "loss": 0.6643,
      "step": 3146
    },
    {
      "epoch": 0.646931853222325,
      "grad_norm": 0.19453571736812592,
      "learning_rate": 8.210744565064142e-05,
      "loss": 0.6898,
      "step": 3147
    },
    {
      "epoch": 0.6471374241957035,
      "grad_norm": 0.22043997049331665,
      "learning_rate": 8.210179444657658e-05,
      "loss": 0.5958,
      "step": 3148
    },
    {
      "epoch": 0.6473429951690821,
      "grad_norm": 0.2146371752023697,
      "learning_rate": 8.209614141467972e-05,
      "loss": 0.7184,
      "step": 3149
    },
    {
      "epoch": 0.6475485661424607,
      "grad_norm": 0.2086339145898819,
      "learning_rate": 8.209048655522937e-05,
      "loss": 0.6878,
      "step": 3150
    },
    {
      "epoch": 0.6477541371158393,
      "grad_norm": 0.19689536094665527,
      "learning_rate": 8.20848298685041e-05,
      "loss": 0.6693,
      "step": 3151
    },
    {
      "epoch": 0.6479597080892178,
      "grad_norm": 0.19254978001117706,
      "learning_rate": 8.207917135478259e-05,
      "loss": 0.6931,
      "step": 3152
    },
    {
      "epoch": 0.6481652790625964,
      "grad_norm": 0.19382552802562714,
      "learning_rate": 8.207351101434363e-05,
      "loss": 0.6691,
      "step": 3153
    },
    {
      "epoch": 0.6483708500359749,
      "grad_norm": 0.20275139808654785,
      "learning_rate": 8.206784884746607e-05,
      "loss": 0.7085,
      "step": 3154
    },
    {
      "epoch": 0.6485764210093535,
      "grad_norm": 0.19114693999290466,
      "learning_rate": 8.206218485442883e-05,
      "loss": 0.6732,
      "step": 3155
    },
    {
      "epoch": 0.648781991982732,
      "grad_norm": 0.19770143926143646,
      "learning_rate": 8.2056519035511e-05,
      "loss": 0.6691,
      "step": 3156
    },
    {
      "epoch": 0.6489875629561106,
      "grad_norm": 0.2007279098033905,
      "learning_rate": 8.205085139099165e-05,
      "loss": 0.6647,
      "step": 3157
    },
    {
      "epoch": 0.6491931339294892,
      "grad_norm": 0.19302336871623993,
      "learning_rate": 8.204518192115004e-05,
      "loss": 0.663,
      "step": 3158
    },
    {
      "epoch": 0.6493987049028677,
      "grad_norm": 0.19728437066078186,
      "learning_rate": 8.203951062626546e-05,
      "loss": 0.674,
      "step": 3159
    },
    {
      "epoch": 0.6496042758762463,
      "grad_norm": 0.20836929976940155,
      "learning_rate": 8.203383750661731e-05,
      "loss": 0.6827,
      "step": 3160
    },
    {
      "epoch": 0.6498098468496248,
      "grad_norm": 0.226349338889122,
      "learning_rate": 8.202816256248509e-05,
      "loss": 0.579,
      "step": 3161
    },
    {
      "epoch": 0.6500154178230034,
      "grad_norm": 0.203635573387146,
      "learning_rate": 8.202248579414837e-05,
      "loss": 0.6959,
      "step": 3162
    },
    {
      "epoch": 0.6502209887963819,
      "grad_norm": 0.14256790280342102,
      "learning_rate": 8.201680720188682e-05,
      "loss": 0.589,
      "step": 3163
    },
    {
      "epoch": 0.6504265597697605,
      "grad_norm": 0.214716836810112,
      "learning_rate": 8.201112678598018e-05,
      "loss": 0.6951,
      "step": 3164
    },
    {
      "epoch": 0.6506321307431391,
      "grad_norm": 0.20737797021865845,
      "learning_rate": 8.200544454670834e-05,
      "loss": 0.6921,
      "step": 3165
    },
    {
      "epoch": 0.6508377017165177,
      "grad_norm": 0.2059832364320755,
      "learning_rate": 8.199976048435118e-05,
      "loss": 0.6845,
      "step": 3166
    },
    {
      "epoch": 0.6510432726898961,
      "grad_norm": 0.20531848073005676,
      "learning_rate": 8.199407459918877e-05,
      "loss": 0.696,
      "step": 3167
    },
    {
      "epoch": 0.6512488436632747,
      "grad_norm": 0.20587943494319916,
      "learning_rate": 8.19883868915012e-05,
      "loss": 0.6877,
      "step": 3168
    },
    {
      "epoch": 0.6514544146366533,
      "grad_norm": 0.19502076506614685,
      "learning_rate": 8.198269736156872e-05,
      "loss": 0.6735,
      "step": 3169
    },
    {
      "epoch": 0.6516599856100319,
      "grad_norm": 0.1964626908302307,
      "learning_rate": 8.197700600967158e-05,
      "loss": 0.6702,
      "step": 3170
    },
    {
      "epoch": 0.6518655565834104,
      "grad_norm": 0.19854065775871277,
      "learning_rate": 8.19713128360902e-05,
      "loss": 0.6639,
      "step": 3171
    },
    {
      "epoch": 0.652071127556789,
      "grad_norm": 0.2041742503643036,
      "learning_rate": 8.196561784110502e-05,
      "loss": 0.6813,
      "step": 3172
    },
    {
      "epoch": 0.6522766985301676,
      "grad_norm": 0.19994084537029266,
      "learning_rate": 8.195992102499663e-05,
      "loss": 0.668,
      "step": 3173
    },
    {
      "epoch": 0.6524822695035462,
      "grad_norm": 0.1984533816576004,
      "learning_rate": 8.195422238804569e-05,
      "loss": 0.6839,
      "step": 3174
    },
    {
      "epoch": 0.6526878404769246,
      "grad_norm": 0.2585853338241577,
      "learning_rate": 8.194852193053293e-05,
      "loss": 0.5857,
      "step": 3175
    },
    {
      "epoch": 0.6528934114503032,
      "grad_norm": 0.21707791090011597,
      "learning_rate": 8.194281965273919e-05,
      "loss": 0.7002,
      "step": 3176
    },
    {
      "epoch": 0.6530989824236818,
      "grad_norm": 0.21522431075572968,
      "learning_rate": 8.193711555494541e-05,
      "loss": 0.6681,
      "step": 3177
    },
    {
      "epoch": 0.6533045533970603,
      "grad_norm": 0.20251545310020447,
      "learning_rate": 8.193140963743258e-05,
      "loss": 0.7119,
      "step": 3178
    },
    {
      "epoch": 0.6535101243704389,
      "grad_norm": 0.20081111788749695,
      "learning_rate": 8.192570190048181e-05,
      "loss": 0.7013,
      "step": 3179
    },
    {
      "epoch": 0.6537156953438175,
      "grad_norm": 0.20084579288959503,
      "learning_rate": 8.19199923443743e-05,
      "loss": 0.6996,
      "step": 3180
    },
    {
      "epoch": 0.653921266317196,
      "grad_norm": 0.2081523984670639,
      "learning_rate": 8.191428096939134e-05,
      "loss": 0.6774,
      "step": 3181
    },
    {
      "epoch": 0.6541268372905745,
      "grad_norm": 0.19181185960769653,
      "learning_rate": 8.190856777581427e-05,
      "loss": 0.5909,
      "step": 3182
    },
    {
      "epoch": 0.6543324082639531,
      "grad_norm": 0.21452546119689941,
      "learning_rate": 8.190285276392461e-05,
      "loss": 0.6737,
      "step": 3183
    },
    {
      "epoch": 0.6545379792373317,
      "grad_norm": 0.20853358507156372,
      "learning_rate": 8.189713593400385e-05,
      "loss": 0.6823,
      "step": 3184
    },
    {
      "epoch": 0.6547435502107103,
      "grad_norm": 0.20873308181762695,
      "learning_rate": 8.189141728633367e-05,
      "loss": 0.7007,
      "step": 3185
    },
    {
      "epoch": 0.6549491211840888,
      "grad_norm": 0.19929181039333344,
      "learning_rate": 8.188569682119579e-05,
      "loss": 0.6567,
      "step": 3186
    },
    {
      "epoch": 0.6551546921574674,
      "grad_norm": 0.19836626946926117,
      "learning_rate": 8.187997453887202e-05,
      "loss": 0.6607,
      "step": 3187
    },
    {
      "epoch": 0.6553602631308459,
      "grad_norm": 0.18740180134773254,
      "learning_rate": 8.187425043964429e-05,
      "loss": 0.6858,
      "step": 3188
    },
    {
      "epoch": 0.6555658341042245,
      "grad_norm": 0.20412470400333405,
      "learning_rate": 8.18685245237946e-05,
      "loss": 0.6895,
      "step": 3189
    },
    {
      "epoch": 0.655771405077603,
      "grad_norm": 0.15742400288581848,
      "learning_rate": 8.186279679160502e-05,
      "loss": 0.5842,
      "step": 3190
    },
    {
      "epoch": 0.6559769760509816,
      "grad_norm": 0.20259132981300354,
      "learning_rate": 8.185706724335773e-05,
      "loss": 0.6967,
      "step": 3191
    },
    {
      "epoch": 0.6561825470243602,
      "grad_norm": 1.9348865747451782,
      "learning_rate": 8.185133587933502e-05,
      "loss": 0.7117,
      "step": 3192
    },
    {
      "epoch": 0.6563881179977388,
      "grad_norm": 0.2033887505531311,
      "learning_rate": 8.184560269981922e-05,
      "loss": 0.6728,
      "step": 3193
    },
    {
      "epoch": 0.6565936889711173,
      "grad_norm": 0.15772481262683868,
      "learning_rate": 8.183986770509281e-05,
      "loss": 0.5949,
      "step": 3194
    },
    {
      "epoch": 0.6567992599444958,
      "grad_norm": 0.21117869019508362,
      "learning_rate": 8.18341308954383e-05,
      "loss": 0.7154,
      "step": 3195
    },
    {
      "epoch": 0.6570048309178744,
      "grad_norm": 0.21583619713783264,
      "learning_rate": 8.182839227113833e-05,
      "loss": 0.7056,
      "step": 3196
    },
    {
      "epoch": 0.6572104018912529,
      "grad_norm": 0.21002855896949768,
      "learning_rate": 8.18226518324756e-05,
      "loss": 0.7106,
      "step": 3197
    },
    {
      "epoch": 0.6574159728646315,
      "grad_norm": 0.20425178110599518,
      "learning_rate": 8.181690957973292e-05,
      "loss": 0.6785,
      "step": 3198
    },
    {
      "epoch": 0.6576215438380101,
      "grad_norm": 0.2083713412284851,
      "learning_rate": 8.181116551319319e-05,
      "loss": 0.707,
      "step": 3199
    },
    {
      "epoch": 0.6578271148113887,
      "grad_norm": 0.1998489499092102,
      "learning_rate": 8.180541963313939e-05,
      "loss": 0.6886,
      "step": 3200
    },
    {
      "epoch": 0.6580326857847671,
      "grad_norm": 0.20870743691921234,
      "learning_rate": 8.17996719398546e-05,
      "loss": 0.6931,
      "step": 3201
    },
    {
      "epoch": 0.6582382567581457,
      "grad_norm": 0.20594879984855652,
      "learning_rate": 8.179392243362195e-05,
      "loss": 0.6897,
      "step": 3202
    },
    {
      "epoch": 0.6584438277315243,
      "grad_norm": 0.19401825964450836,
      "learning_rate": 8.178817111472474e-05,
      "loss": 0.6719,
      "step": 3203
    },
    {
      "epoch": 0.6586493987049029,
      "grad_norm": 0.20549017190933228,
      "learning_rate": 8.178241798344627e-05,
      "loss": 0.666,
      "step": 3204
    },
    {
      "epoch": 0.6588549696782814,
      "grad_norm": 0.1869438886642456,
      "learning_rate": 8.177666304007e-05,
      "loss": 0.6728,
      "step": 3205
    },
    {
      "epoch": 0.65906054065166,
      "grad_norm": 0.19876159727573395,
      "learning_rate": 8.177090628487943e-05,
      "loss": 0.6646,
      "step": 3206
    },
    {
      "epoch": 0.6592661116250386,
      "grad_norm": 0.1998775601387024,
      "learning_rate": 8.176514771815818e-05,
      "loss": 0.7035,
      "step": 3207
    },
    {
      "epoch": 0.6594716825984172,
      "grad_norm": 0.19949300587177277,
      "learning_rate": 8.175938734018994e-05,
      "loss": 0.7035,
      "step": 3208
    },
    {
      "epoch": 0.6596772535717956,
      "grad_norm": 0.1943056583404541,
      "learning_rate": 8.175362515125849e-05,
      "loss": 0.702,
      "step": 3209
    },
    {
      "epoch": 0.6598828245451742,
      "grad_norm": 0.20226384699344635,
      "learning_rate": 8.174786115164773e-05,
      "loss": 0.6887,
      "step": 3210
    },
    {
      "epoch": 0.6600883955185528,
      "grad_norm": 0.19821226596832275,
      "learning_rate": 8.174209534164161e-05,
      "loss": 0.7097,
      "step": 3211
    },
    {
      "epoch": 0.6602939664919314,
      "grad_norm": 0.19110795855522156,
      "learning_rate": 8.173632772152416e-05,
      "loss": 0.6737,
      "step": 3212
    },
    {
      "epoch": 0.6604995374653099,
      "grad_norm": 0.19855926930904388,
      "learning_rate": 8.173055829157957e-05,
      "loss": 0.6818,
      "step": 3213
    },
    {
      "epoch": 0.6607051084386885,
      "grad_norm": 0.19995853304862976,
      "learning_rate": 8.172478705209204e-05,
      "loss": 0.6811,
      "step": 3214
    },
    {
      "epoch": 0.660910679412067,
      "grad_norm": 0.22749421000480652,
      "learning_rate": 8.171901400334591e-05,
      "loss": 0.6004,
      "step": 3215
    },
    {
      "epoch": 0.6611162503854455,
      "grad_norm": 0.2062731236219406,
      "learning_rate": 8.171323914562559e-05,
      "loss": 0.7145,
      "step": 3216
    },
    {
      "epoch": 0.6613218213588241,
      "grad_norm": 0.20264078676700592,
      "learning_rate": 8.170746247921555e-05,
      "loss": 0.6664,
      "step": 3217
    },
    {
      "epoch": 0.6615273923322027,
      "grad_norm": 0.20601505041122437,
      "learning_rate": 8.170168400440044e-05,
      "loss": 0.6727,
      "step": 3218
    },
    {
      "epoch": 0.6617329633055813,
      "grad_norm": 0.22924602031707764,
      "learning_rate": 8.169590372146487e-05,
      "loss": 0.6836,
      "step": 3219
    },
    {
      "epoch": 0.6619385342789598,
      "grad_norm": 0.19378581643104553,
      "learning_rate": 8.169012163069366e-05,
      "loss": 0.6851,
      "step": 3220
    },
    {
      "epoch": 0.6621441052523384,
      "grad_norm": 0.20838582515716553,
      "learning_rate": 8.168433773237164e-05,
      "loss": 0.6856,
      "step": 3221
    },
    {
      "epoch": 0.6623496762257169,
      "grad_norm": 0.21452072262763977,
      "learning_rate": 8.167855202678377e-05,
      "loss": 0.7068,
      "step": 3222
    },
    {
      "epoch": 0.6625552471990955,
      "grad_norm": 0.2000737488269806,
      "learning_rate": 8.167276451421506e-05,
      "loss": 0.6874,
      "step": 3223
    },
    {
      "epoch": 0.662760818172474,
      "grad_norm": 0.23498542606830597,
      "learning_rate": 8.166697519495066e-05,
      "loss": 0.5939,
      "step": 3224
    },
    {
      "epoch": 0.6629663891458526,
      "grad_norm": 0.2128230184316635,
      "learning_rate": 8.166118406927578e-05,
      "loss": 0.7094,
      "step": 3225
    },
    {
      "epoch": 0.6631719601192312,
      "grad_norm": 0.1330750733613968,
      "learning_rate": 8.16553911374757e-05,
      "loss": 0.6022,
      "step": 3226
    },
    {
      "epoch": 0.6633775310926098,
      "grad_norm": 0.21321649849414825,
      "learning_rate": 8.164959639983583e-05,
      "loss": 0.6905,
      "step": 3227
    },
    {
      "epoch": 0.6635831020659883,
      "grad_norm": 0.2014767974615097,
      "learning_rate": 8.164379985664166e-05,
      "loss": 0.685,
      "step": 3228
    },
    {
      "epoch": 0.6637886730393668,
      "grad_norm": 0.17292124032974243,
      "learning_rate": 8.163800150817872e-05,
      "loss": 0.5932,
      "step": 3229
    },
    {
      "epoch": 0.6639942440127454,
      "grad_norm": 0.20624692738056183,
      "learning_rate": 8.163220135473271e-05,
      "loss": 0.6831,
      "step": 3230
    },
    {
      "epoch": 0.6641998149861239,
      "grad_norm": 0.2030026912689209,
      "learning_rate": 8.162639939658935e-05,
      "loss": 0.7166,
      "step": 3231
    },
    {
      "epoch": 0.6644053859595025,
      "grad_norm": 0.19677379727363586,
      "learning_rate": 8.162059563403448e-05,
      "loss": 0.6646,
      "step": 3232
    },
    {
      "epoch": 0.6646109569328811,
      "grad_norm": 0.1929975152015686,
      "learning_rate": 8.161479006735404e-05,
      "loss": 0.671,
      "step": 3233
    },
    {
      "epoch": 0.6648165279062597,
      "grad_norm": 0.196861132979393,
      "learning_rate": 8.1608982696834e-05,
      "loss": 0.6899,
      "step": 3234
    },
    {
      "epoch": 0.6650220988796381,
      "grad_norm": 0.19990988075733185,
      "learning_rate": 8.160317352276053e-05,
      "loss": 0.6889,
      "step": 3235
    },
    {
      "epoch": 0.6652276698530167,
      "grad_norm": 0.1800822615623474,
      "learning_rate": 8.159736254541976e-05,
      "loss": 0.6149,
      "step": 3236
    },
    {
      "epoch": 0.6654332408263953,
      "grad_norm": 0.1930818259716034,
      "learning_rate": 8.159154976509801e-05,
      "loss": 0.6756,
      "step": 3237
    },
    {
      "epoch": 0.6656388117997739,
      "grad_norm": 0.18298830091953278,
      "learning_rate": 8.158573518208162e-05,
      "loss": 0.5984,
      "step": 3238
    },
    {
      "epoch": 0.6658443827731524,
      "grad_norm": 0.19836896657943726,
      "learning_rate": 8.157991879665706e-05,
      "loss": 0.6869,
      "step": 3239
    },
    {
      "epoch": 0.666049953746531,
      "grad_norm": 0.20596401393413544,
      "learning_rate": 8.157410060911087e-05,
      "loss": 0.6882,
      "step": 3240
    },
    {
      "epoch": 0.6662555247199096,
      "grad_norm": 0.1683359146118164,
      "learning_rate": 8.15682806197297e-05,
      "loss": 0.5799,
      "step": 3241
    },
    {
      "epoch": 0.6664610956932882,
      "grad_norm": 0.19776779413223267,
      "learning_rate": 8.156245882880026e-05,
      "loss": 0.6528,
      "step": 3242
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 0.1920391172170639,
      "learning_rate": 8.155663523660936e-05,
      "loss": 0.6982,
      "step": 3243
    },
    {
      "epoch": 0.6668722376400452,
      "grad_norm": 0.1352914422750473,
      "learning_rate": 8.155080984344391e-05,
      "loss": 0.5837,
      "step": 3244
    },
    {
      "epoch": 0.6670778086134238,
      "grad_norm": 0.2184402048587799,
      "learning_rate": 8.15449826495909e-05,
      "loss": 0.6784,
      "step": 3245
    },
    {
      "epoch": 0.6672833795868024,
      "grad_norm": 0.19601434469223022,
      "learning_rate": 8.15391536553374e-05,
      "loss": 0.6778,
      "step": 3246
    },
    {
      "epoch": 0.6674889505601809,
      "grad_norm": 0.19717663526535034,
      "learning_rate": 8.15333228609706e-05,
      "loss": 0.7024,
      "step": 3247
    },
    {
      "epoch": 0.6676945215335595,
      "grad_norm": 0.19221165776252747,
      "learning_rate": 8.152749026677773e-05,
      "loss": 0.6951,
      "step": 3248
    },
    {
      "epoch": 0.667900092506938,
      "grad_norm": 0.15361624956130981,
      "learning_rate": 8.152165587304613e-05,
      "loss": 0.5739,
      "step": 3249
    },
    {
      "epoch": 0.6681056634803165,
      "grad_norm": 0.13391469419002533,
      "learning_rate": 8.151581968006325e-05,
      "loss": 0.5979,
      "step": 3250
    },
    {
      "epoch": 0.6683112344536951,
      "grad_norm": 0.21153193712234497,
      "learning_rate": 8.150998168811663e-05,
      "loss": 0.6651,
      "step": 3251
    },
    {
      "epoch": 0.6685168054270737,
      "grad_norm": 0.13939164578914642,
      "learning_rate": 8.150414189749385e-05,
      "loss": 0.5664,
      "step": 3252
    },
    {
      "epoch": 0.6687223764004523,
      "grad_norm": 0.21254399418830872,
      "learning_rate": 8.149830030848261e-05,
      "loss": 0.6856,
      "step": 3253
    },
    {
      "epoch": 0.6689279473738308,
      "grad_norm": 0.19342190027236938,
      "learning_rate": 8.14924569213707e-05,
      "loss": 0.6828,
      "step": 3254
    },
    {
      "epoch": 0.6691335183472094,
      "grad_norm": 0.19527758657932281,
      "learning_rate": 8.148661173644602e-05,
      "loss": 0.7009,
      "step": 3255
    },
    {
      "epoch": 0.6693390893205879,
      "grad_norm": 0.1978977620601654,
      "learning_rate": 8.148076475399651e-05,
      "loss": 0.7137,
      "step": 3256
    },
    {
      "epoch": 0.6695446602939665,
      "grad_norm": 0.20413827896118164,
      "learning_rate": 8.147491597431025e-05,
      "loss": 0.672,
      "step": 3257
    },
    {
      "epoch": 0.669750231267345,
      "grad_norm": 0.19834209978580475,
      "learning_rate": 8.146906539767534e-05,
      "loss": 0.6726,
      "step": 3258
    },
    {
      "epoch": 0.6699558022407236,
      "grad_norm": 0.1580744832754135,
      "learning_rate": 8.146321302438004e-05,
      "loss": 0.5621,
      "step": 3259
    },
    {
      "epoch": 0.6701613732141022,
      "grad_norm": 0.20448711514472961,
      "learning_rate": 8.145735885471266e-05,
      "loss": 0.6633,
      "step": 3260
    },
    {
      "epoch": 0.6703669441874808,
      "grad_norm": 0.12794892489910126,
      "learning_rate": 8.145150288896161e-05,
      "loss": 0.5989,
      "step": 3261
    },
    {
      "epoch": 0.6705725151608593,
      "grad_norm": 0.20495088398456573,
      "learning_rate": 8.144564512741539e-05,
      "loss": 0.6778,
      "step": 3262
    },
    {
      "epoch": 0.6707780861342378,
      "grad_norm": 0.13609834015369415,
      "learning_rate": 8.143978557036259e-05,
      "loss": 0.5879,
      "step": 3263
    },
    {
      "epoch": 0.6709836571076164,
      "grad_norm": 0.19716021418571472,
      "learning_rate": 8.143392421809186e-05,
      "loss": 0.6998,
      "step": 3264
    },
    {
      "epoch": 0.671189228080995,
      "grad_norm": 0.19806286692619324,
      "learning_rate": 8.142806107089198e-05,
      "loss": 0.6884,
      "step": 3265
    },
    {
      "epoch": 0.6713947990543735,
      "grad_norm": 0.14359678328037262,
      "learning_rate": 8.14221961290518e-05,
      "loss": 0.5788,
      "step": 3266
    },
    {
      "epoch": 0.6716003700277521,
      "grad_norm": 0.19541367888450623,
      "learning_rate": 8.141632939286026e-05,
      "loss": 0.704,
      "step": 3267
    },
    {
      "epoch": 0.6718059410011307,
      "grad_norm": 0.19442065060138702,
      "learning_rate": 8.141046086260636e-05,
      "loss": 0.6666,
      "step": 3268
    },
    {
      "epoch": 0.6720115119745091,
      "grad_norm": 0.1996643990278244,
      "learning_rate": 8.140459053857924e-05,
      "loss": 0.6888,
      "step": 3269
    },
    {
      "epoch": 0.6722170829478877,
      "grad_norm": 0.19437336921691895,
      "learning_rate": 8.13987184210681e-05,
      "loss": 0.7176,
      "step": 3270
    },
    {
      "epoch": 0.6724226539212663,
      "grad_norm": 0.14562220871448517,
      "learning_rate": 8.139284451036223e-05,
      "loss": 0.5886,
      "step": 3271
    },
    {
      "epoch": 0.6726282248946449,
      "grad_norm": 0.2078685313463211,
      "learning_rate": 8.138696880675102e-05,
      "loss": 0.6867,
      "step": 3272
    },
    {
      "epoch": 0.6728337958680234,
      "grad_norm": 0.20113688707351685,
      "learning_rate": 8.138109131052393e-05,
      "loss": 0.7112,
      "step": 3273
    },
    {
      "epoch": 0.673039366841402,
      "grad_norm": 0.19516409933567047,
      "learning_rate": 8.137521202197052e-05,
      "loss": 0.6735,
      "step": 3274
    },
    {
      "epoch": 0.6732449378147806,
      "grad_norm": 0.18511922657489777,
      "learning_rate": 8.136933094138042e-05,
      "loss": 0.6696,
      "step": 3275
    },
    {
      "epoch": 0.6734505087881592,
      "grad_norm": 0.18774795532226562,
      "learning_rate": 8.136344806904336e-05,
      "loss": 0.6739,
      "step": 3276
    },
    {
      "epoch": 0.6736560797615376,
      "grad_norm": 0.19817449152469635,
      "learning_rate": 8.135756340524919e-05,
      "loss": 0.6896,
      "step": 3277
    },
    {
      "epoch": 0.6738616507349162,
      "grad_norm": 0.19579534232616425,
      "learning_rate": 8.135167695028782e-05,
      "loss": 0.6669,
      "step": 3278
    },
    {
      "epoch": 0.6740672217082948,
      "grad_norm": 0.1967802196741104,
      "learning_rate": 8.13457887044492e-05,
      "loss": 0.6763,
      "step": 3279
    },
    {
      "epoch": 0.6742727926816734,
      "grad_norm": 0.1518080234527588,
      "learning_rate": 8.133989866802349e-05,
      "loss": 0.5755,
      "step": 3280
    },
    {
      "epoch": 0.6744783636550519,
      "grad_norm": 0.1956729292869568,
      "learning_rate": 8.13340068413008e-05,
      "loss": 0.6695,
      "step": 3281
    },
    {
      "epoch": 0.6746839346284305,
      "grad_norm": 0.20296379923820496,
      "learning_rate": 8.132811322457142e-05,
      "loss": 0.678,
      "step": 3282
    },
    {
      "epoch": 0.674889505601809,
      "grad_norm": 0.19922013580799103,
      "learning_rate": 8.132221781812571e-05,
      "loss": 0.6898,
      "step": 3283
    },
    {
      "epoch": 0.6750950765751876,
      "grad_norm": 0.1867515742778778,
      "learning_rate": 8.13163206222541e-05,
      "loss": 0.6911,
      "step": 3284
    },
    {
      "epoch": 0.6753006475485661,
      "grad_norm": 0.20013710856437683,
      "learning_rate": 8.13104216372471e-05,
      "loss": 0.6878,
      "step": 3285
    },
    {
      "epoch": 0.6755062185219447,
      "grad_norm": 0.19711051881313324,
      "learning_rate": 8.130452086339535e-05,
      "loss": 0.6755,
      "step": 3286
    },
    {
      "epoch": 0.6757117894953233,
      "grad_norm": 0.22560589015483856,
      "learning_rate": 8.129861830098953e-05,
      "loss": 0.6961,
      "step": 3287
    },
    {
      "epoch": 0.6759173604687018,
      "grad_norm": 0.1926925927400589,
      "learning_rate": 8.129271395032046e-05,
      "loss": 0.6887,
      "step": 3288
    },
    {
      "epoch": 0.6761229314420804,
      "grad_norm": 0.19523480534553528,
      "learning_rate": 8.1286807811679e-05,
      "loss": 0.7129,
      "step": 3289
    },
    {
      "epoch": 0.6763285024154589,
      "grad_norm": 0.19967713952064514,
      "learning_rate": 8.128089988535613e-05,
      "loss": 0.6985,
      "step": 3290
    },
    {
      "epoch": 0.6765340733888375,
      "grad_norm": 0.1905701607465744,
      "learning_rate": 8.127499017164289e-05,
      "loss": 0.6839,
      "step": 3291
    },
    {
      "epoch": 0.676739644362216,
      "grad_norm": 0.1880829632282257,
      "learning_rate": 8.126907867083043e-05,
      "loss": 0.6795,
      "step": 3292
    },
    {
      "epoch": 0.6769452153355946,
      "grad_norm": 0.19849906861782074,
      "learning_rate": 8.126316538320999e-05,
      "loss": 0.7022,
      "step": 3293
    },
    {
      "epoch": 0.6771507863089732,
      "grad_norm": 0.19704832136631012,
      "learning_rate": 8.125725030907289e-05,
      "loss": 0.6762,
      "step": 3294
    },
    {
      "epoch": 0.6773563572823518,
      "grad_norm": 0.20323243737220764,
      "learning_rate": 8.125133344871052e-05,
      "loss": 0.7123,
      "step": 3295
    },
    {
      "epoch": 0.6775619282557303,
      "grad_norm": 0.16344204545021057,
      "learning_rate": 8.124541480241441e-05,
      "loss": 0.5788,
      "step": 3296
    },
    {
      "epoch": 0.6777674992291088,
      "grad_norm": 0.212424173951149,
      "learning_rate": 8.123949437047611e-05,
      "loss": 0.6874,
      "step": 3297
    },
    {
      "epoch": 0.6779730702024874,
      "grad_norm": 0.2008782923221588,
      "learning_rate": 8.123357215318731e-05,
      "loss": 0.67,
      "step": 3298
    },
    {
      "epoch": 0.678178641175866,
      "grad_norm": 0.20118223130702972,
      "learning_rate": 8.122764815083976e-05,
      "loss": 0.6802,
      "step": 3299
    },
    {
      "epoch": 0.6783842121492445,
      "grad_norm": 0.1353181004524231,
      "learning_rate": 8.122172236372533e-05,
      "loss": 0.6006,
      "step": 3300
    },
    {
      "epoch": 0.6785897831226231,
      "grad_norm": 0.19989068806171417,
      "learning_rate": 8.121579479213591e-05,
      "loss": 0.6934,
      "step": 3301
    },
    {
      "epoch": 0.6787953540960017,
      "grad_norm": 0.20248281955718994,
      "learning_rate": 8.120986543636357e-05,
      "loss": 0.6721,
      "step": 3302
    },
    {
      "epoch": 0.6790009250693803,
      "grad_norm": 0.19119137525558472,
      "learning_rate": 8.12039342967004e-05,
      "loss": 0.6735,
      "step": 3303
    },
    {
      "epoch": 0.6792064960427587,
      "grad_norm": 0.19932256639003754,
      "learning_rate": 8.119800137343861e-05,
      "loss": 0.6672,
      "step": 3304
    },
    {
      "epoch": 0.6794120670161373,
      "grad_norm": 0.19938862323760986,
      "learning_rate": 8.119206666687047e-05,
      "loss": 0.681,
      "step": 3305
    },
    {
      "epoch": 0.6796176379895159,
      "grad_norm": 0.20113952457904816,
      "learning_rate": 8.118613017728839e-05,
      "loss": 0.6699,
      "step": 3306
    },
    {
      "epoch": 0.6798232089628944,
      "grad_norm": 0.19112683832645416,
      "learning_rate": 8.118019190498477e-05,
      "loss": 0.7142,
      "step": 3307
    },
    {
      "epoch": 0.680028779936273,
      "grad_norm": 0.19518610835075378,
      "learning_rate": 8.117425185025225e-05,
      "loss": 0.6599,
      "step": 3308
    },
    {
      "epoch": 0.6802343509096516,
      "grad_norm": 0.20748484134674072,
      "learning_rate": 8.116831001338338e-05,
      "loss": 0.6737,
      "step": 3309
    },
    {
      "epoch": 0.6804399218830302,
      "grad_norm": 0.19534945487976074,
      "learning_rate": 8.116236639467094e-05,
      "loss": 0.6724,
      "step": 3310
    },
    {
      "epoch": 0.6806454928564086,
      "grad_norm": 0.1551889032125473,
      "learning_rate": 8.115642099440773e-05,
      "loss": 0.5907,
      "step": 3311
    },
    {
      "epoch": 0.6808510638297872,
      "grad_norm": 0.223983034491539,
      "learning_rate": 8.115047381288667e-05,
      "loss": 0.6984,
      "step": 3312
    },
    {
      "epoch": 0.6810566348031658,
      "grad_norm": 0.2107374668121338,
      "learning_rate": 8.11445248504007e-05,
      "loss": 0.6801,
      "step": 3313
    },
    {
      "epoch": 0.6812622057765444,
      "grad_norm": 0.2035159170627594,
      "learning_rate": 8.113857410724294e-05,
      "loss": 0.6509,
      "step": 3314
    },
    {
      "epoch": 0.6814677767499229,
      "grad_norm": 0.1422436386346817,
      "learning_rate": 8.113262158370655e-05,
      "loss": 0.6071,
      "step": 3315
    },
    {
      "epoch": 0.6816733477233015,
      "grad_norm": 0.20899644494056702,
      "learning_rate": 8.11266672800848e-05,
      "loss": 0.6571,
      "step": 3316
    },
    {
      "epoch": 0.68187891869668,
      "grad_norm": 0.19945669174194336,
      "learning_rate": 8.112071119667098e-05,
      "loss": 0.7201,
      "step": 3317
    },
    {
      "epoch": 0.6820844896700586,
      "grad_norm": 0.21106722950935364,
      "learning_rate": 8.111475333375854e-05,
      "loss": 0.6759,
      "step": 3318
    },
    {
      "epoch": 0.6822900606434371,
      "grad_norm": 0.2076927125453949,
      "learning_rate": 8.110879369164101e-05,
      "loss": 0.6832,
      "step": 3319
    },
    {
      "epoch": 0.6824956316168157,
      "grad_norm": 0.20357108116149902,
      "learning_rate": 8.1102832270612e-05,
      "loss": 0.6636,
      "step": 3320
    },
    {
      "epoch": 0.6827012025901943,
      "grad_norm": 0.1578240841627121,
      "learning_rate": 8.109686907096517e-05,
      "loss": 0.6158,
      "step": 3321
    },
    {
      "epoch": 0.6829067735635729,
      "grad_norm": 0.20219643414020538,
      "learning_rate": 8.109090409299434e-05,
      "loss": 0.6839,
      "step": 3322
    },
    {
      "epoch": 0.6831123445369514,
      "grad_norm": 0.2029838114976883,
      "learning_rate": 8.108493733699335e-05,
      "loss": 0.6963,
      "step": 3323
    },
    {
      "epoch": 0.6833179155103299,
      "grad_norm": 0.19904999434947968,
      "learning_rate": 8.107896880325615e-05,
      "loss": 0.6648,
      "step": 3324
    },
    {
      "epoch": 0.6835234864837085,
      "grad_norm": 0.2000379115343094,
      "learning_rate": 8.10729984920768e-05,
      "loss": 0.6706,
      "step": 3325
    },
    {
      "epoch": 0.683729057457087,
      "grad_norm": 0.19663308560848236,
      "learning_rate": 8.106702640374939e-05,
      "loss": 0.6798,
      "step": 3326
    },
    {
      "epoch": 0.6839346284304656,
      "grad_norm": 0.2028771936893463,
      "learning_rate": 8.10610525385682e-05,
      "loss": 0.6919,
      "step": 3327
    },
    {
      "epoch": 0.6841401994038442,
      "grad_norm": 0.19258631765842438,
      "learning_rate": 8.105507689682748e-05,
      "loss": 0.653,
      "step": 3328
    },
    {
      "epoch": 0.6843457703772228,
      "grad_norm": 0.14250509440898895,
      "learning_rate": 8.104909947882165e-05,
      "loss": 0.5786,
      "step": 3329
    },
    {
      "epoch": 0.6845513413506013,
      "grad_norm": 0.2034870833158493,
      "learning_rate": 8.104312028484517e-05,
      "loss": 0.6705,
      "step": 3330
    },
    {
      "epoch": 0.6847569123239798,
      "grad_norm": 0.19610241055488586,
      "learning_rate": 8.103713931519263e-05,
      "loss": 0.7,
      "step": 3331
    },
    {
      "epoch": 0.6849624832973584,
      "grad_norm": 0.14964817464351654,
      "learning_rate": 8.103115657015868e-05,
      "loss": 0.5914,
      "step": 3332
    },
    {
      "epoch": 0.685168054270737,
      "grad_norm": 0.20991382002830505,
      "learning_rate": 8.102517205003804e-05,
      "loss": 0.6841,
      "step": 3333
    },
    {
      "epoch": 0.6853736252441155,
      "grad_norm": 0.20073123276233673,
      "learning_rate": 8.101918575512556e-05,
      "loss": 0.6919,
      "step": 3334
    },
    {
      "epoch": 0.6855791962174941,
      "grad_norm": 0.21147504448890686,
      "learning_rate": 8.101319768571616e-05,
      "loss": 0.6585,
      "step": 3335
    },
    {
      "epoch": 0.6857847671908727,
      "grad_norm": 0.20476599037647247,
      "learning_rate": 8.100720784210482e-05,
      "loss": 0.7009,
      "step": 3336
    },
    {
      "epoch": 0.6859903381642513,
      "grad_norm": 0.20010556280612946,
      "learning_rate": 8.100121622458666e-05,
      "loss": 0.6734,
      "step": 3337
    },
    {
      "epoch": 0.6861959091376297,
      "grad_norm": 0.1875293105840683,
      "learning_rate": 8.099522283345683e-05,
      "loss": 0.6779,
      "step": 3338
    },
    {
      "epoch": 0.6864014801110083,
      "grad_norm": 0.20071950554847717,
      "learning_rate": 8.098922766901063e-05,
      "loss": 0.6709,
      "step": 3339
    },
    {
      "epoch": 0.6866070510843869,
      "grad_norm": 0.19928574562072754,
      "learning_rate": 8.098323073154338e-05,
      "loss": 0.7085,
      "step": 3340
    },
    {
      "epoch": 0.6868126220577655,
      "grad_norm": 0.19401361048221588,
      "learning_rate": 8.097723202135054e-05,
      "loss": 0.6872,
      "step": 3341
    },
    {
      "epoch": 0.687018193031144,
      "grad_norm": 0.19485783576965332,
      "learning_rate": 8.097123153872765e-05,
      "loss": 0.6864,
      "step": 3342
    },
    {
      "epoch": 0.6872237640045226,
      "grad_norm": 0.1916022002696991,
      "learning_rate": 8.09652292839703e-05,
      "loss": 0.7022,
      "step": 3343
    },
    {
      "epoch": 0.6874293349779012,
      "grad_norm": 0.1911773532629013,
      "learning_rate": 8.09592252573742e-05,
      "loss": 0.708,
      "step": 3344
    },
    {
      "epoch": 0.6876349059512796,
      "grad_norm": 0.19738483428955078,
      "learning_rate": 8.095321945923515e-05,
      "loss": 0.7014,
      "step": 3345
    },
    {
      "epoch": 0.6878404769246582,
      "grad_norm": 0.16668002307415009,
      "learning_rate": 8.094721188984903e-05,
      "loss": 0.6045,
      "step": 3346
    },
    {
      "epoch": 0.6880460478980368,
      "grad_norm": 0.20171229541301727,
      "learning_rate": 8.094120254951179e-05,
      "loss": 0.6919,
      "step": 3347
    },
    {
      "epoch": 0.6882516188714154,
      "grad_norm": 0.19809181988239288,
      "learning_rate": 8.093519143851949e-05,
      "loss": 0.6767,
      "step": 3348
    },
    {
      "epoch": 0.6884571898447939,
      "grad_norm": 0.19745509326457977,
      "learning_rate": 8.092917855716826e-05,
      "loss": 0.6738,
      "step": 3349
    },
    {
      "epoch": 0.6886627608181725,
      "grad_norm": 0.19986550509929657,
      "learning_rate": 8.092316390575435e-05,
      "loss": 0.7112,
      "step": 3350
    },
    {
      "epoch": 0.688868331791551,
      "grad_norm": 0.19324201345443726,
      "learning_rate": 8.091714748457404e-05,
      "loss": 0.6906,
      "step": 3351
    },
    {
      "epoch": 0.6890739027649296,
      "grad_norm": 0.20095904171466827,
      "learning_rate": 8.091112929392376e-05,
      "loss": 0.6486,
      "step": 3352
    },
    {
      "epoch": 0.6892794737383081,
      "grad_norm": 0.1877359300851822,
      "learning_rate": 8.09051093341e-05,
      "loss": 0.6844,
      "step": 3353
    },
    {
      "epoch": 0.6894850447116867,
      "grad_norm": 0.19812311232089996,
      "learning_rate": 8.08990876053993e-05,
      "loss": 0.6795,
      "step": 3354
    },
    {
      "epoch": 0.6896906156850653,
      "grad_norm": 0.19134752452373505,
      "learning_rate": 8.089306410811836e-05,
      "loss": 0.703,
      "step": 3355
    },
    {
      "epoch": 0.6898961866584439,
      "grad_norm": 0.1890835165977478,
      "learning_rate": 8.088703884255393e-05,
      "loss": 0.6585,
      "step": 3356
    },
    {
      "epoch": 0.6901017576318224,
      "grad_norm": 0.18926945328712463,
      "learning_rate": 8.088101180900282e-05,
      "loss": 0.6694,
      "step": 3357
    },
    {
      "epoch": 0.6903073286052009,
      "grad_norm": 0.18181371688842773,
      "learning_rate": 8.087498300776194e-05,
      "loss": 0.5831,
      "step": 3358
    },
    {
      "epoch": 0.6905128995785795,
      "grad_norm": 0.1939140260219574,
      "learning_rate": 8.086895243912835e-05,
      "loss": 0.6658,
      "step": 3359
    },
    {
      "epoch": 0.690718470551958,
      "grad_norm": 0.13031508028507233,
      "learning_rate": 8.086292010339912e-05,
      "loss": 0.6073,
      "step": 3360
    },
    {
      "epoch": 0.6909240415253366,
      "grad_norm": 0.1984340101480484,
      "learning_rate": 8.085688600087144e-05,
      "loss": 0.6565,
      "step": 3361
    },
    {
      "epoch": 0.6911296124987152,
      "grad_norm": 0.20224301517009735,
      "learning_rate": 8.08508501318426e-05,
      "loss": 0.7191,
      "step": 3362
    },
    {
      "epoch": 0.6913351834720938,
      "grad_norm": 0.18884535133838654,
      "learning_rate": 8.084481249660991e-05,
      "loss": 0.7012,
      "step": 3363
    },
    {
      "epoch": 0.6915407544454722,
      "grad_norm": 0.1905461698770523,
      "learning_rate": 8.083877309547086e-05,
      "loss": 0.6861,
      "step": 3364
    },
    {
      "epoch": 0.6917463254188508,
      "grad_norm": 0.19112585484981537,
      "learning_rate": 8.083273192872297e-05,
      "loss": 0.6698,
      "step": 3365
    },
    {
      "epoch": 0.6919518963922294,
      "grad_norm": 0.19276300072669983,
      "learning_rate": 8.082668899666386e-05,
      "loss": 0.6939,
      "step": 3366
    },
    {
      "epoch": 0.692157467365608,
      "grad_norm": 0.1849944144487381,
      "learning_rate": 8.082064429959123e-05,
      "loss": 0.6653,
      "step": 3367
    },
    {
      "epoch": 0.6923630383389865,
      "grad_norm": 0.197621151804924,
      "learning_rate": 8.081459783780288e-05,
      "loss": 0.69,
      "step": 3368
    },
    {
      "epoch": 0.6925686093123651,
      "grad_norm": 0.20411409437656403,
      "learning_rate": 8.08085496115967e-05,
      "loss": 0.6928,
      "step": 3369
    },
    {
      "epoch": 0.6927741802857437,
      "grad_norm": 0.19879065454006195,
      "learning_rate": 8.080249962127064e-05,
      "loss": 0.6855,
      "step": 3370
    },
    {
      "epoch": 0.6929797512591223,
      "grad_norm": 0.19563095271587372,
      "learning_rate": 8.079644786712277e-05,
      "loss": 0.6692,
      "step": 3371
    },
    {
      "epoch": 0.6931853222325007,
      "grad_norm": 0.1997094601392746,
      "learning_rate": 8.079039434945124e-05,
      "loss": 0.6851,
      "step": 3372
    },
    {
      "epoch": 0.6933908932058793,
      "grad_norm": 0.19280613958835602,
      "learning_rate": 8.078433906855424e-05,
      "loss": 0.6731,
      "step": 3373
    },
    {
      "epoch": 0.6935964641792579,
      "grad_norm": 0.18386954069137573,
      "learning_rate": 8.077828202473013e-05,
      "loss": 0.6934,
      "step": 3374
    },
    {
      "epoch": 0.6938020351526365,
      "grad_norm": 0.20323842763900757,
      "learning_rate": 8.077222321827727e-05,
      "loss": 0.6856,
      "step": 3375
    },
    {
      "epoch": 0.694007606126015,
      "grad_norm": 0.1947094351053238,
      "learning_rate": 8.076616264949418e-05,
      "loss": 0.6884,
      "step": 3376
    },
    {
      "epoch": 0.6942131770993936,
      "grad_norm": 0.19289527833461761,
      "learning_rate": 8.076010031867944e-05,
      "loss": 0.589,
      "step": 3377
    },
    {
      "epoch": 0.6944187480727722,
      "grad_norm": 0.19861692190170288,
      "learning_rate": 8.075403622613168e-05,
      "loss": 0.7024,
      "step": 3378
    },
    {
      "epoch": 0.6946243190461506,
      "grad_norm": 0.21449032425880432,
      "learning_rate": 8.074797037214968e-05,
      "loss": 0.7021,
      "step": 3379
    },
    {
      "epoch": 0.6948298900195292,
      "grad_norm": 0.1875978410243988,
      "learning_rate": 8.074190275703227e-05,
      "loss": 0.6898,
      "step": 3380
    },
    {
      "epoch": 0.6950354609929078,
      "grad_norm": 0.15483641624450684,
      "learning_rate": 8.073583338107837e-05,
      "loss": 0.5851,
      "step": 3381
    },
    {
      "epoch": 0.6952410319662864,
      "grad_norm": 0.19564680755138397,
      "learning_rate": 8.072976224458697e-05,
      "loss": 0.6792,
      "step": 3382
    },
    {
      "epoch": 0.6954466029396649,
      "grad_norm": 0.20344282686710358,
      "learning_rate": 8.072368934785719e-05,
      "loss": 0.6869,
      "step": 3383
    },
    {
      "epoch": 0.6956521739130435,
      "grad_norm": 0.19657017290592194,
      "learning_rate": 8.071761469118822e-05,
      "loss": 0.6595,
      "step": 3384
    },
    {
      "epoch": 0.695857744886422,
      "grad_norm": 0.19356437027454376,
      "learning_rate": 8.071153827487931e-05,
      "loss": 0.6804,
      "step": 3385
    },
    {
      "epoch": 0.6960633158598006,
      "grad_norm": 0.19667509198188782,
      "learning_rate": 8.070546009922981e-05,
      "loss": 0.7075,
      "step": 3386
    },
    {
      "epoch": 0.6962688868331791,
      "grad_norm": 0.18919992446899414,
      "learning_rate": 8.06993801645392e-05,
      "loss": 0.6778,
      "step": 3387
    },
    {
      "epoch": 0.6964744578065577,
      "grad_norm": 0.15784306824207306,
      "learning_rate": 8.0693298471107e-05,
      "loss": 0.5685,
      "step": 3388
    },
    {
      "epoch": 0.6966800287799363,
      "grad_norm": 0.20536069571971893,
      "learning_rate": 8.068721501923279e-05,
      "loss": 0.6465,
      "step": 3389
    },
    {
      "epoch": 0.6968855997533149,
      "grad_norm": 0.1936463564634323,
      "learning_rate": 8.06811298092163e-05,
      "loss": 0.6918,
      "step": 3390
    },
    {
      "epoch": 0.6970911707266934,
      "grad_norm": 0.19561581313610077,
      "learning_rate": 8.067504284135732e-05,
      "loss": 0.673,
      "step": 3391
    },
    {
      "epoch": 0.6972967417000719,
      "grad_norm": 0.198947474360466,
      "learning_rate": 8.066895411595572e-05,
      "loss": 0.6773,
      "step": 3392
    },
    {
      "epoch": 0.6975023126734505,
      "grad_norm": 0.19654102623462677,
      "learning_rate": 8.066286363331147e-05,
      "loss": 0.6467,
      "step": 3393
    },
    {
      "epoch": 0.6977078836468291,
      "grad_norm": 0.1938384771347046,
      "learning_rate": 8.065677139372462e-05,
      "loss": 0.6993,
      "step": 3394
    },
    {
      "epoch": 0.6979134546202076,
      "grad_norm": 0.1924823522567749,
      "learning_rate": 8.06506773974953e-05,
      "loss": 0.6672,
      "step": 3395
    },
    {
      "epoch": 0.6981190255935862,
      "grad_norm": 0.19648601114749908,
      "learning_rate": 8.064458164492372e-05,
      "loss": 0.6478,
      "step": 3396
    },
    {
      "epoch": 0.6983245965669648,
      "grad_norm": 0.1876935362815857,
      "learning_rate": 8.063848413631023e-05,
      "loss": 0.6704,
      "step": 3397
    },
    {
      "epoch": 0.6985301675403432,
      "grad_norm": 0.19049161672592163,
      "learning_rate": 8.06323848719552e-05,
      "loss": 0.6582,
      "step": 3398
    },
    {
      "epoch": 0.6987357385137218,
      "grad_norm": 0.19286733865737915,
      "learning_rate": 8.06262838521591e-05,
      "loss": 0.7147,
      "step": 3399
    },
    {
      "epoch": 0.6989413094871004,
      "grad_norm": 0.19397635757923126,
      "learning_rate": 8.062018107722252e-05,
      "loss": 0.6801,
      "step": 3400
    },
    {
      "epoch": 0.699146880460479,
      "grad_norm": 0.20421355962753296,
      "learning_rate": 8.06140765474461e-05,
      "loss": 0.6723,
      "step": 3401
    },
    {
      "epoch": 0.6993524514338575,
      "grad_norm": 0.1797918975353241,
      "learning_rate": 8.060797026313059e-05,
      "loss": 0.5854,
      "step": 3402
    },
    {
      "epoch": 0.6995580224072361,
      "grad_norm": 0.19936294853687286,
      "learning_rate": 8.060186222457682e-05,
      "loss": 0.6819,
      "step": 3403
    },
    {
      "epoch": 0.6997635933806147,
      "grad_norm": 0.19907638430595398,
      "learning_rate": 8.05957524320857e-05,
      "loss": 0.6739,
      "step": 3404
    },
    {
      "epoch": 0.6999691643539933,
      "grad_norm": 0.20160700380802155,
      "learning_rate": 8.058964088595822e-05,
      "loss": 0.6694,
      "step": 3405
    },
    {
      "epoch": 0.7001747353273717,
      "grad_norm": 0.19310222566127777,
      "learning_rate": 8.05835275864955e-05,
      "loss": 0.6806,
      "step": 3406
    },
    {
      "epoch": 0.7003803063007503,
      "grad_norm": 0.1963704526424408,
      "learning_rate": 8.057741253399866e-05,
      "loss": 0.6816,
      "step": 3407
    },
    {
      "epoch": 0.7005858772741289,
      "grad_norm": 0.5723682641983032,
      "learning_rate": 8.057129572876903e-05,
      "loss": 0.6971,
      "step": 3408
    },
    {
      "epoch": 0.7007914482475075,
      "grad_norm": 0.1899087131023407,
      "learning_rate": 8.05651771711079e-05,
      "loss": 0.6834,
      "step": 3409
    },
    {
      "epoch": 0.700997019220886,
      "grad_norm": 0.1957729011774063,
      "learning_rate": 8.055905686131672e-05,
      "loss": 0.7188,
      "step": 3410
    },
    {
      "epoch": 0.7012025901942646,
      "grad_norm": 0.19298696517944336,
      "learning_rate": 8.055293479969702e-05,
      "loss": 0.6694,
      "step": 3411
    },
    {
      "epoch": 0.7014081611676432,
      "grad_norm": 0.1891012340784073,
      "learning_rate": 8.05468109865504e-05,
      "loss": 0.6817,
      "step": 3412
    },
    {
      "epoch": 0.7016137321410217,
      "grad_norm": 0.19800642132759094,
      "learning_rate": 8.054068542217854e-05,
      "loss": 0.6592,
      "step": 3413
    },
    {
      "epoch": 0.7018193031144002,
      "grad_norm": 0.18479777872562408,
      "learning_rate": 8.053455810688322e-05,
      "loss": 0.6702,
      "step": 3414
    },
    {
      "epoch": 0.7020248740877788,
      "grad_norm": 0.20111770927906036,
      "learning_rate": 8.052842904096631e-05,
      "loss": 0.7025,
      "step": 3415
    },
    {
      "epoch": 0.7022304450611574,
      "grad_norm": 0.19288669526576996,
      "learning_rate": 8.052229822472977e-05,
      "loss": 0.6858,
      "step": 3416
    },
    {
      "epoch": 0.7024360160345359,
      "grad_norm": 0.2072620391845703,
      "learning_rate": 8.051616565847562e-05,
      "loss": 0.6998,
      "step": 3417
    },
    {
      "epoch": 0.7026415870079145,
      "grad_norm": 0.1882101595401764,
      "learning_rate": 8.051003134250601e-05,
      "loss": 0.6669,
      "step": 3418
    },
    {
      "epoch": 0.702847157981293,
      "grad_norm": 0.2227669060230255,
      "learning_rate": 8.050389527712312e-05,
      "loss": 0.6115,
      "step": 3419
    },
    {
      "epoch": 0.7030527289546716,
      "grad_norm": 0.1958729773759842,
      "learning_rate": 8.049775746262924e-05,
      "loss": 0.7012,
      "step": 3420
    },
    {
      "epoch": 0.7032582999280501,
      "grad_norm": 0.14937171339988708,
      "learning_rate": 8.049161789932677e-05,
      "loss": 0.6124,
      "step": 3421
    },
    {
      "epoch": 0.7034638709014287,
      "grad_norm": 0.16276027262210846,
      "learning_rate": 8.048547658751817e-05,
      "loss": 0.5928,
      "step": 3422
    },
    {
      "epoch": 0.7036694418748073,
      "grad_norm": 0.15098173916339874,
      "learning_rate": 8.047933352750601e-05,
      "loss": 0.6122,
      "step": 3423
    },
    {
      "epoch": 0.7038750128481859,
      "grad_norm": 0.20423725247383118,
      "learning_rate": 8.047318871959292e-05,
      "loss": 0.6988,
      "step": 3424
    },
    {
      "epoch": 0.7040805838215644,
      "grad_norm": 0.19810713827610016,
      "learning_rate": 8.046704216408161e-05,
      "loss": 0.6585,
      "step": 3425
    },
    {
      "epoch": 0.7042861547949429,
      "grad_norm": 0.21174119412899017,
      "learning_rate": 8.046089386127491e-05,
      "loss": 0.5926,
      "step": 3426
    },
    {
      "epoch": 0.7044917257683215,
      "grad_norm": 0.18921788036823273,
      "learning_rate": 8.045474381147572e-05,
      "loss": 0.663,
      "step": 3427
    },
    {
      "epoch": 0.7046972967417001,
      "grad_norm": 0.21867318451404572,
      "learning_rate": 8.044859201498701e-05,
      "loss": 0.6619,
      "step": 3428
    },
    {
      "epoch": 0.7049028677150786,
      "grad_norm": 0.18937045335769653,
      "learning_rate": 8.044243847211186e-05,
      "loss": 0.6972,
      "step": 3429
    },
    {
      "epoch": 0.7051084386884572,
      "grad_norm": 0.20421583950519562,
      "learning_rate": 8.043628318315343e-05,
      "loss": 0.6855,
      "step": 3430
    },
    {
      "epoch": 0.7053140096618358,
      "grad_norm": 0.20946352183818817,
      "learning_rate": 8.043012614841493e-05,
      "loss": 0.5986,
      "step": 3431
    },
    {
      "epoch": 0.7055195806352144,
      "grad_norm": 0.21439684927463531,
      "learning_rate": 8.042396736819974e-05,
      "loss": 0.6642,
      "step": 3432
    },
    {
      "epoch": 0.7057251516085928,
      "grad_norm": 0.1428326517343521,
      "learning_rate": 8.041780684281124e-05,
      "loss": 0.5734,
      "step": 3433
    },
    {
      "epoch": 0.7059307225819714,
      "grad_norm": 0.21994005143642426,
      "learning_rate": 8.041164457255295e-05,
      "loss": 0.6916,
      "step": 3434
    },
    {
      "epoch": 0.70613629355535,
      "grad_norm": 0.19378912448883057,
      "learning_rate": 8.040548055772843e-05,
      "loss": 0.6845,
      "step": 3435
    },
    {
      "epoch": 0.7063418645287285,
      "grad_norm": 0.14617706835269928,
      "learning_rate": 8.039931479864138e-05,
      "loss": 0.5823,
      "step": 3436
    },
    {
      "epoch": 0.7065474355021071,
      "grad_norm": 0.2063405066728592,
      "learning_rate": 8.039314729559553e-05,
      "loss": 0.7163,
      "step": 3437
    },
    {
      "epoch": 0.7067530064754857,
      "grad_norm": 0.20391802489757538,
      "learning_rate": 8.038697804889476e-05,
      "loss": 0.6825,
      "step": 3438
    },
    {
      "epoch": 0.7069585774488643,
      "grad_norm": 0.1884995549917221,
      "learning_rate": 8.038080705884297e-05,
      "loss": 0.7005,
      "step": 3439
    },
    {
      "epoch": 0.7071641484222427,
      "grad_norm": 0.15203148126602173,
      "learning_rate": 8.03746343257442e-05,
      "loss": 0.5766,
      "step": 3440
    },
    {
      "epoch": 0.7073697193956213,
      "grad_norm": 0.1965416520833969,
      "learning_rate": 8.036845984990251e-05,
      "loss": 0.6746,
      "step": 3441
    },
    {
      "epoch": 0.7075752903689999,
      "grad_norm": 0.19438838958740234,
      "learning_rate": 8.036228363162214e-05,
      "loss": 0.68,
      "step": 3442
    },
    {
      "epoch": 0.7077808613423785,
      "grad_norm": 0.19313882291316986,
      "learning_rate": 8.035610567120731e-05,
      "loss": 0.6638,
      "step": 3443
    },
    {
      "epoch": 0.707986432315757,
      "grad_norm": 0.19299215078353882,
      "learning_rate": 8.034992596896244e-05,
      "loss": 0.6862,
      "step": 3444
    },
    {
      "epoch": 0.7081920032891356,
      "grad_norm": 0.20329324901103973,
      "learning_rate": 8.034374452519193e-05,
      "loss": 0.6824,
      "step": 3445
    },
    {
      "epoch": 0.7083975742625142,
      "grad_norm": 0.18780893087387085,
      "learning_rate": 8.033756134020032e-05,
      "loss": 0.662,
      "step": 3446
    },
    {
      "epoch": 0.7086031452358927,
      "grad_norm": 0.19197134673595428,
      "learning_rate": 8.033137641429223e-05,
      "loss": 0.6791,
      "step": 3447
    },
    {
      "epoch": 0.7088087162092712,
      "grad_norm": 0.19330036640167236,
      "learning_rate": 8.032518974777236e-05,
      "loss": 0.6907,
      "step": 3448
    },
    {
      "epoch": 0.7090142871826498,
      "grad_norm": 0.19305558502674103,
      "learning_rate": 8.03190013409455e-05,
      "loss": 0.6755,
      "step": 3449
    },
    {
      "epoch": 0.7092198581560284,
      "grad_norm": 0.17885883152484894,
      "learning_rate": 8.031281119411653e-05,
      "loss": 0.6032,
      "step": 3450
    },
    {
      "epoch": 0.709425429129407,
      "grad_norm": 0.19554337859153748,
      "learning_rate": 8.030661930759041e-05,
      "loss": 0.6943,
      "step": 3451
    },
    {
      "epoch": 0.7096310001027855,
      "grad_norm": 0.19464746117591858,
      "learning_rate": 8.030042568167216e-05,
      "loss": 0.6655,
      "step": 3452
    },
    {
      "epoch": 0.709836571076164,
      "grad_norm": 0.19761775434017181,
      "learning_rate": 8.029423031666694e-05,
      "loss": 0.6915,
      "step": 3453
    },
    {
      "epoch": 0.7100421420495426,
      "grad_norm": 0.20174358785152435,
      "learning_rate": 8.028803321287997e-05,
      "loss": 0.6715,
      "step": 3454
    },
    {
      "epoch": 0.7102477130229211,
      "grad_norm": 0.19728273153305054,
      "learning_rate": 8.028183437061653e-05,
      "loss": 0.7062,
      "step": 3455
    },
    {
      "epoch": 0.7104532839962997,
      "grad_norm": 0.1927875429391861,
      "learning_rate": 8.027563379018202e-05,
      "loss": 0.6685,
      "step": 3456
    },
    {
      "epoch": 0.7106588549696783,
      "grad_norm": 0.16123135387897491,
      "learning_rate": 8.02694314718819e-05,
      "loss": 0.5778,
      "step": 3457
    },
    {
      "epoch": 0.7108644259430569,
      "grad_norm": 0.1330617517232895,
      "learning_rate": 8.026322741602176e-05,
      "loss": 0.5941,
      "step": 3458
    },
    {
      "epoch": 0.7110699969164354,
      "grad_norm": 0.24413903057575226,
      "learning_rate": 8.025702162290721e-05,
      "loss": 0.6845,
      "step": 3459
    },
    {
      "epoch": 0.7112755678898139,
      "grad_norm": 0.21330687403678894,
      "learning_rate": 8.0250814092844e-05,
      "loss": 0.6724,
      "step": 3460
    },
    {
      "epoch": 0.7114811388631925,
      "grad_norm": 0.21365886926651,
      "learning_rate": 8.024460482613793e-05,
      "loss": 0.6668,
      "step": 3461
    },
    {
      "epoch": 0.7116867098365711,
      "grad_norm": 0.2229931354522705,
      "learning_rate": 8.023839382309493e-05,
      "loss": 0.6628,
      "step": 3462
    },
    {
      "epoch": 0.7118922808099496,
      "grad_norm": 0.21787157654762268,
      "learning_rate": 8.023218108402096e-05,
      "loss": 0.6776,
      "step": 3463
    },
    {
      "epoch": 0.7120978517833282,
      "grad_norm": 0.19112589955329895,
      "learning_rate": 8.022596660922212e-05,
      "loss": 0.5856,
      "step": 3464
    },
    {
      "epoch": 0.7123034227567068,
      "grad_norm": 0.20584847033023834,
      "learning_rate": 8.021975039900453e-05,
      "loss": 0.6659,
      "step": 3465
    },
    {
      "epoch": 0.7125089937300854,
      "grad_norm": 0.13937044143676758,
      "learning_rate": 8.021353245367445e-05,
      "loss": 0.581,
      "step": 3466
    },
    {
      "epoch": 0.7127145647034638,
      "grad_norm": 0.21949850022792816,
      "learning_rate": 8.020731277353824e-05,
      "loss": 0.6818,
      "step": 3467
    },
    {
      "epoch": 0.7129201356768424,
      "grad_norm": 0.19672751426696777,
      "learning_rate": 8.020109135890227e-05,
      "loss": 0.6788,
      "step": 3468
    },
    {
      "epoch": 0.713125706650221,
      "grad_norm": 0.18057693541049957,
      "learning_rate": 8.019486821007307e-05,
      "loss": 0.5962,
      "step": 3469
    },
    {
      "epoch": 0.7133312776235996,
      "grad_norm": 0.20432183146476746,
      "learning_rate": 8.01886433273572e-05,
      "loss": 0.6854,
      "step": 3470
    },
    {
      "epoch": 0.7135368485969781,
      "grad_norm": 0.20442970097064972,
      "learning_rate": 8.018241671106135e-05,
      "loss": 0.6755,
      "step": 3471
    },
    {
      "epoch": 0.7137424195703567,
      "grad_norm": 0.1377362608909607,
      "learning_rate": 8.017618836149227e-05,
      "loss": 0.5924,
      "step": 3472
    },
    {
      "epoch": 0.7139479905437353,
      "grad_norm": 0.20388440787792206,
      "learning_rate": 8.01699582789568e-05,
      "loss": 0.6946,
      "step": 3473
    },
    {
      "epoch": 0.7141535615171137,
      "grad_norm": 0.2007599174976349,
      "learning_rate": 8.016372646376188e-05,
      "loss": 0.6916,
      "step": 3474
    },
    {
      "epoch": 0.7143591324904923,
      "grad_norm": 0.1868349313735962,
      "learning_rate": 8.015749291621449e-05,
      "loss": 0.6758,
      "step": 3475
    },
    {
      "epoch": 0.7145647034638709,
      "grad_norm": 0.20039929449558258,
      "learning_rate": 8.015125763662177e-05,
      "loss": 0.6769,
      "step": 3476
    },
    {
      "epoch": 0.7147702744372495,
      "grad_norm": 0.1937168687582016,
      "learning_rate": 8.014502062529089e-05,
      "loss": 0.6572,
      "step": 3477
    },
    {
      "epoch": 0.714975845410628,
      "grad_norm": 0.16396324336528778,
      "learning_rate": 8.013878188252908e-05,
      "loss": 0.5781,
      "step": 3478
    },
    {
      "epoch": 0.7151814163840066,
      "grad_norm": 0.19520901143550873,
      "learning_rate": 8.013254140864376e-05,
      "loss": 0.7001,
      "step": 3479
    },
    {
      "epoch": 0.7153869873573852,
      "grad_norm": 0.1290317177772522,
      "learning_rate": 8.012629920394231e-05,
      "loss": 0.5826,
      "step": 3480
    },
    {
      "epoch": 0.7155925583307637,
      "grad_norm": 0.20711787045001984,
      "learning_rate": 8.012005526873228e-05,
      "loss": 0.7025,
      "step": 3481
    },
    {
      "epoch": 0.7157981293041422,
      "grad_norm": 0.20414526760578156,
      "learning_rate": 8.011380960332128e-05,
      "loss": 0.6697,
      "step": 3482
    },
    {
      "epoch": 0.7160037002775208,
      "grad_norm": 0.19431988894939423,
      "learning_rate": 8.010756220801702e-05,
      "loss": 0.6705,
      "step": 3483
    },
    {
      "epoch": 0.7162092712508994,
      "grad_norm": 0.1636938601732254,
      "learning_rate": 8.010131308312725e-05,
      "loss": 0.5727,
      "step": 3484
    },
    {
      "epoch": 0.716414842224278,
      "grad_norm": 0.19284431636333466,
      "learning_rate": 8.009506222895984e-05,
      "loss": 0.6772,
      "step": 3485
    },
    {
      "epoch": 0.7166204131976565,
      "grad_norm": 0.19347639381885529,
      "learning_rate": 8.008880964582275e-05,
      "loss": 0.6934,
      "step": 3486
    },
    {
      "epoch": 0.716825984171035,
      "grad_norm": 0.12324893474578857,
      "learning_rate": 8.008255533402403e-05,
      "loss": 0.5841,
      "step": 3487
    },
    {
      "epoch": 0.7170315551444136,
      "grad_norm": 0.12979742884635925,
      "learning_rate": 8.007629929387176e-05,
      "loss": 0.5726,
      "step": 3488
    },
    {
      "epoch": 0.7172371261177922,
      "grad_norm": 0.19342902302742004,
      "learning_rate": 8.007004152567417e-05,
      "loss": 0.6887,
      "step": 3489
    },
    {
      "epoch": 0.7174426970911707,
      "grad_norm": 0.13253627717494965,
      "learning_rate": 8.006378202973959e-05,
      "loss": 0.5835,
      "step": 3490
    },
    {
      "epoch": 0.7176482680645493,
      "grad_norm": 0.2006087452173233,
      "learning_rate": 8.005752080637632e-05,
      "loss": 0.6998,
      "step": 3491
    },
    {
      "epoch": 0.7178538390379279,
      "grad_norm": 0.12888813018798828,
      "learning_rate": 8.005125785589286e-05,
      "loss": 0.595,
      "step": 3492
    },
    {
      "epoch": 0.7180594100113064,
      "grad_norm": 0.1942748874425888,
      "learning_rate": 8.004499317859776e-05,
      "loss": 0.683,
      "step": 3493
    },
    {
      "epoch": 0.7182649809846849,
      "grad_norm": 0.18737460672855377,
      "learning_rate": 8.003872677479965e-05,
      "loss": 0.6861,
      "step": 3494
    },
    {
      "epoch": 0.7184705519580635,
      "grad_norm": 0.24117667973041534,
      "learning_rate": 8.003245864480724e-05,
      "loss": 0.6826,
      "step": 3495
    },
    {
      "epoch": 0.7186761229314421,
      "grad_norm": 0.19393832981586456,
      "learning_rate": 8.002618878892934e-05,
      "loss": 0.6682,
      "step": 3496
    },
    {
      "epoch": 0.7188816939048206,
      "grad_norm": 0.19202245771884918,
      "learning_rate": 8.001991720747481e-05,
      "loss": 0.683,
      "step": 3497
    },
    {
      "epoch": 0.7190872648781992,
      "grad_norm": 0.18830347061157227,
      "learning_rate": 8.001364390075266e-05,
      "loss": 0.6762,
      "step": 3498
    },
    {
      "epoch": 0.7192928358515778,
      "grad_norm": 0.18478117883205414,
      "learning_rate": 8.000736886907193e-05,
      "loss": 0.673,
      "step": 3499
    },
    {
      "epoch": 0.7194984068249564,
      "grad_norm": 0.19119176268577576,
      "learning_rate": 8.000109211274176e-05,
      "loss": 0.6683,
      "step": 3500
    },
    {
      "epoch": 0.7197039777983348,
      "grad_norm": 0.18504808843135834,
      "learning_rate": 7.999481363207136e-05,
      "loss": 0.6671,
      "step": 3501
    },
    {
      "epoch": 0.7199095487717134,
      "grad_norm": 0.18554535508155823,
      "learning_rate": 7.998853342737007e-05,
      "loss": 0.6531,
      "step": 3502
    },
    {
      "epoch": 0.720115119745092,
      "grad_norm": 0.20063155889511108,
      "learning_rate": 7.998225149894729e-05,
      "loss": 0.6826,
      "step": 3503
    },
    {
      "epoch": 0.7203206907184706,
      "grad_norm": 0.18054603040218353,
      "learning_rate": 7.997596784711245e-05,
      "loss": 0.6657,
      "step": 3504
    },
    {
      "epoch": 0.7205262616918491,
      "grad_norm": 0.19543704390525818,
      "learning_rate": 7.996968247217517e-05,
      "loss": 0.7077,
      "step": 3505
    },
    {
      "epoch": 0.7207318326652277,
      "grad_norm": 0.196107417345047,
      "learning_rate": 7.996339537444508e-05,
      "loss": 0.6607,
      "step": 3506
    },
    {
      "epoch": 0.7209374036386063,
      "grad_norm": 0.1699989140033722,
      "learning_rate": 7.995710655423193e-05,
      "loss": 0.5965,
      "step": 3507
    },
    {
      "epoch": 0.7211429746119847,
      "grad_norm": 0.13372716307640076,
      "learning_rate": 7.995081601184552e-05,
      "loss": 0.5885,
      "step": 3508
    },
    {
      "epoch": 0.7213485455853633,
      "grad_norm": 0.2239861637353897,
      "learning_rate": 7.994452374759577e-05,
      "loss": 0.6822,
      "step": 3509
    },
    {
      "epoch": 0.7215541165587419,
      "grad_norm": 0.20403791964054108,
      "learning_rate": 7.993822976179265e-05,
      "loss": 0.6794,
      "step": 3510
    },
    {
      "epoch": 0.7217596875321205,
      "grad_norm": 0.18789462745189667,
      "learning_rate": 7.993193405474626e-05,
      "loss": 0.6642,
      "step": 3511
    },
    {
      "epoch": 0.721965258505499,
      "grad_norm": 0.1892167031764984,
      "learning_rate": 7.992563662676676e-05,
      "loss": 0.6768,
      "step": 3512
    },
    {
      "epoch": 0.7221708294788776,
      "grad_norm": 0.19989047944545746,
      "learning_rate": 7.991933747816437e-05,
      "loss": 0.7015,
      "step": 3513
    },
    {
      "epoch": 0.7223764004522562,
      "grad_norm": 0.19818507134914398,
      "learning_rate": 7.991303660924944e-05,
      "loss": 0.6459,
      "step": 3514
    },
    {
      "epoch": 0.7225819714256347,
      "grad_norm": 0.20084840059280396,
      "learning_rate": 7.990673402033238e-05,
      "loss": 0.6967,
      "step": 3515
    },
    {
      "epoch": 0.7227875423990132,
      "grad_norm": 0.19589127600193024,
      "learning_rate": 7.990042971172369e-05,
      "loss": 0.6819,
      "step": 3516
    },
    {
      "epoch": 0.7229931133723918,
      "grad_norm": 0.2054595798254013,
      "learning_rate": 7.989412368373395e-05,
      "loss": 0.5563,
      "step": 3517
    },
    {
      "epoch": 0.7231986843457704,
      "grad_norm": 0.16840699315071106,
      "learning_rate": 7.988781593667382e-05,
      "loss": 0.5998,
      "step": 3518
    },
    {
      "epoch": 0.723404255319149,
      "grad_norm": 0.20174477994441986,
      "learning_rate": 7.988150647085408e-05,
      "loss": 0.6767,
      "step": 3519
    },
    {
      "epoch": 0.7236098262925275,
      "grad_norm": 0.2114832103252411,
      "learning_rate": 7.987519528658556e-05,
      "loss": 0.674,
      "step": 3520
    },
    {
      "epoch": 0.723815397265906,
      "grad_norm": 0.20603235065937042,
      "learning_rate": 7.986888238417915e-05,
      "loss": 0.6922,
      "step": 3521
    },
    {
      "epoch": 0.7240209682392846,
      "grad_norm": 0.19396202266216278,
      "learning_rate": 7.98625677639459e-05,
      "loss": 0.6542,
      "step": 3522
    },
    {
      "epoch": 0.7242265392126632,
      "grad_norm": 0.19188427925109863,
      "learning_rate": 7.985625142619688e-05,
      "loss": 0.6423,
      "step": 3523
    },
    {
      "epoch": 0.7244321101860417,
      "grad_norm": 0.24525907635688782,
      "learning_rate": 7.984993337124326e-05,
      "loss": 0.5969,
      "step": 3524
    },
    {
      "epoch": 0.7246376811594203,
      "grad_norm": 0.22921410202980042,
      "learning_rate": 7.984361359939632e-05,
      "loss": 0.6787,
      "step": 3525
    },
    {
      "epoch": 0.7248432521327989,
      "grad_norm": 0.23027624189853668,
      "learning_rate": 7.98372921109674e-05,
      "loss": 0.6958,
      "step": 3526
    },
    {
      "epoch": 0.7250488231061774,
      "grad_norm": 0.21798734366893768,
      "learning_rate": 7.983096890626792e-05,
      "loss": 0.7058,
      "step": 3527
    },
    {
      "epoch": 0.7252543940795559,
      "grad_norm": 0.1834592968225479,
      "learning_rate": 7.98246439856094e-05,
      "loss": 0.5576,
      "step": 3528
    },
    {
      "epoch": 0.7254599650529345,
      "grad_norm": 0.20253108441829681,
      "learning_rate": 7.981831734930344e-05,
      "loss": 0.6919,
      "step": 3529
    },
    {
      "epoch": 0.7256655360263131,
      "grad_norm": 0.2038789689540863,
      "learning_rate": 7.981198899766173e-05,
      "loss": 0.7226,
      "step": 3530
    },
    {
      "epoch": 0.7258711069996916,
      "grad_norm": 0.19789783656597137,
      "learning_rate": 7.980565893099604e-05,
      "loss": 0.6876,
      "step": 3531
    },
    {
      "epoch": 0.7260766779730702,
      "grad_norm": 0.14825506508350372,
      "learning_rate": 7.97993271496182e-05,
      "loss": 0.5838,
      "step": 3532
    },
    {
      "epoch": 0.7262822489464488,
      "grad_norm": 0.19643041491508484,
      "learning_rate": 7.979299365384017e-05,
      "loss": 0.6868,
      "step": 3533
    },
    {
      "epoch": 0.7264878199198274,
      "grad_norm": 0.20128373801708221,
      "learning_rate": 7.978665844397397e-05,
      "loss": 0.683,
      "step": 3534
    },
    {
      "epoch": 0.7266933908932058,
      "grad_norm": 0.2025127112865448,
      "learning_rate": 7.978032152033169e-05,
      "loss": 0.6801,
      "step": 3535
    },
    {
      "epoch": 0.7268989618665844,
      "grad_norm": 0.19767989218235016,
      "learning_rate": 7.977398288322554e-05,
      "loss": 0.6735,
      "step": 3536
    },
    {
      "epoch": 0.727104532839963,
      "grad_norm": 0.1649659425020218,
      "learning_rate": 7.976764253296779e-05,
      "loss": 0.5818,
      "step": 3537
    },
    {
      "epoch": 0.7273101038133416,
      "grad_norm": 0.20704413950443268,
      "learning_rate": 7.976130046987078e-05,
      "loss": 0.7032,
      "step": 3538
    },
    {
      "epoch": 0.7275156747867201,
      "grad_norm": 0.20047134160995483,
      "learning_rate": 7.975495669424698e-05,
      "loss": 0.6851,
      "step": 3539
    },
    {
      "epoch": 0.7277212457600987,
      "grad_norm": 0.14262793958187103,
      "learning_rate": 7.974861120640891e-05,
      "loss": 0.5911,
      "step": 3540
    },
    {
      "epoch": 0.7279268167334773,
      "grad_norm": 0.19910430908203125,
      "learning_rate": 7.974226400666918e-05,
      "loss": 0.6729,
      "step": 3541
    },
    {
      "epoch": 0.7281323877068558,
      "grad_norm": 0.1975426971912384,
      "learning_rate": 7.973591509534048e-05,
      "loss": 0.6614,
      "step": 3542
    },
    {
      "epoch": 0.7283379586802343,
      "grad_norm": 0.18741396069526672,
      "learning_rate": 7.972956447273561e-05,
      "loss": 0.6808,
      "step": 3543
    },
    {
      "epoch": 0.7285435296536129,
      "grad_norm": 0.19174180924892426,
      "learning_rate": 7.972321213916742e-05,
      "loss": 0.6732,
      "step": 3544
    },
    {
      "epoch": 0.7287491006269915,
      "grad_norm": 0.18941205739974976,
      "learning_rate": 7.971685809494886e-05,
      "loss": 0.6854,
      "step": 3545
    },
    {
      "epoch": 0.72895467160037,
      "grad_norm": 0.18745878338813782,
      "learning_rate": 7.971050234039298e-05,
      "loss": 0.6653,
      "step": 3546
    },
    {
      "epoch": 0.7291602425737486,
      "grad_norm": 0.2130347341299057,
      "learning_rate": 7.970414487581287e-05,
      "loss": 0.6932,
      "step": 3547
    },
    {
      "epoch": 0.7293658135471272,
      "grad_norm": 0.18765027821063995,
      "learning_rate": 7.969778570152175e-05,
      "loss": 0.6639,
      "step": 3548
    },
    {
      "epoch": 0.7295713845205057,
      "grad_norm": 0.1892290711402893,
      "learning_rate": 7.969142481783291e-05,
      "loss": 0.6788,
      "step": 3549
    },
    {
      "epoch": 0.7297769554938842,
      "grad_norm": 0.19938233494758606,
      "learning_rate": 7.968506222505972e-05,
      "loss": 0.6736,
      "step": 3550
    },
    {
      "epoch": 0.7299825264672628,
      "grad_norm": 0.19479283690452576,
      "learning_rate": 7.967869792351563e-05,
      "loss": 0.671,
      "step": 3551
    },
    {
      "epoch": 0.7301880974406414,
      "grad_norm": 0.18895529210567474,
      "learning_rate": 7.967233191351418e-05,
      "loss": 0.6559,
      "step": 3552
    },
    {
      "epoch": 0.73039366841402,
      "grad_norm": 0.19964531064033508,
      "learning_rate": 7.966596419536899e-05,
      "loss": 0.6772,
      "step": 3553
    },
    {
      "epoch": 0.7305992393873985,
      "grad_norm": 0.1866195648908615,
      "learning_rate": 7.965959476939377e-05,
      "loss": 0.642,
      "step": 3554
    },
    {
      "epoch": 0.730804810360777,
      "grad_norm": 0.15533728897571564,
      "learning_rate": 7.965322363590232e-05,
      "loss": 0.5754,
      "step": 3555
    },
    {
      "epoch": 0.7310103813341556,
      "grad_norm": 0.19216640293598175,
      "learning_rate": 7.964685079520851e-05,
      "loss": 0.6827,
      "step": 3556
    },
    {
      "epoch": 0.7312159523075342,
      "grad_norm": 0.1994984894990921,
      "learning_rate": 7.96404762476263e-05,
      "loss": 0.6814,
      "step": 3557
    },
    {
      "epoch": 0.7314215232809127,
      "grad_norm": 0.34993866086006165,
      "learning_rate": 7.963409999346974e-05,
      "loss": 0.7039,
      "step": 3558
    },
    {
      "epoch": 0.7316270942542913,
      "grad_norm": 0.13572952151298523,
      "learning_rate": 7.962772203305295e-05,
      "loss": 0.5847,
      "step": 3559
    },
    {
      "epoch": 0.7318326652276699,
      "grad_norm": 0.21044890582561493,
      "learning_rate": 7.962134236669015e-05,
      "loss": 0.6852,
      "step": 3560
    },
    {
      "epoch": 0.7320382362010485,
      "grad_norm": 0.13309255242347717,
      "learning_rate": 7.961496099469562e-05,
      "loss": 0.5953,
      "step": 3561
    },
    {
      "epoch": 0.7322438071744269,
      "grad_norm": 0.19451969861984253,
      "learning_rate": 7.960857791738376e-05,
      "loss": 0.6785,
      "step": 3562
    },
    {
      "epoch": 0.7324493781478055,
      "grad_norm": 0.12751372158527374,
      "learning_rate": 7.960219313506901e-05,
      "loss": 0.6012,
      "step": 3563
    },
    {
      "epoch": 0.7326549491211841,
      "grad_norm": 0.19144867360591888,
      "learning_rate": 7.959580664806594e-05,
      "loss": 0.6883,
      "step": 3564
    },
    {
      "epoch": 0.7328605200945626,
      "grad_norm": 0.18746548891067505,
      "learning_rate": 7.958941845668921e-05,
      "loss": 0.6731,
      "step": 3565
    },
    {
      "epoch": 0.7330660910679412,
      "grad_norm": 0.7065462470054626,
      "learning_rate": 7.958302856125347e-05,
      "loss": 0.595,
      "step": 3566
    },
    {
      "epoch": 0.7332716620413198,
      "grad_norm": 0.1951018124818802,
      "learning_rate": 7.957663696207355e-05,
      "loss": 0.6601,
      "step": 3567
    },
    {
      "epoch": 0.7334772330146984,
      "grad_norm": 0.13065175712108612,
      "learning_rate": 7.957024365946436e-05,
      "loss": 0.5551,
      "step": 3568
    },
    {
      "epoch": 0.7336828039880768,
      "grad_norm": 0.21272675693035126,
      "learning_rate": 7.956384865374082e-05,
      "loss": 0.6846,
      "step": 3569
    },
    {
      "epoch": 0.7338883749614554,
      "grad_norm": 0.19540101289749146,
      "learning_rate": 7.955745194521802e-05,
      "loss": 0.6747,
      "step": 3570
    },
    {
      "epoch": 0.734093945934834,
      "grad_norm": 0.19584521651268005,
      "learning_rate": 7.95510535342111e-05,
      "loss": 0.6877,
      "step": 3571
    },
    {
      "epoch": 0.7342995169082126,
      "grad_norm": 0.19038638472557068,
      "learning_rate": 7.954465342103525e-05,
      "loss": 0.6776,
      "step": 3572
    },
    {
      "epoch": 0.7345050878815911,
      "grad_norm": 0.1913788616657257,
      "learning_rate": 7.953825160600579e-05,
      "loss": 0.6754,
      "step": 3573
    },
    {
      "epoch": 0.7347106588549697,
      "grad_norm": 0.19518351554870605,
      "learning_rate": 7.953184808943808e-05,
      "loss": 0.675,
      "step": 3574
    },
    {
      "epoch": 0.7349162298283483,
      "grad_norm": 0.19314491748809814,
      "learning_rate": 7.952544287164763e-05,
      "loss": 0.6771,
      "step": 3575
    },
    {
      "epoch": 0.7351218008017268,
      "grad_norm": 0.2056049257516861,
      "learning_rate": 7.951903595295e-05,
      "loss": 0.6825,
      "step": 3576
    },
    {
      "epoch": 0.7353273717751053,
      "grad_norm": 0.19159257411956787,
      "learning_rate": 7.95126273336608e-05,
      "loss": 0.6783,
      "step": 3577
    },
    {
      "epoch": 0.7355329427484839,
      "grad_norm": 0.1686679869890213,
      "learning_rate": 7.950621701409577e-05,
      "loss": 0.581,
      "step": 3578
    },
    {
      "epoch": 0.7357385137218625,
      "grad_norm": 0.14951810240745544,
      "learning_rate": 7.94998049945707e-05,
      "loss": 0.5694,
      "step": 3579
    },
    {
      "epoch": 0.7359440846952411,
      "grad_norm": 0.2037050724029541,
      "learning_rate": 7.949339127540149e-05,
      "loss": 0.6722,
      "step": 3580
    },
    {
      "epoch": 0.7361496556686196,
      "grad_norm": 0.15541227161884308,
      "learning_rate": 7.948697585690412e-05,
      "loss": 0.6053,
      "step": 3581
    },
    {
      "epoch": 0.7363552266419982,
      "grad_norm": 0.20057538151741028,
      "learning_rate": 7.948055873939463e-05,
      "loss": 0.6745,
      "step": 3582
    },
    {
      "epoch": 0.7365607976153767,
      "grad_norm": 0.19490864872932434,
      "learning_rate": 7.947413992318918e-05,
      "loss": 0.6963,
      "step": 3583
    },
    {
      "epoch": 0.7367663685887552,
      "grad_norm": 0.19570674002170563,
      "learning_rate": 7.946771940860398e-05,
      "loss": 0.6913,
      "step": 3584
    },
    {
      "epoch": 0.7369719395621338,
      "grad_norm": 0.18625394999980927,
      "learning_rate": 7.946129719595535e-05,
      "loss": 0.6699,
      "step": 3585
    },
    {
      "epoch": 0.7371775105355124,
      "grad_norm": 0.6736593246459961,
      "learning_rate": 7.945487328555969e-05,
      "loss": 0.5934,
      "step": 3586
    },
    {
      "epoch": 0.737383081508891,
      "grad_norm": 0.1934710294008255,
      "learning_rate": 7.944844767773344e-05,
      "loss": 0.672,
      "step": 3587
    },
    {
      "epoch": 0.7375886524822695,
      "grad_norm": 0.20478187501430511,
      "learning_rate": 7.944202037279322e-05,
      "loss": 0.6703,
      "step": 3588
    },
    {
      "epoch": 0.737794223455648,
      "grad_norm": 0.1952143758535385,
      "learning_rate": 7.94355913710556e-05,
      "loss": 0.665,
      "step": 3589
    },
    {
      "epoch": 0.7379997944290266,
      "grad_norm": 0.2044733166694641,
      "learning_rate": 7.942916067283737e-05,
      "loss": 0.6705,
      "step": 3590
    },
    {
      "epoch": 0.7382053654024052,
      "grad_norm": 0.1511656492948532,
      "learning_rate": 7.942272827845531e-05,
      "loss": 0.5709,
      "step": 3591
    },
    {
      "epoch": 0.7384109363757837,
      "grad_norm": 0.20712168514728546,
      "learning_rate": 7.941629418822631e-05,
      "loss": 0.6822,
      "step": 3592
    },
    {
      "epoch": 0.7386165073491623,
      "grad_norm": 0.18875378370285034,
      "learning_rate": 7.940985840246738e-05,
      "loss": 0.6657,
      "step": 3593
    },
    {
      "epoch": 0.7388220783225409,
      "grad_norm": 0.20335470139980316,
      "learning_rate": 7.940342092149552e-05,
      "loss": 0.6803,
      "step": 3594
    },
    {
      "epoch": 0.7390276492959195,
      "grad_norm": 0.19990339875221252,
      "learning_rate": 7.939698174562795e-05,
      "loss": 0.6633,
      "step": 3595
    },
    {
      "epoch": 0.7392332202692979,
      "grad_norm": 0.19923284649848938,
      "learning_rate": 7.939054087518184e-05,
      "loss": 0.6894,
      "step": 3596
    },
    {
      "epoch": 0.7394387912426765,
      "grad_norm": 0.20602424442768097,
      "learning_rate": 7.938409831047452e-05,
      "loss": 0.7057,
      "step": 3597
    },
    {
      "epoch": 0.7396443622160551,
      "grad_norm": 0.19284965097904205,
      "learning_rate": 7.93776540518234e-05,
      "loss": 0.6619,
      "step": 3598
    },
    {
      "epoch": 0.7398499331894337,
      "grad_norm": 0.18483732640743256,
      "learning_rate": 7.937120809954593e-05,
      "loss": 0.664,
      "step": 3599
    },
    {
      "epoch": 0.7400555041628122,
      "grad_norm": 0.19070151448249817,
      "learning_rate": 7.93647604539597e-05,
      "loss": 0.6934,
      "step": 3600
    },
    {
      "epoch": 0.7402610751361908,
      "grad_norm": 0.1932380348443985,
      "learning_rate": 7.935831111538234e-05,
      "loss": 0.6692,
      "step": 3601
    },
    {
      "epoch": 0.7404666461095694,
      "grad_norm": 0.1923176348209381,
      "learning_rate": 7.935186008413158e-05,
      "loss": 0.6813,
      "step": 3602
    },
    {
      "epoch": 0.7406722170829478,
      "grad_norm": 0.19491972029209137,
      "learning_rate": 7.934540736052524e-05,
      "loss": 0.6571,
      "step": 3603
    },
    {
      "epoch": 0.7408777880563264,
      "grad_norm": 0.19038790464401245,
      "learning_rate": 7.93389529448812e-05,
      "loss": 0.6627,
      "step": 3604
    },
    {
      "epoch": 0.741083359029705,
      "grad_norm": 0.1902906596660614,
      "learning_rate": 7.933249683751745e-05,
      "loss": 0.6792,
      "step": 3605
    },
    {
      "epoch": 0.7412889300030836,
      "grad_norm": 0.18056754767894745,
      "learning_rate": 7.932603903875205e-05,
      "loss": 0.6706,
      "step": 3606
    },
    {
      "epoch": 0.7414945009764621,
      "grad_norm": 0.19401055574417114,
      "learning_rate": 7.931957954890316e-05,
      "loss": 0.6997,
      "step": 3607
    },
    {
      "epoch": 0.7417000719498407,
      "grad_norm": 0.19308343529701233,
      "learning_rate": 7.931311836828898e-05,
      "loss": 0.6804,
      "step": 3608
    },
    {
      "epoch": 0.7419056429232193,
      "grad_norm": 0.20034140348434448,
      "learning_rate": 7.930665549722784e-05,
      "loss": 0.6672,
      "step": 3609
    },
    {
      "epoch": 0.7421112138965978,
      "grad_norm": 0.1429484337568283,
      "learning_rate": 7.930019093603813e-05,
      "loss": 0.5769,
      "step": 3610
    },
    {
      "epoch": 0.7423167848699763,
      "grad_norm": 0.19549964368343353,
      "learning_rate": 7.929372468503834e-05,
      "loss": 0.68,
      "step": 3611
    },
    {
      "epoch": 0.7425223558433549,
      "grad_norm": 0.1939014494419098,
      "learning_rate": 7.928725674454702e-05,
      "loss": 0.6436,
      "step": 3612
    },
    {
      "epoch": 0.7427279268167335,
      "grad_norm": 0.1987033188343048,
      "learning_rate": 7.928078711488281e-05,
      "loss": 0.6975,
      "step": 3613
    },
    {
      "epoch": 0.7429334977901121,
      "grad_norm": 0.19069653749465942,
      "learning_rate": 7.927431579636445e-05,
      "loss": 0.6744,
      "step": 3614
    },
    {
      "epoch": 0.7431390687634906,
      "grad_norm": 0.14583733677864075,
      "learning_rate": 7.926784278931075e-05,
      "loss": 0.587,
      "step": 3615
    },
    {
      "epoch": 0.7433446397368692,
      "grad_norm": 0.19307653605937958,
      "learning_rate": 7.926136809404063e-05,
      "loss": 0.6458,
      "step": 3616
    },
    {
      "epoch": 0.7435502107102477,
      "grad_norm": 0.19686581194400787,
      "learning_rate": 7.9254891710873e-05,
      "loss": 0.6936,
      "step": 3617
    },
    {
      "epoch": 0.7437557816836263,
      "grad_norm": 0.19272616505622864,
      "learning_rate": 7.924841364012698e-05,
      "loss": 0.6931,
      "step": 3618
    },
    {
      "epoch": 0.7439613526570048,
      "grad_norm": 0.1832963228225708,
      "learning_rate": 7.92419338821217e-05,
      "loss": 0.6543,
      "step": 3619
    },
    {
      "epoch": 0.7441669236303834,
      "grad_norm": 0.1948852688074112,
      "learning_rate": 7.923545243717638e-05,
      "loss": 0.6934,
      "step": 3620
    },
    {
      "epoch": 0.744372494603762,
      "grad_norm": 0.19358238577842712,
      "learning_rate": 7.922896930561034e-05,
      "loss": 0.6901,
      "step": 3621
    },
    {
      "epoch": 0.7445780655771405,
      "grad_norm": 0.18982093036174774,
      "learning_rate": 7.922248448774296e-05,
      "loss": 0.6832,
      "step": 3622
    },
    {
      "epoch": 0.744783636550519,
      "grad_norm": 0.19411057233810425,
      "learning_rate": 7.921599798389372e-05,
      "loss": 0.6899,
      "step": 3623
    },
    {
      "epoch": 0.7449892075238976,
      "grad_norm": 0.1885984092950821,
      "learning_rate": 7.92095097943822e-05,
      "loss": 0.6699,
      "step": 3624
    },
    {
      "epoch": 0.7451947784972762,
      "grad_norm": 0.19820182025432587,
      "learning_rate": 7.920301991952802e-05,
      "loss": 0.6872,
      "step": 3625
    },
    {
      "epoch": 0.7454003494706547,
      "grad_norm": 0.18656107783317566,
      "learning_rate": 7.91965283596509e-05,
      "loss": 0.6982,
      "step": 3626
    },
    {
      "epoch": 0.7456059204440333,
      "grad_norm": 0.14508990943431854,
      "learning_rate": 7.919003511507069e-05,
      "loss": 0.5908,
      "step": 3627
    },
    {
      "epoch": 0.7458114914174119,
      "grad_norm": 0.2058647722005844,
      "learning_rate": 7.918354018610723e-05,
      "loss": 0.6962,
      "step": 3628
    },
    {
      "epoch": 0.7460170623907905,
      "grad_norm": 0.20024776458740234,
      "learning_rate": 7.917704357308052e-05,
      "loss": 0.6748,
      "step": 3629
    },
    {
      "epoch": 0.7462226333641689,
      "grad_norm": 0.18803846836090088,
      "learning_rate": 7.917054527631062e-05,
      "loss": 0.6878,
      "step": 3630
    },
    {
      "epoch": 0.7464282043375475,
      "grad_norm": 0.18676309287548065,
      "learning_rate": 7.916404529611768e-05,
      "loss": 0.6497,
      "step": 3631
    },
    {
      "epoch": 0.7466337753109261,
      "grad_norm": 0.18984469771385193,
      "learning_rate": 7.915754363282189e-05,
      "loss": 0.667,
      "step": 3632
    },
    {
      "epoch": 0.7468393462843047,
      "grad_norm": 0.1905134618282318,
      "learning_rate": 7.915104028674359e-05,
      "loss": 0.7037,
      "step": 3633
    },
    {
      "epoch": 0.7470449172576832,
      "grad_norm": 0.19282597303390503,
      "learning_rate": 7.914453525820314e-05,
      "loss": 0.6825,
      "step": 3634
    },
    {
      "epoch": 0.7472504882310618,
      "grad_norm": 0.191225066781044,
      "learning_rate": 7.913802854752105e-05,
      "loss": 0.6693,
      "step": 3635
    },
    {
      "epoch": 0.7474560592044404,
      "grad_norm": 0.19597823917865753,
      "learning_rate": 7.913152015501785e-05,
      "loss": 0.6854,
      "step": 3636
    },
    {
      "epoch": 0.7476616301778188,
      "grad_norm": 0.19076837599277496,
      "learning_rate": 7.912501008101417e-05,
      "loss": 0.6669,
      "step": 3637
    },
    {
      "epoch": 0.7478672011511974,
      "grad_norm": 0.15839332342147827,
      "learning_rate": 7.911849832583075e-05,
      "loss": 0.5823,
      "step": 3638
    },
    {
      "epoch": 0.748072772124576,
      "grad_norm": 0.19790640473365784,
      "learning_rate": 7.91119848897884e-05,
      "loss": 0.6758,
      "step": 3639
    },
    {
      "epoch": 0.7482783430979546,
      "grad_norm": 0.20291505753993988,
      "learning_rate": 7.910546977320799e-05,
      "loss": 0.6858,
      "step": 3640
    },
    {
      "epoch": 0.7484839140713331,
      "grad_norm": 0.19537273049354553,
      "learning_rate": 7.909895297641047e-05,
      "loss": 0.6818,
      "step": 3641
    },
    {
      "epoch": 0.7486894850447117,
      "grad_norm": 0.14734981954097748,
      "learning_rate": 7.909243449971693e-05,
      "loss": 0.5743,
      "step": 3642
    },
    {
      "epoch": 0.7488950560180903,
      "grad_norm": 0.15119509398937225,
      "learning_rate": 7.90859143434485e-05,
      "loss": 0.5797,
      "step": 3643
    },
    {
      "epoch": 0.7491006269914688,
      "grad_norm": 0.23732592165470123,
      "learning_rate": 7.907939250792638e-05,
      "loss": 0.6841,
      "step": 3644
    },
    {
      "epoch": 0.7493061979648473,
      "grad_norm": 0.2022113800048828,
      "learning_rate": 7.907286899347187e-05,
      "loss": 0.707,
      "step": 3645
    },
    {
      "epoch": 0.7495117689382259,
      "grad_norm": 0.19698172807693481,
      "learning_rate": 7.906634380040636e-05,
      "loss": 0.6966,
      "step": 3646
    },
    {
      "epoch": 0.7497173399116045,
      "grad_norm": 0.21839676797389984,
      "learning_rate": 7.905981692905133e-05,
      "loss": 0.6853,
      "step": 3647
    },
    {
      "epoch": 0.7499229108849831,
      "grad_norm": 0.20229050517082214,
      "learning_rate": 7.90532883797283e-05,
      "loss": 0.659,
      "step": 3648
    },
    {
      "epoch": 0.7501284818583616,
      "grad_norm": 0.18536463379859924,
      "learning_rate": 7.904675815275894e-05,
      "loss": 0.6534,
      "step": 3649
    },
    {
      "epoch": 0.7503340528317402,
      "grad_norm": 0.20928248763084412,
      "learning_rate": 7.904022624846491e-05,
      "loss": 0.6913,
      "step": 3650
    },
    {
      "epoch": 0.7505396238051187,
      "grad_norm": 0.20999811589717865,
      "learning_rate": 7.903369266716806e-05,
      "loss": 0.654,
      "step": 3651
    },
    {
      "epoch": 0.7507451947784973,
      "grad_norm": 0.19690896570682526,
      "learning_rate": 7.902715740919023e-05,
      "loss": 0.5836,
      "step": 3652
    },
    {
      "epoch": 0.7509507657518758,
      "grad_norm": 0.1489873230457306,
      "learning_rate": 7.902062047485341e-05,
      "loss": 0.5822,
      "step": 3653
    },
    {
      "epoch": 0.7511563367252544,
      "grad_norm": 0.2375965416431427,
      "learning_rate": 7.901408186447962e-05,
      "loss": 0.6857,
      "step": 3654
    },
    {
      "epoch": 0.751361907698633,
      "grad_norm": 0.2292969673871994,
      "learning_rate": 7.9007541578391e-05,
      "loss": 0.6998,
      "step": 3655
    },
    {
      "epoch": 0.7515674786720115,
      "grad_norm": 0.1982121616601944,
      "learning_rate": 7.900099961690976e-05,
      "loss": 0.6853,
      "step": 3656
    },
    {
      "epoch": 0.75177304964539,
      "grad_norm": 0.21135136485099792,
      "learning_rate": 7.899445598035819e-05,
      "loss": 0.6663,
      "step": 3657
    },
    {
      "epoch": 0.7519786206187686,
      "grad_norm": 0.2433331459760666,
      "learning_rate": 7.898791066905866e-05,
      "loss": 0.603,
      "step": 3658
    },
    {
      "epoch": 0.7521841915921472,
      "grad_norm": 0.19841930270195007,
      "learning_rate": 7.898136368333363e-05,
      "loss": 0.6507,
      "step": 3659
    },
    {
      "epoch": 0.7523897625655257,
      "grad_norm": 0.20042434334754944,
      "learning_rate": 7.897481502350565e-05,
      "loss": 0.6522,
      "step": 3660
    },
    {
      "epoch": 0.7525953335389043,
      "grad_norm": 0.2082412987947464,
      "learning_rate": 7.896826468989731e-05,
      "loss": 0.682,
      "step": 3661
    },
    {
      "epoch": 0.7528009045122829,
      "grad_norm": 0.2017931491136551,
      "learning_rate": 7.896171268283136e-05,
      "loss": 0.6729,
      "step": 3662
    },
    {
      "epoch": 0.7530064754856615,
      "grad_norm": 0.1931910514831543,
      "learning_rate": 7.895515900263055e-05,
      "loss": 0.6525,
      "step": 3663
    },
    {
      "epoch": 0.7532120464590399,
      "grad_norm": 0.21447621285915375,
      "learning_rate": 7.894860364961778e-05,
      "loss": 0.689,
      "step": 3664
    },
    {
      "epoch": 0.7534176174324185,
      "grad_norm": 0.20270651578903198,
      "learning_rate": 7.894204662411595e-05,
      "loss": 0.6926,
      "step": 3665
    },
    {
      "epoch": 0.7536231884057971,
      "grad_norm": 0.1878805160522461,
      "learning_rate": 7.893548792644815e-05,
      "loss": 0.6721,
      "step": 3666
    },
    {
      "epoch": 0.7538287593791757,
      "grad_norm": 0.19181132316589355,
      "learning_rate": 7.892892755693747e-05,
      "loss": 0.6734,
      "step": 3667
    },
    {
      "epoch": 0.7540343303525542,
      "grad_norm": 0.19380466639995575,
      "learning_rate": 7.892236551590712e-05,
      "loss": 0.6621,
      "step": 3668
    },
    {
      "epoch": 0.7542399013259328,
      "grad_norm": 0.20492911338806152,
      "learning_rate": 7.891580180368036e-05,
      "loss": 0.6827,
      "step": 3669
    },
    {
      "epoch": 0.7544454722993114,
      "grad_norm": 0.18449199199676514,
      "learning_rate": 7.890923642058058e-05,
      "loss": 0.6666,
      "step": 3670
    },
    {
      "epoch": 0.75465104327269,
      "grad_norm": 0.18999159336090088,
      "learning_rate": 7.890266936693121e-05,
      "loss": 0.6498,
      "step": 3671
    },
    {
      "epoch": 0.7548566142460684,
      "grad_norm": 0.19277434051036835,
      "learning_rate": 7.889610064305578e-05,
      "loss": 0.6759,
      "step": 3672
    },
    {
      "epoch": 0.755062185219447,
      "grad_norm": 0.1884971410036087,
      "learning_rate": 7.888953024927789e-05,
      "loss": 0.6745,
      "step": 3673
    },
    {
      "epoch": 0.7552677561928256,
      "grad_norm": 0.19598397612571716,
      "learning_rate": 7.888295818592125e-05,
      "loss": 0.6803,
      "step": 3674
    },
    {
      "epoch": 0.7554733271662041,
      "grad_norm": 0.19982978701591492,
      "learning_rate": 7.887638445330962e-05,
      "loss": 0.6736,
      "step": 3675
    },
    {
      "epoch": 0.7556788981395827,
      "grad_norm": 0.19140852987766266,
      "learning_rate": 7.886980905176689e-05,
      "loss": 0.6659,
      "step": 3676
    },
    {
      "epoch": 0.7558844691129613,
      "grad_norm": 0.18775241076946259,
      "learning_rate": 7.886323198161695e-05,
      "loss": 0.67,
      "step": 3677
    },
    {
      "epoch": 0.7560900400863398,
      "grad_norm": 0.1859831064939499,
      "learning_rate": 7.885665324318386e-05,
      "loss": 0.6554,
      "step": 3678
    },
    {
      "epoch": 0.7562956110597183,
      "grad_norm": 0.19015206396579742,
      "learning_rate": 7.885007283679173e-05,
      "loss": 0.7039,
      "step": 3679
    },
    {
      "epoch": 0.7565011820330969,
      "grad_norm": 0.19563472270965576,
      "learning_rate": 7.884349076276469e-05,
      "loss": 0.6769,
      "step": 3680
    },
    {
      "epoch": 0.7567067530064755,
      "grad_norm": 0.2165932059288025,
      "learning_rate": 7.883690702142706e-05,
      "loss": 0.5897,
      "step": 3681
    },
    {
      "epoch": 0.7569123239798541,
      "grad_norm": 0.19110572338104248,
      "learning_rate": 7.883032161310318e-05,
      "loss": 0.6666,
      "step": 3682
    },
    {
      "epoch": 0.7571178949532326,
      "grad_norm": 0.2043447345495224,
      "learning_rate": 7.882373453811745e-05,
      "loss": 0.6633,
      "step": 3683
    },
    {
      "epoch": 0.7573234659266112,
      "grad_norm": 0.19598691165447235,
      "learning_rate": 7.881714579679444e-05,
      "loss": 0.6601,
      "step": 3684
    },
    {
      "epoch": 0.7575290368999897,
      "grad_norm": 0.16248776018619537,
      "learning_rate": 7.88105553894587e-05,
      "loss": 0.585,
      "step": 3685
    },
    {
      "epoch": 0.7577346078733683,
      "grad_norm": 0.1903761625289917,
      "learning_rate": 7.880396331643496e-05,
      "loss": 0.6702,
      "step": 3686
    },
    {
      "epoch": 0.7579401788467468,
      "grad_norm": 0.19729363918304443,
      "learning_rate": 7.87973695780479e-05,
      "loss": 0.6762,
      "step": 3687
    },
    {
      "epoch": 0.7581457498201254,
      "grad_norm": 0.20168879628181458,
      "learning_rate": 7.879077417462244e-05,
      "loss": 0.7108,
      "step": 3688
    },
    {
      "epoch": 0.758351320793504,
      "grad_norm": 0.18572981655597687,
      "learning_rate": 7.878417710648346e-05,
      "loss": 0.6516,
      "step": 3689
    },
    {
      "epoch": 0.7585568917668826,
      "grad_norm": 0.18781378865242004,
      "learning_rate": 7.8777578373956e-05,
      "loss": 0.6767,
      "step": 3690
    },
    {
      "epoch": 0.758762462740261,
      "grad_norm": 0.1998245269060135,
      "learning_rate": 7.877097797736511e-05,
      "loss": 0.6723,
      "step": 3691
    },
    {
      "epoch": 0.7589680337136396,
      "grad_norm": 0.22822120785713196,
      "learning_rate": 7.876437591703598e-05,
      "loss": 0.668,
      "step": 3692
    },
    {
      "epoch": 0.7591736046870182,
      "grad_norm": 0.19273287057876587,
      "learning_rate": 7.875777219329386e-05,
      "loss": 0.6699,
      "step": 3693
    },
    {
      "epoch": 0.7593791756603967,
      "grad_norm": 0.2089652717113495,
      "learning_rate": 7.875116680646411e-05,
      "loss": 0.6664,
      "step": 3694
    },
    {
      "epoch": 0.7595847466337753,
      "grad_norm": 0.1920463740825653,
      "learning_rate": 7.87445597568721e-05,
      "loss": 0.6731,
      "step": 3695
    },
    {
      "epoch": 0.7597903176071539,
      "grad_norm": 0.19104163348674774,
      "learning_rate": 7.873795104484337e-05,
      "loss": 0.6813,
      "step": 3696
    },
    {
      "epoch": 0.7599958885805325,
      "grad_norm": 0.15439750254154205,
      "learning_rate": 7.873134067070347e-05,
      "loss": 0.56,
      "step": 3697
    },
    {
      "epoch": 0.7602014595539109,
      "grad_norm": 0.19592773914337158,
      "learning_rate": 7.872472863477808e-05,
      "loss": 0.6858,
      "step": 3698
    },
    {
      "epoch": 0.7604070305272895,
      "grad_norm": 0.19534648954868317,
      "learning_rate": 7.871811493739294e-05,
      "loss": 0.681,
      "step": 3699
    },
    {
      "epoch": 0.7606126015006681,
      "grad_norm": 0.13310682773590088,
      "learning_rate": 7.871149957887387e-05,
      "loss": 0.5885,
      "step": 3700
    },
    {
      "epoch": 0.7608181724740467,
      "grad_norm": 0.19378095865249634,
      "learning_rate": 7.870488255954679e-05,
      "loss": 0.667,
      "step": 3701
    },
    {
      "epoch": 0.7610237434474252,
      "grad_norm": 0.19437304139137268,
      "learning_rate": 7.869826387973768e-05,
      "loss": 0.6729,
      "step": 3702
    },
    {
      "epoch": 0.7612293144208038,
      "grad_norm": 0.19552649557590485,
      "learning_rate": 7.869164353977261e-05,
      "loss": 0.668,
      "step": 3703
    },
    {
      "epoch": 0.7614348853941824,
      "grad_norm": 0.15091755986213684,
      "learning_rate": 7.868502153997774e-05,
      "loss": 0.5726,
      "step": 3704
    },
    {
      "epoch": 0.761640456367561,
      "grad_norm": 0.2120988517999649,
      "learning_rate": 7.867839788067931e-05,
      "loss": 0.69,
      "step": 3705
    },
    {
      "epoch": 0.7618460273409394,
      "grad_norm": 0.1858333796262741,
      "learning_rate": 7.867177256220362e-05,
      "loss": 0.677,
      "step": 3706
    },
    {
      "epoch": 0.762051598314318,
      "grad_norm": 0.1518946886062622,
      "learning_rate": 7.866514558487709e-05,
      "loss": 0.5866,
      "step": 3707
    },
    {
      "epoch": 0.7622571692876966,
      "grad_norm": 0.20156964659690857,
      "learning_rate": 7.865851694902617e-05,
      "loss": 0.6694,
      "step": 3708
    },
    {
      "epoch": 0.7624627402610752,
      "grad_norm": 0.19284150004386902,
      "learning_rate": 7.865188665497744e-05,
      "loss": 0.6577,
      "step": 3709
    },
    {
      "epoch": 0.7626683112344537,
      "grad_norm": 0.13599884510040283,
      "learning_rate": 7.864525470305756e-05,
      "loss": 0.5647,
      "step": 3710
    },
    {
      "epoch": 0.7628738822078323,
      "grad_norm": 0.20330367982387543,
      "learning_rate": 7.863862109359322e-05,
      "loss": 0.6663,
      "step": 3711
    },
    {
      "epoch": 0.7630794531812108,
      "grad_norm": 0.1969096064567566,
      "learning_rate": 7.863198582691125e-05,
      "loss": 0.6966,
      "step": 3712
    },
    {
      "epoch": 0.7632850241545893,
      "grad_norm": 0.20115163922309875,
      "learning_rate": 7.862534890333854e-05,
      "loss": 0.7011,
      "step": 3713
    },
    {
      "epoch": 0.7634905951279679,
      "grad_norm": 0.20134492218494415,
      "learning_rate": 7.861871032320206e-05,
      "loss": 0.6588,
      "step": 3714
    },
    {
      "epoch": 0.7636961661013465,
      "grad_norm": 0.18914572894573212,
      "learning_rate": 7.861207008682884e-05,
      "loss": 0.6581,
      "step": 3715
    },
    {
      "epoch": 0.7639017370747251,
      "grad_norm": 0.430144339799881,
      "learning_rate": 7.860542819454603e-05,
      "loss": 0.6026,
      "step": 3716
    },
    {
      "epoch": 0.7641073080481036,
      "grad_norm": 0.18655115365982056,
      "learning_rate": 7.859878464668086e-05,
      "loss": 0.6869,
      "step": 3717
    },
    {
      "epoch": 0.7643128790214821,
      "grad_norm": 0.19397111237049103,
      "learning_rate": 7.85921394435606e-05,
      "loss": 0.6888,
      "step": 3718
    },
    {
      "epoch": 0.7645184499948607,
      "grad_norm": 0.18396249413490295,
      "learning_rate": 7.858549258551263e-05,
      "loss": 0.6527,
      "step": 3719
    },
    {
      "epoch": 0.7647240209682393,
      "grad_norm": 0.17971353232860565,
      "learning_rate": 7.857884407286442e-05,
      "loss": 0.6879,
      "step": 3720
    },
    {
      "epoch": 0.7649295919416178,
      "grad_norm": 0.1879139393568039,
      "learning_rate": 7.857219390594353e-05,
      "loss": 0.6821,
      "step": 3721
    },
    {
      "epoch": 0.7651351629149964,
      "grad_norm": 0.1858903020620346,
      "learning_rate": 7.856554208507755e-05,
      "loss": 0.6818,
      "step": 3722
    },
    {
      "epoch": 0.765340733888375,
      "grad_norm": 0.1843085139989853,
      "learning_rate": 7.85588886105942e-05,
      "loss": 0.6661,
      "step": 3723
    },
    {
      "epoch": 0.7655463048617536,
      "grad_norm": 0.18377020955085754,
      "learning_rate": 7.855223348282126e-05,
      "loss": 0.6742,
      "step": 3724
    },
    {
      "epoch": 0.765751875835132,
      "grad_norm": 0.1833381950855255,
      "learning_rate": 7.854557670208659e-05,
      "loss": 0.6676,
      "step": 3725
    },
    {
      "epoch": 0.7659574468085106,
      "grad_norm": 0.19020181894302368,
      "learning_rate": 7.853891826871816e-05,
      "loss": 0.6742,
      "step": 3726
    },
    {
      "epoch": 0.7661630177818892,
      "grad_norm": 0.18213771283626556,
      "learning_rate": 7.853225818304398e-05,
      "loss": 0.5946,
      "step": 3727
    },
    {
      "epoch": 0.7663685887552678,
      "grad_norm": 0.20896635949611664,
      "learning_rate": 7.852559644539216e-05,
      "loss": 0.6719,
      "step": 3728
    },
    {
      "epoch": 0.7665741597286463,
      "grad_norm": 0.19129472970962524,
      "learning_rate": 7.851893305609091e-05,
      "loss": 0.6838,
      "step": 3729
    },
    {
      "epoch": 0.7667797307020249,
      "grad_norm": 0.18608838319778442,
      "learning_rate": 7.85122680154685e-05,
      "loss": 0.6702,
      "step": 3730
    },
    {
      "epoch": 0.7669853016754035,
      "grad_norm": 0.13603243231773376,
      "learning_rate": 7.85056013238533e-05,
      "loss": 0.5653,
      "step": 3731
    },
    {
      "epoch": 0.7671908726487819,
      "grad_norm": 0.1969052106142044,
      "learning_rate": 7.849893298157369e-05,
      "loss": 0.6705,
      "step": 3732
    },
    {
      "epoch": 0.7673964436221605,
      "grad_norm": 0.19232457876205444,
      "learning_rate": 7.849226298895824e-05,
      "loss": 0.6542,
      "step": 3733
    },
    {
      "epoch": 0.7676020145955391,
      "grad_norm": 0.18796077370643616,
      "learning_rate": 7.848559134633555e-05,
      "loss": 0.6682,
      "step": 3734
    },
    {
      "epoch": 0.7678075855689177,
      "grad_norm": 0.19674451649188995,
      "learning_rate": 7.847891805403426e-05,
      "loss": 0.6574,
      "step": 3735
    },
    {
      "epoch": 0.7680131565422962,
      "grad_norm": 0.19735072553157806,
      "learning_rate": 7.847224311238316e-05,
      "loss": 0.6637,
      "step": 3736
    },
    {
      "epoch": 0.7682187275156748,
      "grad_norm": 0.22023150324821472,
      "learning_rate": 7.846556652171112e-05,
      "loss": 0.6634,
      "step": 3737
    },
    {
      "epoch": 0.7684242984890534,
      "grad_norm": 0.18101370334625244,
      "learning_rate": 7.845888828234701e-05,
      "loss": 0.6424,
      "step": 3738
    },
    {
      "epoch": 0.768629869462432,
      "grad_norm": 0.18563824892044067,
      "learning_rate": 7.845220839461987e-05,
      "loss": 0.6618,
      "step": 3739
    },
    {
      "epoch": 0.7688354404358104,
      "grad_norm": 0.18954195082187653,
      "learning_rate": 7.844552685885877e-05,
      "loss": 0.6885,
      "step": 3740
    },
    {
      "epoch": 0.769041011409189,
      "grad_norm": 0.14499548077583313,
      "learning_rate": 7.843884367539289e-05,
      "loss": 0.6127,
      "step": 3741
    },
    {
      "epoch": 0.7692465823825676,
      "grad_norm": 0.20436535775661469,
      "learning_rate": 7.843215884455147e-05,
      "loss": 0.6805,
      "step": 3742
    },
    {
      "epoch": 0.7694521533559462,
      "grad_norm": 0.20969851315021515,
      "learning_rate": 7.842547236666386e-05,
      "loss": 0.6548,
      "step": 3743
    },
    {
      "epoch": 0.7696577243293247,
      "grad_norm": 0.19497977197170258,
      "learning_rate": 7.841878424205944e-05,
      "loss": 0.7104,
      "step": 3744
    },
    {
      "epoch": 0.7698632953027033,
      "grad_norm": 0.1905307173728943,
      "learning_rate": 7.841209447106772e-05,
      "loss": 0.6676,
      "step": 3745
    },
    {
      "epoch": 0.7700688662760818,
      "grad_norm": 0.1859470009803772,
      "learning_rate": 7.840540305401828e-05,
      "loss": 0.6712,
      "step": 3746
    },
    {
      "epoch": 0.7702744372494604,
      "grad_norm": 0.19429220259189606,
      "learning_rate": 7.839870999124077e-05,
      "loss": 0.6763,
      "step": 3747
    },
    {
      "epoch": 0.7704800082228389,
      "grad_norm": 0.188473641872406,
      "learning_rate": 7.839201528306492e-05,
      "loss": 0.6856,
      "step": 3748
    },
    {
      "epoch": 0.7706855791962175,
      "grad_norm": 0.19540703296661377,
      "learning_rate": 7.838531892982057e-05,
      "loss": 0.6616,
      "step": 3749
    },
    {
      "epoch": 0.7708911501695961,
      "grad_norm": 0.1938808113336563,
      "learning_rate": 7.837862093183758e-05,
      "loss": 0.6553,
      "step": 3750
    },
    {
      "epoch": 0.7710967211429746,
      "grad_norm": 0.1836869865655899,
      "learning_rate": 7.837192128944594e-05,
      "loss": 0.6768,
      "step": 3751
    },
    {
      "epoch": 0.7713022921163531,
      "grad_norm": 0.1519763171672821,
      "learning_rate": 7.836522000297572e-05,
      "loss": 0.6059,
      "step": 3752
    },
    {
      "epoch": 0.7715078630897317,
      "grad_norm": 0.19223132729530334,
      "learning_rate": 7.835851707275707e-05,
      "loss": 0.7093,
      "step": 3753
    },
    {
      "epoch": 0.7717134340631103,
      "grad_norm": 0.19785994291305542,
      "learning_rate": 7.83518124991202e-05,
      "loss": 0.6557,
      "step": 3754
    },
    {
      "epoch": 0.7719190050364888,
      "grad_norm": 0.18960314989089966,
      "learning_rate": 7.834510628239541e-05,
      "loss": 0.6495,
      "step": 3755
    },
    {
      "epoch": 0.7721245760098674,
      "grad_norm": 0.1869727522134781,
      "learning_rate": 7.833839842291309e-05,
      "loss": 0.6561,
      "step": 3756
    },
    {
      "epoch": 0.772330146983246,
      "grad_norm": 0.19522154331207275,
      "learning_rate": 7.83316889210037e-05,
      "loss": 0.6781,
      "step": 3757
    },
    {
      "epoch": 0.7725357179566246,
      "grad_norm": 0.19209223985671997,
      "learning_rate": 7.832497777699779e-05,
      "loss": 0.6598,
      "step": 3758
    },
    {
      "epoch": 0.772741288930003,
      "grad_norm": 0.19709967076778412,
      "learning_rate": 7.831826499122599e-05,
      "loss": 0.6977,
      "step": 3759
    },
    {
      "epoch": 0.7729468599033816,
      "grad_norm": 0.19524455070495605,
      "learning_rate": 7.8311550564019e-05,
      "loss": 0.6701,
      "step": 3760
    },
    {
      "epoch": 0.7731524308767602,
      "grad_norm": 0.19056567549705505,
      "learning_rate": 7.830483449570762e-05,
      "loss": 0.652,
      "step": 3761
    },
    {
      "epoch": 0.7733580018501388,
      "grad_norm": 0.2009115368127823,
      "learning_rate": 7.829811678662269e-05,
      "loss": 0.6796,
      "step": 3762
    },
    {
      "epoch": 0.7735635728235173,
      "grad_norm": 0.1854369342327118,
      "learning_rate": 7.829139743709518e-05,
      "loss": 0.6959,
      "step": 3763
    },
    {
      "epoch": 0.7737691437968959,
      "grad_norm": 0.19334383308887482,
      "learning_rate": 7.828467644745614e-05,
      "loss": 0.6803,
      "step": 3764
    },
    {
      "epoch": 0.7739747147702745,
      "grad_norm": 0.1896241158246994,
      "learning_rate": 7.827795381803666e-05,
      "loss": 0.6589,
      "step": 3765
    },
    {
      "epoch": 0.774180285743653,
      "grad_norm": 0.19462954998016357,
      "learning_rate": 7.827122954916793e-05,
      "loss": 0.6884,
      "step": 3766
    },
    {
      "epoch": 0.7743858567170315,
      "grad_norm": 0.15615877509117126,
      "learning_rate": 7.826450364118124e-05,
      "loss": 0.5868,
      "step": 3767
    },
    {
      "epoch": 0.7745914276904101,
      "grad_norm": 0.21053725481033325,
      "learning_rate": 7.825777609440793e-05,
      "loss": 0.6619,
      "step": 3768
    },
    {
      "epoch": 0.7747969986637887,
      "grad_norm": 0.1837691068649292,
      "learning_rate": 7.825104690917943e-05,
      "loss": 0.68,
      "step": 3769
    },
    {
      "epoch": 0.7750025696371672,
      "grad_norm": 0.18419477343559265,
      "learning_rate": 7.824431608582728e-05,
      "loss": 0.6629,
      "step": 3770
    },
    {
      "epoch": 0.7752081406105458,
      "grad_norm": 0.19641302525997162,
      "learning_rate": 7.823758362468305e-05,
      "loss": 0.6919,
      "step": 3771
    },
    {
      "epoch": 0.7754137115839244,
      "grad_norm": 0.14012254774570465,
      "learning_rate": 7.823084952607842e-05,
      "loss": 0.5845,
      "step": 3772
    },
    {
      "epoch": 0.775619282557303,
      "grad_norm": 0.13224144279956818,
      "learning_rate": 7.822411379034516e-05,
      "loss": 0.5851,
      "step": 3773
    },
    {
      "epoch": 0.7758248535306814,
      "grad_norm": 0.20598402619361877,
      "learning_rate": 7.82173764178151e-05,
      "loss": 0.6987,
      "step": 3774
    },
    {
      "epoch": 0.77603042450406,
      "grad_norm": 0.19516415894031525,
      "learning_rate": 7.821063740882017e-05,
      "loss": 0.681,
      "step": 3775
    },
    {
      "epoch": 0.7762359954774386,
      "grad_norm": 0.192254900932312,
      "learning_rate": 7.820389676369237e-05,
      "loss": 0.6647,
      "step": 3776
    },
    {
      "epoch": 0.7764415664508172,
      "grad_norm": 0.21489369869232178,
      "learning_rate": 7.819715448276374e-05,
      "loss": 0.6804,
      "step": 3777
    },
    {
      "epoch": 0.7766471374241957,
      "grad_norm": 0.18683873116970062,
      "learning_rate": 7.81904105663665e-05,
      "loss": 0.6766,
      "step": 3778
    },
    {
      "epoch": 0.7768527083975743,
      "grad_norm": 0.19451092183589935,
      "learning_rate": 7.818366501483285e-05,
      "loss": 0.6689,
      "step": 3779
    },
    {
      "epoch": 0.7770582793709528,
      "grad_norm": 0.16607536375522614,
      "learning_rate": 7.817691782849512e-05,
      "loss": 0.6039,
      "step": 3780
    },
    {
      "epoch": 0.7772638503443314,
      "grad_norm": 0.20235170423984528,
      "learning_rate": 7.817016900768573e-05,
      "loss": 0.6846,
      "step": 3781
    },
    {
      "epoch": 0.7774694213177099,
      "grad_norm": 0.1997910737991333,
      "learning_rate": 7.816341855273715e-05,
      "loss": 0.665,
      "step": 3782
    },
    {
      "epoch": 0.7776749922910885,
      "grad_norm": 0.19691520929336548,
      "learning_rate": 7.815666646398193e-05,
      "loss": 0.6791,
      "step": 3783
    },
    {
      "epoch": 0.7778805632644671,
      "grad_norm": 0.14885997772216797,
      "learning_rate": 7.814991274175273e-05,
      "loss": 0.6101,
      "step": 3784
    },
    {
      "epoch": 0.7780861342378456,
      "grad_norm": 0.19798895716667175,
      "learning_rate": 7.814315738638227e-05,
      "loss": 0.6652,
      "step": 3785
    },
    {
      "epoch": 0.7782917052112241,
      "grad_norm": 0.13677549362182617,
      "learning_rate": 7.813640039820337e-05,
      "loss": 0.583,
      "step": 3786
    },
    {
      "epoch": 0.7784972761846027,
      "grad_norm": 0.19505973160266876,
      "learning_rate": 7.81296417775489e-05,
      "loss": 0.7306,
      "step": 3787
    },
    {
      "epoch": 0.7787028471579813,
      "grad_norm": 0.18989427387714386,
      "learning_rate": 7.812288152475182e-05,
      "loss": 0.6883,
      "step": 3788
    },
    {
      "epoch": 0.7789084181313598,
      "grad_norm": 0.18871872127056122,
      "learning_rate": 7.811611964014518e-05,
      "loss": 0.6781,
      "step": 3789
    },
    {
      "epoch": 0.7791139891047384,
      "grad_norm": 0.19525344669818878,
      "learning_rate": 7.81093561240621e-05,
      "loss": 0.657,
      "step": 3790
    },
    {
      "epoch": 0.779319560078117,
      "grad_norm": 0.1633206307888031,
      "learning_rate": 7.810259097683582e-05,
      "loss": 0.5749,
      "step": 3791
    },
    {
      "epoch": 0.7795251310514956,
      "grad_norm": 0.19155313074588776,
      "learning_rate": 7.80958241987996e-05,
      "loss": 0.6782,
      "step": 3792
    },
    {
      "epoch": 0.779730702024874,
      "grad_norm": 0.18953226506710052,
      "learning_rate": 7.80890557902868e-05,
      "loss": 0.668,
      "step": 3793
    },
    {
      "epoch": 0.7799362729982526,
      "grad_norm": 0.19336241483688354,
      "learning_rate": 7.808228575163088e-05,
      "loss": 0.6523,
      "step": 3794
    },
    {
      "epoch": 0.7801418439716312,
      "grad_norm": 0.18969465792179108,
      "learning_rate": 7.807551408316537e-05,
      "loss": 0.6893,
      "step": 3795
    },
    {
      "epoch": 0.7803474149450098,
      "grad_norm": 0.19042238593101501,
      "learning_rate": 7.806874078522388e-05,
      "loss": 0.64,
      "step": 3796
    },
    {
      "epoch": 0.7805529859183883,
      "grad_norm": 0.1883266568183899,
      "learning_rate": 7.80619658581401e-05,
      "loss": 0.6471,
      "step": 3797
    },
    {
      "epoch": 0.7807585568917669,
      "grad_norm": 0.1871403008699417,
      "learning_rate": 7.805518930224777e-05,
      "loss": 0.6642,
      "step": 3798
    },
    {
      "epoch": 0.7809641278651455,
      "grad_norm": 0.1827799677848816,
      "learning_rate": 7.804841111788078e-05,
      "loss": 0.677,
      "step": 3799
    },
    {
      "epoch": 0.781169698838524,
      "grad_norm": 0.18511800467967987,
      "learning_rate": 7.804163130537304e-05,
      "loss": 0.6586,
      "step": 3800
    },
    {
      "epoch": 0.7813752698119025,
      "grad_norm": 0.1907230168581009,
      "learning_rate": 7.803484986505855e-05,
      "loss": 0.6573,
      "step": 3801
    },
    {
      "epoch": 0.7815808407852811,
      "grad_norm": 0.18352137506008148,
      "learning_rate": 7.802806679727144e-05,
      "loss": 0.6952,
      "step": 3802
    },
    {
      "epoch": 0.7817864117586597,
      "grad_norm": 0.18589456379413605,
      "learning_rate": 7.802128210234583e-05,
      "loss": 0.6877,
      "step": 3803
    },
    {
      "epoch": 0.7819919827320382,
      "grad_norm": 0.19165122509002686,
      "learning_rate": 7.8014495780616e-05,
      "loss": 0.6721,
      "step": 3804
    },
    {
      "epoch": 0.7821975537054168,
      "grad_norm": 0.18092942237854004,
      "learning_rate": 7.800770783241627e-05,
      "loss": 0.6472,
      "step": 3805
    },
    {
      "epoch": 0.7824031246787954,
      "grad_norm": 0.1938347965478897,
      "learning_rate": 7.800091825808104e-05,
      "loss": 0.6875,
      "step": 3806
    },
    {
      "epoch": 0.782608695652174,
      "grad_norm": 0.18910136818885803,
      "learning_rate": 7.799412705794484e-05,
      "loss": 0.6634,
      "step": 3807
    },
    {
      "epoch": 0.7828142666255524,
      "grad_norm": 0.18492446839809418,
      "learning_rate": 7.798733423234219e-05,
      "loss": 0.6772,
      "step": 3808
    },
    {
      "epoch": 0.783019837598931,
      "grad_norm": 0.18603304028511047,
      "learning_rate": 7.798053978160777e-05,
      "loss": 0.6888,
      "step": 3809
    },
    {
      "epoch": 0.7832254085723096,
      "grad_norm": 0.1817874163389206,
      "learning_rate": 7.797374370607632e-05,
      "loss": 0.6675,
      "step": 3810
    },
    {
      "epoch": 0.7834309795456882,
      "grad_norm": 0.1888546198606491,
      "learning_rate": 7.796694600608261e-05,
      "loss": 0.6472,
      "step": 3811
    },
    {
      "epoch": 0.7836365505190667,
      "grad_norm": 0.18347470462322235,
      "learning_rate": 7.796014668196159e-05,
      "loss": 0.6368,
      "step": 3812
    },
    {
      "epoch": 0.7838421214924453,
      "grad_norm": 0.18692941963672638,
      "learning_rate": 7.795334573404817e-05,
      "loss": 0.6637,
      "step": 3813
    },
    {
      "epoch": 0.7840476924658238,
      "grad_norm": 0.18573735654354095,
      "learning_rate": 7.794654316267745e-05,
      "loss": 0.6716,
      "step": 3814
    },
    {
      "epoch": 0.7842532634392024,
      "grad_norm": 0.1885242462158203,
      "learning_rate": 7.793973896818452e-05,
      "loss": 0.6957,
      "step": 3815
    },
    {
      "epoch": 0.7844588344125809,
      "grad_norm": 0.19421452283859253,
      "learning_rate": 7.793293315090462e-05,
      "loss": 0.6977,
      "step": 3816
    },
    {
      "epoch": 0.7846644053859595,
      "grad_norm": 0.18501219153404236,
      "learning_rate": 7.792612571117304e-05,
      "loss": 0.676,
      "step": 3817
    },
    {
      "epoch": 0.7848699763593381,
      "grad_norm": 0.18256261944770813,
      "learning_rate": 7.791931664932514e-05,
      "loss": 0.6637,
      "step": 3818
    },
    {
      "epoch": 0.7850755473327167,
      "grad_norm": 0.16926661133766174,
      "learning_rate": 7.791250596569636e-05,
      "loss": 0.5883,
      "step": 3819
    },
    {
      "epoch": 0.7852811183060951,
      "grad_norm": 0.19965988397598267,
      "learning_rate": 7.790569366062226e-05,
      "loss": 0.6873,
      "step": 3820
    },
    {
      "epoch": 0.7854866892794737,
      "grad_norm": 0.19432468712329865,
      "learning_rate": 7.789887973443842e-05,
      "loss": 0.6727,
      "step": 3821
    },
    {
      "epoch": 0.7856922602528523,
      "grad_norm": 1.5224770307540894,
      "learning_rate": 7.789206418748055e-05,
      "loss": 0.6645,
      "step": 3822
    },
    {
      "epoch": 0.7858978312262308,
      "grad_norm": 0.25981712341308594,
      "learning_rate": 7.788524702008442e-05,
      "loss": 0.6693,
      "step": 3823
    },
    {
      "epoch": 0.7861034021996094,
      "grad_norm": 0.17504632472991943,
      "learning_rate": 7.787842823258587e-05,
      "loss": 0.6081,
      "step": 3824
    },
    {
      "epoch": 0.786308973172988,
      "grad_norm": 0.20936280488967896,
      "learning_rate": 7.787160782532084e-05,
      "loss": 0.6833,
      "step": 3825
    },
    {
      "epoch": 0.7865145441463666,
      "grad_norm": 0.2347778081893921,
      "learning_rate": 7.786478579862532e-05,
      "loss": 0.6824,
      "step": 3826
    },
    {
      "epoch": 0.786720115119745,
      "grad_norm": 0.19294393062591553,
      "learning_rate": 7.785796215283543e-05,
      "loss": 0.6811,
      "step": 3827
    },
    {
      "epoch": 0.7869256860931236,
      "grad_norm": 0.253738671541214,
      "learning_rate": 7.785113688828731e-05,
      "loss": 0.6015,
      "step": 3828
    },
    {
      "epoch": 0.7871312570665022,
      "grad_norm": 0.22543035447597504,
      "learning_rate": 7.784431000531722e-05,
      "loss": 0.6593,
      "step": 3829
    },
    {
      "epoch": 0.7873368280398808,
      "grad_norm": 0.19480814039707184,
      "learning_rate": 7.78374815042615e-05,
      "loss": 0.6131,
      "step": 3830
    },
    {
      "epoch": 0.7875423990132593,
      "grad_norm": 0.2131412923336029,
      "learning_rate": 7.783065138545655e-05,
      "loss": 0.6982,
      "step": 3831
    },
    {
      "epoch": 0.7877479699866379,
      "grad_norm": 0.20891313254833221,
      "learning_rate": 7.782381964923885e-05,
      "loss": 0.6981,
      "step": 3832
    },
    {
      "epoch": 0.7879535409600165,
      "grad_norm": 0.15176214277744293,
      "learning_rate": 7.781698629594498e-05,
      "loss": 0.5964,
      "step": 3833
    },
    {
      "epoch": 0.788159111933395,
      "grad_norm": 0.19954368472099304,
      "learning_rate": 7.781015132591156e-05,
      "loss": 0.681,
      "step": 3834
    },
    {
      "epoch": 0.7883646829067735,
      "grad_norm": 0.19388937950134277,
      "learning_rate": 7.780331473947537e-05,
      "loss": 0.6776,
      "step": 3835
    },
    {
      "epoch": 0.7885702538801521,
      "grad_norm": 0.19515137374401093,
      "learning_rate": 7.779647653697317e-05,
      "loss": 0.7054,
      "step": 3836
    },
    {
      "epoch": 0.7887758248535307,
      "grad_norm": 0.15485966205596924,
      "learning_rate": 7.778963671874186e-05,
      "loss": 0.5838,
      "step": 3837
    },
    {
      "epoch": 0.7889813958269093,
      "grad_norm": 0.2033955603837967,
      "learning_rate": 7.778279528511841e-05,
      "loss": 0.6831,
      "step": 3838
    },
    {
      "epoch": 0.7891869668002878,
      "grad_norm": 0.14127175509929657,
      "learning_rate": 7.777595223643985e-05,
      "loss": 0.5782,
      "step": 3839
    },
    {
      "epoch": 0.7893925377736664,
      "grad_norm": 0.19278831779956818,
      "learning_rate": 7.776910757304333e-05,
      "loss": 0.6604,
      "step": 3840
    },
    {
      "epoch": 0.789598108747045,
      "grad_norm": 0.19700968265533447,
      "learning_rate": 7.776226129526606e-05,
      "loss": 0.6487,
      "step": 3841
    },
    {
      "epoch": 0.7898036797204234,
      "grad_norm": 0.20007772743701935,
      "learning_rate": 7.775541340344528e-05,
      "loss": 0.7053,
      "step": 3842
    },
    {
      "epoch": 0.790009250693802,
      "grad_norm": 0.1945502907037735,
      "learning_rate": 7.774856389791838e-05,
      "loss": 0.6633,
      "step": 3843
    },
    {
      "epoch": 0.7902148216671806,
      "grad_norm": 0.18347761034965515,
      "learning_rate": 7.774171277902282e-05,
      "loss": 0.6509,
      "step": 3844
    },
    {
      "epoch": 0.7904203926405592,
      "grad_norm": 0.1927865594625473,
      "learning_rate": 7.773486004709608e-05,
      "loss": 0.6873,
      "step": 3845
    },
    {
      "epoch": 0.7906259636139377,
      "grad_norm": 0.1933821141719818,
      "learning_rate": 7.772800570247582e-05,
      "loss": 0.6784,
      "step": 3846
    },
    {
      "epoch": 0.7908315345873163,
      "grad_norm": 0.1437695473432541,
      "learning_rate": 7.772114974549966e-05,
      "loss": 0.5979,
      "step": 3847
    },
    {
      "epoch": 0.7910371055606948,
      "grad_norm": 0.20420506596565247,
      "learning_rate": 7.77142921765054e-05,
      "loss": 0.7083,
      "step": 3848
    },
    {
      "epoch": 0.7912426765340734,
      "grad_norm": 0.13508614897727966,
      "learning_rate": 7.770743299583089e-05,
      "loss": 0.5824,
      "step": 3849
    },
    {
      "epoch": 0.7914482475074519,
      "grad_norm": 0.1953742653131485,
      "learning_rate": 7.770057220381401e-05,
      "loss": 0.6655,
      "step": 3850
    },
    {
      "epoch": 0.7916538184808305,
      "grad_norm": 0.192901611328125,
      "learning_rate": 7.769370980079277e-05,
      "loss": 0.6922,
      "step": 3851
    },
    {
      "epoch": 0.7918593894542091,
      "grad_norm": 0.19612765312194824,
      "learning_rate": 7.768684578710528e-05,
      "loss": 0.6687,
      "step": 3852
    },
    {
      "epoch": 0.7920649604275877,
      "grad_norm": 0.19205497205257416,
      "learning_rate": 7.767998016308968e-05,
      "loss": 0.6837,
      "step": 3853
    },
    {
      "epoch": 0.7922705314009661,
      "grad_norm": 0.15582695603370667,
      "learning_rate": 7.767311292908419e-05,
      "loss": 0.5945,
      "step": 3854
    },
    {
      "epoch": 0.7924761023743447,
      "grad_norm": 0.18942193686962128,
      "learning_rate": 7.766624408542713e-05,
      "loss": 0.652,
      "step": 3855
    },
    {
      "epoch": 0.7926816733477233,
      "grad_norm": 0.19103151559829712,
      "learning_rate": 7.765937363245692e-05,
      "loss": 0.6518,
      "step": 3856
    },
    {
      "epoch": 0.7928872443211019,
      "grad_norm": 0.18634134531021118,
      "learning_rate": 7.765250157051202e-05,
      "loss": 0.6556,
      "step": 3857
    },
    {
      "epoch": 0.7930928152944804,
      "grad_norm": 0.1883394718170166,
      "learning_rate": 7.764562789993099e-05,
      "loss": 0.6736,
      "step": 3858
    },
    {
      "epoch": 0.793298386267859,
      "grad_norm": 0.18593887984752655,
      "learning_rate": 7.763875262105245e-05,
      "loss": 0.652,
      "step": 3859
    },
    {
      "epoch": 0.7935039572412376,
      "grad_norm": 0.2020663321018219,
      "learning_rate": 7.763187573421511e-05,
      "loss": 0.6447,
      "step": 3860
    },
    {
      "epoch": 0.793709528214616,
      "grad_norm": 0.18651576340198517,
      "learning_rate": 7.76249972397578e-05,
      "loss": 0.6746,
      "step": 3861
    },
    {
      "epoch": 0.7939150991879946,
      "grad_norm": 0.19070084393024445,
      "learning_rate": 7.761811713801935e-05,
      "loss": 0.6866,
      "step": 3862
    },
    {
      "epoch": 0.7941206701613732,
      "grad_norm": 0.18511120975017548,
      "learning_rate": 7.761123542933872e-05,
      "loss": 0.6491,
      "step": 3863
    },
    {
      "epoch": 0.7943262411347518,
      "grad_norm": 0.18863095343112946,
      "learning_rate": 7.760435211405495e-05,
      "loss": 0.672,
      "step": 3864
    },
    {
      "epoch": 0.7945318121081303,
      "grad_norm": 0.19631804525852203,
      "learning_rate": 7.759746719250714e-05,
      "loss": 0.6509,
      "step": 3865
    },
    {
      "epoch": 0.7947373830815089,
      "grad_norm": 0.17893162369728088,
      "learning_rate": 7.75905806650345e-05,
      "loss": 0.6707,
      "step": 3866
    },
    {
      "epoch": 0.7949429540548875,
      "grad_norm": 0.18233318626880646,
      "learning_rate": 7.758369253197626e-05,
      "loss": 0.657,
      "step": 3867
    },
    {
      "epoch": 0.795148525028266,
      "grad_norm": 0.19054913520812988,
      "learning_rate": 7.757680279367178e-05,
      "loss": 0.6796,
      "step": 3868
    },
    {
      "epoch": 0.7953540960016445,
      "grad_norm": 0.20700985193252563,
      "learning_rate": 7.75699114504605e-05,
      "loss": 0.6672,
      "step": 3869
    },
    {
      "epoch": 0.7955596669750231,
      "grad_norm": 0.1838599294424057,
      "learning_rate": 7.756301850268193e-05,
      "loss": 0.6721,
      "step": 3870
    },
    {
      "epoch": 0.7957652379484017,
      "grad_norm": 0.1944621503353119,
      "learning_rate": 7.755612395067562e-05,
      "loss": 0.6751,
      "step": 3871
    },
    {
      "epoch": 0.7959708089217803,
      "grad_norm": 0.18728716671466827,
      "learning_rate": 7.754922779478125e-05,
      "loss": 0.6765,
      "step": 3872
    },
    {
      "epoch": 0.7961763798951588,
      "grad_norm": 0.18458257615566254,
      "learning_rate": 7.754233003533856e-05,
      "loss": 0.6609,
      "step": 3873
    },
    {
      "epoch": 0.7963819508685374,
      "grad_norm": 0.18987616896629333,
      "learning_rate": 7.753543067268737e-05,
      "loss": 0.647,
      "step": 3874
    },
    {
      "epoch": 0.796587521841916,
      "grad_norm": 0.19032716751098633,
      "learning_rate": 7.752852970716761e-05,
      "loss": 0.6514,
      "step": 3875
    },
    {
      "epoch": 0.7967930928152945,
      "grad_norm": 0.18918365240097046,
      "learning_rate": 7.752162713911918e-05,
      "loss": 0.6705,
      "step": 3876
    },
    {
      "epoch": 0.796998663788673,
      "grad_norm": 0.18836969137191772,
      "learning_rate": 7.751472296888222e-05,
      "loss": 0.6651,
      "step": 3877
    },
    {
      "epoch": 0.7972042347620516,
      "grad_norm": 0.18875330686569214,
      "learning_rate": 7.750781719679683e-05,
      "loss": 0.6864,
      "step": 3878
    },
    {
      "epoch": 0.7974098057354302,
      "grad_norm": 0.18728755414485931,
      "learning_rate": 7.750090982320321e-05,
      "loss": 0.6629,
      "step": 3879
    },
    {
      "epoch": 0.7976153767088087,
      "grad_norm": 0.1937887966632843,
      "learning_rate": 7.749400084844169e-05,
      "loss": 0.6673,
      "step": 3880
    },
    {
      "epoch": 0.7978209476821873,
      "grad_norm": 0.16451017558574677,
      "learning_rate": 7.748709027285261e-05,
      "loss": 0.5989,
      "step": 3881
    },
    {
      "epoch": 0.7980265186555658,
      "grad_norm": 0.1364785134792328,
      "learning_rate": 7.748017809677646e-05,
      "loss": 0.5949,
      "step": 3882
    },
    {
      "epoch": 0.7982320896289444,
      "grad_norm": 0.14087210595607758,
      "learning_rate": 7.747326432055372e-05,
      "loss": 0.5753,
      "step": 3883
    },
    {
      "epoch": 0.7984376606023229,
      "grad_norm": 0.20993009209632874,
      "learning_rate": 7.746634894452504e-05,
      "loss": 0.7021,
      "step": 3884
    },
    {
      "epoch": 0.7986432315757015,
      "grad_norm": 0.1940746009349823,
      "learning_rate": 7.74594319690311e-05,
      "loss": 0.6743,
      "step": 3885
    },
    {
      "epoch": 0.7988488025490801,
      "grad_norm": 0.1924261897802353,
      "learning_rate": 7.745251339441265e-05,
      "loss": 0.6795,
      "step": 3886
    },
    {
      "epoch": 0.7990543735224587,
      "grad_norm": 0.1905447542667389,
      "learning_rate": 7.744559322101056e-05,
      "loss": 0.6862,
      "step": 3887
    },
    {
      "epoch": 0.7992599444958371,
      "grad_norm": 0.18997174501419067,
      "learning_rate": 7.743867144916573e-05,
      "loss": 0.5848,
      "step": 3888
    },
    {
      "epoch": 0.7994655154692157,
      "grad_norm": 0.1488848179578781,
      "learning_rate": 7.743174807921919e-05,
      "loss": 0.5842,
      "step": 3889
    },
    {
      "epoch": 0.7996710864425943,
      "grad_norm": 0.14569362998008728,
      "learning_rate": 7.7424823111512e-05,
      "loss": 0.5866,
      "step": 3890
    },
    {
      "epoch": 0.7998766574159729,
      "grad_norm": 0.22627940773963928,
      "learning_rate": 7.741789654638532e-05,
      "loss": 0.6954,
      "step": 3891
    },
    {
      "epoch": 0.8000822283893514,
      "grad_norm": 0.18143914639949799,
      "learning_rate": 7.74109683841804e-05,
      "loss": 0.5874,
      "step": 3892
    },
    {
      "epoch": 0.80028779936273,
      "grad_norm": 0.1479119211435318,
      "learning_rate": 7.740403862523857e-05,
      "loss": 0.5729,
      "step": 3893
    },
    {
      "epoch": 0.8004933703361086,
      "grad_norm": 0.20130044221878052,
      "learning_rate": 7.73971072699012e-05,
      "loss": 0.6855,
      "step": 3894
    },
    {
      "epoch": 0.8006989413094872,
      "grad_norm": 0.19785720109939575,
      "learning_rate": 7.739017431850978e-05,
      "loss": 0.687,
      "step": 3895
    },
    {
      "epoch": 0.8009045122828656,
      "grad_norm": 0.20219095051288605,
      "learning_rate": 7.738323977140587e-05,
      "loss": 0.585,
      "step": 3896
    },
    {
      "epoch": 0.8011100832562442,
      "grad_norm": 0.1963326632976532,
      "learning_rate": 7.737630362893109e-05,
      "loss": 0.6628,
      "step": 3897
    },
    {
      "epoch": 0.8013156542296228,
      "grad_norm": 0.18930426239967346,
      "learning_rate": 7.736936589142717e-05,
      "loss": 0.6674,
      "step": 3898
    },
    {
      "epoch": 0.8015212252030013,
      "grad_norm": 0.18726347386837006,
      "learning_rate": 7.736242655923587e-05,
      "loss": 0.6837,
      "step": 3899
    },
    {
      "epoch": 0.8017267961763799,
      "grad_norm": 0.19241462647914886,
      "learning_rate": 7.735548563269907e-05,
      "loss": 0.6677,
      "step": 3900
    },
    {
      "epoch": 0.8019323671497585,
      "grad_norm": 0.1922820508480072,
      "learning_rate": 7.734854311215874e-05,
      "loss": 0.6865,
      "step": 3901
    },
    {
      "epoch": 0.802137938123137,
      "grad_norm": 0.19233377277851105,
      "learning_rate": 7.734159899795688e-05,
      "loss": 0.6813,
      "step": 3902
    },
    {
      "epoch": 0.8023435090965155,
      "grad_norm": 0.18713760375976562,
      "learning_rate": 7.73346532904356e-05,
      "loss": 0.6537,
      "step": 3903
    },
    {
      "epoch": 0.8025490800698941,
      "grad_norm": 0.19880633056163788,
      "learning_rate": 7.732770598993708e-05,
      "loss": 0.6728,
      "step": 3904
    },
    {
      "epoch": 0.8027546510432727,
      "grad_norm": 0.19050458073616028,
      "learning_rate": 7.73207570968036e-05,
      "loss": 0.6749,
      "step": 3905
    },
    {
      "epoch": 0.8029602220166513,
      "grad_norm": 0.1801813244819641,
      "learning_rate": 7.731380661137747e-05,
      "loss": 0.5939,
      "step": 3906
    },
    {
      "epoch": 0.8031657929900298,
      "grad_norm": 0.19383971393108368,
      "learning_rate": 7.730685453400113e-05,
      "loss": 0.6826,
      "step": 3907
    },
    {
      "epoch": 0.8033713639634084,
      "grad_norm": 0.20955929160118103,
      "learning_rate": 7.729990086501707e-05,
      "loss": 0.6954,
      "step": 3908
    },
    {
      "epoch": 0.803576934936787,
      "grad_norm": 0.19068995118141174,
      "learning_rate": 7.729294560476786e-05,
      "loss": 0.6686,
      "step": 3909
    },
    {
      "epoch": 0.8037825059101655,
      "grad_norm": 0.19245314598083496,
      "learning_rate": 7.728598875359615e-05,
      "loss": 0.6619,
      "step": 3910
    },
    {
      "epoch": 0.803988076883544,
      "grad_norm": 0.1979014128446579,
      "learning_rate": 7.727903031184469e-05,
      "loss": 0.6614,
      "step": 3911
    },
    {
      "epoch": 0.8041936478569226,
      "grad_norm": 0.1900876760482788,
      "learning_rate": 7.727207027985626e-05,
      "loss": 0.6486,
      "step": 3912
    },
    {
      "epoch": 0.8043992188303012,
      "grad_norm": 0.17994777858257294,
      "learning_rate": 7.726510865797379e-05,
      "loss": 0.6729,
      "step": 3913
    },
    {
      "epoch": 0.8046047898036797,
      "grad_norm": 0.18554867804050446,
      "learning_rate": 7.725814544654021e-05,
      "loss": 0.6541,
      "step": 3914
    },
    {
      "epoch": 0.8048103607770583,
      "grad_norm": 0.24200813472270966,
      "learning_rate": 7.725118064589859e-05,
      "loss": 0.6514,
      "step": 3915
    },
    {
      "epoch": 0.8050159317504368,
      "grad_norm": 0.18101008236408234,
      "learning_rate": 7.724421425639201e-05,
      "loss": 0.6382,
      "step": 3916
    },
    {
      "epoch": 0.8052215027238154,
      "grad_norm": 0.18432863056659698,
      "learning_rate": 7.723724627836374e-05,
      "loss": 0.64,
      "step": 3917
    },
    {
      "epoch": 0.8054270736971939,
      "grad_norm": 0.19102488458156586,
      "learning_rate": 7.7230276712157e-05,
      "loss": 0.7106,
      "step": 3918
    },
    {
      "epoch": 0.8056326446705725,
      "grad_norm": 0.16466036438941956,
      "learning_rate": 7.722330555811519e-05,
      "loss": 0.5831,
      "step": 3919
    },
    {
      "epoch": 0.8058382156439511,
      "grad_norm": 0.19325773417949677,
      "learning_rate": 7.721633281658171e-05,
      "loss": 0.6855,
      "step": 3920
    },
    {
      "epoch": 0.8060437866173297,
      "grad_norm": 0.1921764314174652,
      "learning_rate": 7.720935848790009e-05,
      "loss": 0.6858,
      "step": 3921
    },
    {
      "epoch": 0.8062493575907081,
      "grad_norm": 0.1909746527671814,
      "learning_rate": 7.720238257241394e-05,
      "loss": 0.6825,
      "step": 3922
    },
    {
      "epoch": 0.8064549285640867,
      "grad_norm": 0.18359649181365967,
      "learning_rate": 7.71954050704669e-05,
      "loss": 0.6807,
      "step": 3923
    },
    {
      "epoch": 0.8066604995374653,
      "grad_norm": 0.1895141303539276,
      "learning_rate": 7.718842598240273e-05,
      "loss": 0.7047,
      "step": 3924
    },
    {
      "epoch": 0.8068660705108439,
      "grad_norm": 0.18683840334415436,
      "learning_rate": 7.718144530856527e-05,
      "loss": 0.6704,
      "step": 3925
    },
    {
      "epoch": 0.8070716414842224,
      "grad_norm": 0.19502970576286316,
      "learning_rate": 7.717446304929841e-05,
      "loss": 0.6785,
      "step": 3926
    },
    {
      "epoch": 0.807277212457601,
      "grad_norm": 0.1623646318912506,
      "learning_rate": 7.716747920494615e-05,
      "loss": 0.5998,
      "step": 3927
    },
    {
      "epoch": 0.8074827834309796,
      "grad_norm": 0.13050900399684906,
      "learning_rate": 7.716049377585252e-05,
      "loss": 0.5749,
      "step": 3928
    },
    {
      "epoch": 0.8076883544043582,
      "grad_norm": 0.2015300691127777,
      "learning_rate": 7.715350676236169e-05,
      "loss": 0.6902,
      "step": 3929
    },
    {
      "epoch": 0.8078939253777366,
      "grad_norm": 0.19763372838497162,
      "learning_rate": 7.714651816481788e-05,
      "loss": 0.6666,
      "step": 3930
    },
    {
      "epoch": 0.8080994963511152,
      "grad_norm": 0.19438831508159637,
      "learning_rate": 7.713952798356535e-05,
      "loss": 0.6901,
      "step": 3931
    },
    {
      "epoch": 0.8083050673244938,
      "grad_norm": 0.1897808313369751,
      "learning_rate": 7.71325362189485e-05,
      "loss": 0.6652,
      "step": 3932
    },
    {
      "epoch": 0.8085106382978723,
      "grad_norm": 0.2024880349636078,
      "learning_rate": 7.712554287131179e-05,
      "loss": 0.6983,
      "step": 3933
    },
    {
      "epoch": 0.8087162092712509,
      "grad_norm": 0.21040861308574677,
      "learning_rate": 7.711854794099973e-05,
      "loss": 0.6676,
      "step": 3934
    },
    {
      "epoch": 0.8089217802446295,
      "grad_norm": 0.19779765605926514,
      "learning_rate": 7.711155142835693e-05,
      "loss": 0.6699,
      "step": 3935
    },
    {
      "epoch": 0.809127351218008,
      "grad_norm": 0.18733692169189453,
      "learning_rate": 7.710455333372809e-05,
      "loss": 0.6876,
      "step": 3936
    },
    {
      "epoch": 0.8093329221913865,
      "grad_norm": 0.18417513370513916,
      "learning_rate": 7.709755365745796e-05,
      "loss": 0.6592,
      "step": 3937
    },
    {
      "epoch": 0.8095384931647651,
      "grad_norm": 0.19497236609458923,
      "learning_rate": 7.709055239989138e-05,
      "loss": 0.6704,
      "step": 3938
    },
    {
      "epoch": 0.8097440641381437,
      "grad_norm": 0.19937434792518616,
      "learning_rate": 7.708354956137329e-05,
      "loss": 0.6672,
      "step": 3939
    },
    {
      "epoch": 0.8099496351115223,
      "grad_norm": 0.18484531342983246,
      "learning_rate": 7.707654514224865e-05,
      "loss": 0.639,
      "step": 3940
    },
    {
      "epoch": 0.8101552060849008,
      "grad_norm": 0.21879440546035767,
      "learning_rate": 7.706953914286256e-05,
      "loss": 0.5811,
      "step": 3941
    },
    {
      "epoch": 0.8103607770582794,
      "grad_norm": 0.19117337465286255,
      "learning_rate": 7.706253156356018e-05,
      "loss": 0.6602,
      "step": 3942
    },
    {
      "epoch": 0.810566348031658,
      "grad_norm": 0.20928023755550385,
      "learning_rate": 7.705552240468672e-05,
      "loss": 0.6755,
      "step": 3943
    },
    {
      "epoch": 0.8107719190050365,
      "grad_norm": 0.1899488866329193,
      "learning_rate": 7.70485116665875e-05,
      "loss": 0.6596,
      "step": 3944
    },
    {
      "epoch": 0.810977489978415,
      "grad_norm": 0.1829700917005539,
      "learning_rate": 7.70414993496079e-05,
      "loss": 0.6536,
      "step": 3945
    },
    {
      "epoch": 0.8111830609517936,
      "grad_norm": 0.2187718152999878,
      "learning_rate": 7.70344854540934e-05,
      "loss": 0.6712,
      "step": 3946
    },
    {
      "epoch": 0.8113886319251722,
      "grad_norm": 0.1931912750005722,
      "learning_rate": 7.702746998038952e-05,
      "loss": 0.6848,
      "step": 3947
    },
    {
      "epoch": 0.8115942028985508,
      "grad_norm": 0.1904575526714325,
      "learning_rate": 7.70204529288419e-05,
      "loss": 0.6688,
      "step": 3948
    },
    {
      "epoch": 0.8117997738719293,
      "grad_norm": 0.18743041157722473,
      "learning_rate": 7.701343429979622e-05,
      "loss": 0.6804,
      "step": 3949
    },
    {
      "epoch": 0.8120053448453078,
      "grad_norm": 0.1948167085647583,
      "learning_rate": 7.700641409359827e-05,
      "loss": 0.6985,
      "step": 3950
    },
    {
      "epoch": 0.8122109158186864,
      "grad_norm": 0.19588027894496918,
      "learning_rate": 7.69993923105939e-05,
      "loss": 0.6802,
      "step": 3951
    },
    {
      "epoch": 0.8124164867920649,
      "grad_norm": 0.18361736834049225,
      "learning_rate": 7.699236895112903e-05,
      "loss": 0.5713,
      "step": 3952
    },
    {
      "epoch": 0.8126220577654435,
      "grad_norm": 0.1924244612455368,
      "learning_rate": 7.698534401554966e-05,
      "loss": 0.6732,
      "step": 3953
    },
    {
      "epoch": 0.8128276287388221,
      "grad_norm": 0.19700728356838226,
      "learning_rate": 7.697831750420189e-05,
      "loss": 0.6635,
      "step": 3954
    },
    {
      "epoch": 0.8130331997122007,
      "grad_norm": 0.20763562619686127,
      "learning_rate": 7.69712894174319e-05,
      "loss": 0.6926,
      "step": 3955
    },
    {
      "epoch": 0.8132387706855791,
      "grad_norm": 0.19522826373577118,
      "learning_rate": 7.69642597555859e-05,
      "loss": 0.6651,
      "step": 3956
    },
    {
      "epoch": 0.8134443416589577,
      "grad_norm": 0.18719004094600677,
      "learning_rate": 7.695722851901024e-05,
      "loss": 0.6871,
      "step": 3957
    },
    {
      "epoch": 0.8136499126323363,
      "grad_norm": 0.18853691220283508,
      "learning_rate": 7.695019570805129e-05,
      "loss": 0.6951,
      "step": 3958
    },
    {
      "epoch": 0.8138554836057149,
      "grad_norm": 0.191143199801445,
      "learning_rate": 7.694316132305553e-05,
      "loss": 0.6819,
      "step": 3959
    },
    {
      "epoch": 0.8140610545790934,
      "grad_norm": 0.20034968852996826,
      "learning_rate": 7.69361253643695e-05,
      "loss": 0.6813,
      "step": 3960
    },
    {
      "epoch": 0.814266625552472,
      "grad_norm": 0.1926213502883911,
      "learning_rate": 7.692908783233987e-05,
      "loss": 0.6766,
      "step": 3961
    },
    {
      "epoch": 0.8144721965258506,
      "grad_norm": 0.17970655858516693,
      "learning_rate": 7.692204872731329e-05,
      "loss": 0.6708,
      "step": 3962
    },
    {
      "epoch": 0.8146777674992292,
      "grad_norm": 0.18484726548194885,
      "learning_rate": 7.691500804963659e-05,
      "loss": 0.6606,
      "step": 3963
    },
    {
      "epoch": 0.8148833384726076,
      "grad_norm": 0.19342055916786194,
      "learning_rate": 7.690796579965661e-05,
      "loss": 0.6878,
      "step": 3964
    },
    {
      "epoch": 0.8150889094459862,
      "grad_norm": 0.17727455496788025,
      "learning_rate": 7.69009219777203e-05,
      "loss": 0.5893,
      "step": 3965
    },
    {
      "epoch": 0.8152944804193648,
      "grad_norm": 0.14557015895843506,
      "learning_rate": 7.689387658417466e-05,
      "loss": 0.5706,
      "step": 3966
    },
    {
      "epoch": 0.8155000513927434,
      "grad_norm": 0.20403575897216797,
      "learning_rate": 7.688682961936678e-05,
      "loss": 0.6717,
      "step": 3967
    },
    {
      "epoch": 0.8157056223661219,
      "grad_norm": 0.1949741244316101,
      "learning_rate": 7.687978108364386e-05,
      "loss": 0.6679,
      "step": 3968
    },
    {
      "epoch": 0.8159111933395005,
      "grad_norm": 0.18995149433612823,
      "learning_rate": 7.687273097735314e-05,
      "loss": 0.6625,
      "step": 3969
    },
    {
      "epoch": 0.816116764312879,
      "grad_norm": 0.1978754699230194,
      "learning_rate": 7.686567930084193e-05,
      "loss": 0.6665,
      "step": 3970
    },
    {
      "epoch": 0.8163223352862575,
      "grad_norm": 0.20074686408042908,
      "learning_rate": 7.685862605445763e-05,
      "loss": 0.585,
      "step": 3971
    },
    {
      "epoch": 0.8165279062596361,
      "grad_norm": 0.2053072452545166,
      "learning_rate": 7.685157123854774e-05,
      "loss": 0.6753,
      "step": 3972
    },
    {
      "epoch": 0.8167334772330147,
      "grad_norm": 0.19377997517585754,
      "learning_rate": 7.68445148534598e-05,
      "loss": 0.7029,
      "step": 3973
    },
    {
      "epoch": 0.8169390482063933,
      "grad_norm": 0.19419549405574799,
      "learning_rate": 7.683745689954146e-05,
      "loss": 0.6722,
      "step": 3974
    },
    {
      "epoch": 0.8171446191797718,
      "grad_norm": 0.1902785748243332,
      "learning_rate": 7.683039737714042e-05,
      "loss": 0.6982,
      "step": 3975
    },
    {
      "epoch": 0.8173501901531504,
      "grad_norm": 0.19267836213111877,
      "learning_rate": 7.68233362866045e-05,
      "loss": 0.6485,
      "step": 3976
    },
    {
      "epoch": 0.817555761126529,
      "grad_norm": 0.1380038857460022,
      "learning_rate": 7.681627362828152e-05,
      "loss": 0.583,
      "step": 3977
    },
    {
      "epoch": 0.8177613320999075,
      "grad_norm": 0.20162338018417358,
      "learning_rate": 7.680920940251947e-05,
      "loss": 0.662,
      "step": 3978
    },
    {
      "epoch": 0.817966903073286,
      "grad_norm": 0.12970632314682007,
      "learning_rate": 7.680214360966631e-05,
      "loss": 0.5716,
      "step": 3979
    },
    {
      "epoch": 0.8181724740466646,
      "grad_norm": 0.20082327723503113,
      "learning_rate": 7.679507625007021e-05,
      "loss": 0.681,
      "step": 3980
    },
    {
      "epoch": 0.8183780450200432,
      "grad_norm": 0.18788529932498932,
      "learning_rate": 7.67880073240793e-05,
      "loss": 0.6779,
      "step": 3981
    },
    {
      "epoch": 0.8185836159934218,
      "grad_norm": 0.1803288459777832,
      "learning_rate": 7.678093683204185e-05,
      "loss": 0.6553,
      "step": 3982
    },
    {
      "epoch": 0.8187891869668003,
      "grad_norm": 0.17987079918384552,
      "learning_rate": 7.677386477430619e-05,
      "loss": 0.6784,
      "step": 3983
    },
    {
      "epoch": 0.8189947579401788,
      "grad_norm": 0.14350593090057373,
      "learning_rate": 7.676679115122071e-05,
      "loss": 0.5904,
      "step": 3984
    },
    {
      "epoch": 0.8192003289135574,
      "grad_norm": 0.18889760971069336,
      "learning_rate": 7.675971596313391e-05,
      "loss": 0.6551,
      "step": 3985
    },
    {
      "epoch": 0.819405899886936,
      "grad_norm": 0.1940951943397522,
      "learning_rate": 7.675263921039436e-05,
      "loss": 0.6905,
      "step": 3986
    },
    {
      "epoch": 0.8196114708603145,
      "grad_norm": 0.18888835608959198,
      "learning_rate": 7.674556089335068e-05,
      "loss": 0.6613,
      "step": 3987
    },
    {
      "epoch": 0.8198170418336931,
      "grad_norm": 0.18659929931163788,
      "learning_rate": 7.673848101235161e-05,
      "loss": 0.6346,
      "step": 3988
    },
    {
      "epoch": 0.8200226128070717,
      "grad_norm": 0.19220280647277832,
      "learning_rate": 7.67313995677459e-05,
      "loss": 0.6835,
      "step": 3989
    },
    {
      "epoch": 0.8202281837804501,
      "grad_norm": 0.18803051114082336,
      "learning_rate": 7.672431655988245e-05,
      "loss": 0.6733,
      "step": 3990
    },
    {
      "epoch": 0.8204337547538287,
      "grad_norm": 0.15034914016723633,
      "learning_rate": 7.671723198911022e-05,
      "loss": 0.5774,
      "step": 3991
    },
    {
      "epoch": 0.8206393257272073,
      "grad_norm": 0.19378551840782166,
      "learning_rate": 7.671014585577821e-05,
      "loss": 0.6688,
      "step": 3992
    },
    {
      "epoch": 0.8208448967005859,
      "grad_norm": 0.22061464190483093,
      "learning_rate": 7.670305816023551e-05,
      "loss": 0.6763,
      "step": 3993
    },
    {
      "epoch": 0.8210504676739644,
      "grad_norm": 0.18267303705215454,
      "learning_rate": 7.669596890283132e-05,
      "loss": 0.6657,
      "step": 3994
    },
    {
      "epoch": 0.821256038647343,
      "grad_norm": 0.1902119219303131,
      "learning_rate": 7.66888780839149e-05,
      "loss": 0.6827,
      "step": 3995
    },
    {
      "epoch": 0.8214616096207216,
      "grad_norm": 0.1934443563222885,
      "learning_rate": 7.668178570383558e-05,
      "loss": 0.6979,
      "step": 3996
    },
    {
      "epoch": 0.8216671805941002,
      "grad_norm": 0.19263286888599396,
      "learning_rate": 7.667469176294272e-05,
      "loss": 0.6665,
      "step": 3997
    },
    {
      "epoch": 0.8218727515674786,
      "grad_norm": 0.13605189323425293,
      "learning_rate": 7.666759626158587e-05,
      "loss": 0.5615,
      "step": 3998
    },
    {
      "epoch": 0.8220783225408572,
      "grad_norm": 0.19073757529258728,
      "learning_rate": 7.666049920011457e-05,
      "loss": 0.6676,
      "step": 3999
    },
    {
      "epoch": 0.8222838935142358,
      "grad_norm": 0.193292036652565,
      "learning_rate": 7.665340057887844e-05,
      "loss": 0.6751,
      "step": 4000
    },
    {
      "epoch": 0.8224894644876144,
      "grad_norm": 0.18150904774665833,
      "learning_rate": 7.664630039822722e-05,
      "loss": 0.6678,
      "step": 4001
    },
    {
      "epoch": 0.8226950354609929,
      "grad_norm": 0.19092898070812225,
      "learning_rate": 7.663919865851071e-05,
      "loss": 0.6643,
      "step": 4002
    },
    {
      "epoch": 0.8229006064343715,
      "grad_norm": 0.1463061273097992,
      "learning_rate": 7.663209536007873e-05,
      "loss": 0.6015,
      "step": 4003
    },
    {
      "epoch": 0.82310617740775,
      "grad_norm": 0.13264085352420807,
      "learning_rate": 7.662499050328129e-05,
      "loss": 0.5761,
      "step": 4004
    },
    {
      "epoch": 0.8233117483811286,
      "grad_norm": 0.19010482728481293,
      "learning_rate": 7.661788408846837e-05,
      "loss": 0.6417,
      "step": 4005
    },
    {
      "epoch": 0.8235173193545071,
      "grad_norm": 0.1999100148677826,
      "learning_rate": 7.661077611599007e-05,
      "loss": 0.6863,
      "step": 4006
    },
    {
      "epoch": 0.8237228903278857,
      "grad_norm": 0.19514624774456024,
      "learning_rate": 7.660366658619658e-05,
      "loss": 0.6738,
      "step": 4007
    },
    {
      "epoch": 0.8239284613012643,
      "grad_norm": 0.18463024497032166,
      "learning_rate": 7.659655549943817e-05,
      "loss": 0.6723,
      "step": 4008
    },
    {
      "epoch": 0.8241340322746428,
      "grad_norm": 0.19612738490104675,
      "learning_rate": 7.658944285606515e-05,
      "loss": 0.6856,
      "step": 4009
    },
    {
      "epoch": 0.8243396032480214,
      "grad_norm": 0.18983608484268188,
      "learning_rate": 7.658232865642793e-05,
      "loss": 0.6705,
      "step": 4010
    },
    {
      "epoch": 0.8245451742214,
      "grad_norm": 0.18740776181221008,
      "learning_rate": 7.657521290087699e-05,
      "loss": 0.6769,
      "step": 4011
    },
    {
      "epoch": 0.8247507451947785,
      "grad_norm": 0.1823440045118332,
      "learning_rate": 7.656809558976289e-05,
      "loss": 0.663,
      "step": 4012
    },
    {
      "epoch": 0.824956316168157,
      "grad_norm": 0.18513023853302002,
      "learning_rate": 7.656097672343626e-05,
      "loss": 0.6657,
      "step": 4013
    },
    {
      "epoch": 0.8251618871415356,
      "grad_norm": 0.1865355670452118,
      "learning_rate": 7.655385630224783e-05,
      "loss": 0.649,
      "step": 4014
    },
    {
      "epoch": 0.8253674581149142,
      "grad_norm": 0.18735235929489136,
      "learning_rate": 7.654673432654839e-05,
      "loss": 0.6717,
      "step": 4015
    },
    {
      "epoch": 0.8255730290882928,
      "grad_norm": 0.25272443890571594,
      "learning_rate": 7.65396107966888e-05,
      "loss": 0.5985,
      "step": 4016
    },
    {
      "epoch": 0.8257786000616713,
      "grad_norm": 0.19560717046260834,
      "learning_rate": 7.653248571301998e-05,
      "loss": 0.6861,
      "step": 4017
    },
    {
      "epoch": 0.8259841710350498,
      "grad_norm": 0.2014644891023636,
      "learning_rate": 7.652535907589299e-05,
      "loss": 0.6849,
      "step": 4018
    },
    {
      "epoch": 0.8261897420084284,
      "grad_norm": 0.15079200267791748,
      "learning_rate": 7.65182308856589e-05,
      "loss": 0.5943,
      "step": 4019
    },
    {
      "epoch": 0.826395312981807,
      "grad_norm": 0.19071127474308014,
      "learning_rate": 7.651110114266889e-05,
      "loss": 0.672,
      "step": 4020
    },
    {
      "epoch": 0.8266008839551855,
      "grad_norm": 0.1912720799446106,
      "learning_rate": 7.650396984727422e-05,
      "loss": 0.672,
      "step": 4021
    },
    {
      "epoch": 0.8268064549285641,
      "grad_norm": 0.1873595118522644,
      "learning_rate": 7.64968369998262e-05,
      "loss": 0.6576,
      "step": 4022
    },
    {
      "epoch": 0.8270120259019427,
      "grad_norm": 0.19510895013809204,
      "learning_rate": 7.648970260067623e-05,
      "loss": 0.6711,
      "step": 4023
    },
    {
      "epoch": 0.8272175968753213,
      "grad_norm": 0.1938508152961731,
      "learning_rate": 7.64825666501758e-05,
      "loss": 0.6629,
      "step": 4024
    },
    {
      "epoch": 0.8274231678486997,
      "grad_norm": 0.1958763152360916,
      "learning_rate": 7.647542914867646e-05,
      "loss": 0.6749,
      "step": 4025
    },
    {
      "epoch": 0.8276287388220783,
      "grad_norm": 0.18302227556705475,
      "learning_rate": 7.646829009652985e-05,
      "loss": 0.6462,
      "step": 4026
    },
    {
      "epoch": 0.8278343097954569,
      "grad_norm": 0.15973201394081116,
      "learning_rate": 7.646114949408764e-05,
      "loss": 0.5734,
      "step": 4027
    },
    {
      "epoch": 0.8280398807688354,
      "grad_norm": 0.18773558735847473,
      "learning_rate": 7.645400734170168e-05,
      "loss": 0.6912,
      "step": 4028
    },
    {
      "epoch": 0.828245451742214,
      "grad_norm": 0.12838105857372284,
      "learning_rate": 7.644686363972378e-05,
      "loss": 0.5789,
      "step": 4029
    },
    {
      "epoch": 0.8284510227155926,
      "grad_norm": 0.19766302406787872,
      "learning_rate": 7.643971838850589e-05,
      "loss": 0.6654,
      "step": 4030
    },
    {
      "epoch": 0.8286565936889712,
      "grad_norm": 0.1896764189004898,
      "learning_rate": 7.643257158840001e-05,
      "loss": 0.7013,
      "step": 4031
    },
    {
      "epoch": 0.8288621646623496,
      "grad_norm": 0.14424748718738556,
      "learning_rate": 7.642542323975826e-05,
      "loss": 0.5759,
      "step": 4032
    },
    {
      "epoch": 0.8290677356357282,
      "grad_norm": 0.192418172955513,
      "learning_rate": 7.641827334293279e-05,
      "loss": 0.697,
      "step": 4033
    },
    {
      "epoch": 0.8292733066091068,
      "grad_norm": 0.19316205382347107,
      "learning_rate": 7.641112189827583e-05,
      "loss": 0.6466,
      "step": 4034
    },
    {
      "epoch": 0.8294788775824854,
      "grad_norm": 0.17913931608200073,
      "learning_rate": 7.640396890613972e-05,
      "loss": 0.6539,
      "step": 4035
    },
    {
      "epoch": 0.8296844485558639,
      "grad_norm": 0.1839427500963211,
      "learning_rate": 7.639681436687685e-05,
      "loss": 0.678,
      "step": 4036
    },
    {
      "epoch": 0.8298900195292425,
      "grad_norm": 0.18442392349243164,
      "learning_rate": 7.638965828083966e-05,
      "loss": 0.6628,
      "step": 4037
    },
    {
      "epoch": 0.830095590502621,
      "grad_norm": 0.1920039802789688,
      "learning_rate": 7.638250064838073e-05,
      "loss": 0.6813,
      "step": 4038
    },
    {
      "epoch": 0.8303011614759996,
      "grad_norm": 0.14554156363010406,
      "learning_rate": 7.637534146985269e-05,
      "loss": 0.5533,
      "step": 4039
    },
    {
      "epoch": 0.8305067324493781,
      "grad_norm": 0.13095219433307648,
      "learning_rate": 7.63681807456082e-05,
      "loss": 0.5738,
      "step": 4040
    },
    {
      "epoch": 0.8307123034227567,
      "grad_norm": 0.2078784555196762,
      "learning_rate": 7.636101847600008e-05,
      "loss": 0.6674,
      "step": 4041
    },
    {
      "epoch": 0.8309178743961353,
      "grad_norm": 0.21770761907100677,
      "learning_rate": 7.635385466138116e-05,
      "loss": 0.6671,
      "step": 4042
    },
    {
      "epoch": 0.8311234453695138,
      "grad_norm": 0.18896861374378204,
      "learning_rate": 7.634668930210436e-05,
      "loss": 0.6855,
      "step": 4043
    },
    {
      "epoch": 0.8313290163428924,
      "grad_norm": 0.14647965133190155,
      "learning_rate": 7.633952239852269e-05,
      "loss": 0.598,
      "step": 4044
    },
    {
      "epoch": 0.831534587316271,
      "grad_norm": 0.19375310838222504,
      "learning_rate": 7.633235395098923e-05,
      "loss": 0.6639,
      "step": 4045
    },
    {
      "epoch": 0.8317401582896495,
      "grad_norm": 0.19974082708358765,
      "learning_rate": 7.632518395985715e-05,
      "loss": 0.6907,
      "step": 4046
    },
    {
      "epoch": 0.831945729263028,
      "grad_norm": 0.19184468686580658,
      "learning_rate": 7.631801242547967e-05,
      "loss": 0.6713,
      "step": 4047
    },
    {
      "epoch": 0.8321513002364066,
      "grad_norm": 0.13093294203281403,
      "learning_rate": 7.631083934821008e-05,
      "loss": 0.5689,
      "step": 4048
    },
    {
      "epoch": 0.8323568712097852,
      "grad_norm": 0.19299007952213287,
      "learning_rate": 7.63036647284018e-05,
      "loss": 0.6664,
      "step": 4049
    },
    {
      "epoch": 0.8325624421831638,
      "grad_norm": 0.19684211909770966,
      "learning_rate": 7.629648856640827e-05,
      "loss": 0.6594,
      "step": 4050
    },
    {
      "epoch": 0.8327680131565423,
      "grad_norm": 0.1866525262594223,
      "learning_rate": 7.6289310862583e-05,
      "loss": 0.6664,
      "step": 4051
    },
    {
      "epoch": 0.8329735841299208,
      "grad_norm": 0.1905846893787384,
      "learning_rate": 7.628213161727966e-05,
      "loss": 0.6458,
      "step": 4052
    },
    {
      "epoch": 0.8331791551032994,
      "grad_norm": 0.19215607643127441,
      "learning_rate": 7.62749508308519e-05,
      "loss": 0.6508,
      "step": 4053
    },
    {
      "epoch": 0.833384726076678,
      "grad_norm": 0.18882425129413605,
      "learning_rate": 7.62677685036535e-05,
      "loss": 0.6679,
      "step": 4054
    },
    {
      "epoch": 0.8335902970500565,
      "grad_norm": 0.1906069815158844,
      "learning_rate": 7.626058463603828e-05,
      "loss": 0.6619,
      "step": 4055
    },
    {
      "epoch": 0.8337958680234351,
      "grad_norm": 0.18673735857009888,
      "learning_rate": 7.625339922836016e-05,
      "loss": 0.6658,
      "step": 4056
    },
    {
      "epoch": 0.8340014389968137,
      "grad_norm": 0.19083453714847565,
      "learning_rate": 7.624621228097316e-05,
      "loss": 0.6631,
      "step": 4057
    },
    {
      "epoch": 0.8342070099701923,
      "grad_norm": 0.18321901559829712,
      "learning_rate": 7.62390237942313e-05,
      "loss": 0.6579,
      "step": 4058
    },
    {
      "epoch": 0.8344125809435707,
      "grad_norm": 0.14776909351348877,
      "learning_rate": 7.623183376848878e-05,
      "loss": 0.5934,
      "step": 4059
    },
    {
      "epoch": 0.8346181519169493,
      "grad_norm": 0.20167462527751923,
      "learning_rate": 7.622464220409975e-05,
      "loss": 0.6709,
      "step": 4060
    },
    {
      "epoch": 0.8348237228903279,
      "grad_norm": 0.19711320102214813,
      "learning_rate": 7.621744910141858e-05,
      "loss": 0.6672,
      "step": 4061
    },
    {
      "epoch": 0.8350292938637064,
      "grad_norm": 0.18972383439540863,
      "learning_rate": 7.621025446079956e-05,
      "loss": 0.6677,
      "step": 4062
    },
    {
      "epoch": 0.835234864837085,
      "grad_norm": 0.19243162870407104,
      "learning_rate": 7.620305828259722e-05,
      "loss": 0.6874,
      "step": 4063
    },
    {
      "epoch": 0.8354404358104636,
      "grad_norm": 0.18802182376384735,
      "learning_rate": 7.619586056716601e-05,
      "loss": 0.6656,
      "step": 4064
    },
    {
      "epoch": 0.8356460067838422,
      "grad_norm": 0.14523807168006897,
      "learning_rate": 7.618866131486058e-05,
      "loss": 0.6011,
      "step": 4065
    },
    {
      "epoch": 0.8358515777572206,
      "grad_norm": 0.18922917544841766,
      "learning_rate": 7.618146052603557e-05,
      "loss": 0.6577,
      "step": 4066
    },
    {
      "epoch": 0.8360571487305992,
      "grad_norm": 0.19187946617603302,
      "learning_rate": 7.617425820104574e-05,
      "loss": 0.6774,
      "step": 4067
    },
    {
      "epoch": 0.8362627197039778,
      "grad_norm": 0.1862529069185257,
      "learning_rate": 7.616705434024593e-05,
      "loss": 0.6503,
      "step": 4068
    },
    {
      "epoch": 0.8364682906773564,
      "grad_norm": 0.19143825769424438,
      "learning_rate": 7.615984894399102e-05,
      "loss": 0.6803,
      "step": 4069
    },
    {
      "epoch": 0.8366738616507349,
      "grad_norm": 0.18703386187553406,
      "learning_rate": 7.615264201263599e-05,
      "loss": 0.6779,
      "step": 4070
    },
    {
      "epoch": 0.8368794326241135,
      "grad_norm": 0.18577006459236145,
      "learning_rate": 7.61454335465359e-05,
      "loss": 0.6671,
      "step": 4071
    },
    {
      "epoch": 0.837085003597492,
      "grad_norm": 0.18921016156673431,
      "learning_rate": 7.613822354604587e-05,
      "loss": 0.6955,
      "step": 4072
    },
    {
      "epoch": 0.8372905745708706,
      "grad_norm": 0.1349778026342392,
      "learning_rate": 7.613101201152111e-05,
      "loss": 0.568,
      "step": 4073
    },
    {
      "epoch": 0.8374961455442491,
      "grad_norm": 0.1813334822654724,
      "learning_rate": 7.612379894331689e-05,
      "loss": 0.6512,
      "step": 4074
    },
    {
      "epoch": 0.8377017165176277,
      "grad_norm": 0.1277725249528885,
      "learning_rate": 7.611658434178857e-05,
      "loss": 0.5773,
      "step": 4075
    },
    {
      "epoch": 0.8379072874910063,
      "grad_norm": 0.1959075778722763,
      "learning_rate": 7.610936820729157e-05,
      "loss": 0.6923,
      "step": 4076
    },
    {
      "epoch": 0.8381128584643849,
      "grad_norm": 0.19275759160518646,
      "learning_rate": 7.610215054018142e-05,
      "loss": 0.6868,
      "step": 4077
    },
    {
      "epoch": 0.8383184294377634,
      "grad_norm": 0.19022993743419647,
      "learning_rate": 7.609493134081367e-05,
      "loss": 0.636,
      "step": 4078
    },
    {
      "epoch": 0.838524000411142,
      "grad_norm": 0.1396605670452118,
      "learning_rate": 7.608771060954399e-05,
      "loss": 0.5913,
      "step": 4079
    },
    {
      "epoch": 0.8387295713845205,
      "grad_norm": 0.126824289560318,
      "learning_rate": 7.608048834672812e-05,
      "loss": 0.5857,
      "step": 4080
    },
    {
      "epoch": 0.838935142357899,
      "grad_norm": 0.20024533569812775,
      "learning_rate": 7.607326455272187e-05,
      "loss": 0.6722,
      "step": 4081
    },
    {
      "epoch": 0.8391407133312776,
      "grad_norm": 0.19841928780078888,
      "learning_rate": 7.606603922788108e-05,
      "loss": 0.6507,
      "step": 4082
    },
    {
      "epoch": 0.8393462843046562,
      "grad_norm": 0.17838910222053528,
      "learning_rate": 7.605881237256175e-05,
      "loss": 0.6203,
      "step": 4083
    },
    {
      "epoch": 0.8395518552780348,
      "grad_norm": 0.1466301828622818,
      "learning_rate": 7.605158398711991e-05,
      "loss": 0.5627,
      "step": 4084
    },
    {
      "epoch": 0.8397574262514133,
      "grad_norm": 0.1911042481660843,
      "learning_rate": 7.604435407191167e-05,
      "loss": 0.656,
      "step": 4085
    },
    {
      "epoch": 0.8399629972247918,
      "grad_norm": 0.1837422102689743,
      "learning_rate": 7.60371226272932e-05,
      "loss": 0.653,
      "step": 4086
    },
    {
      "epoch": 0.8401685681981704,
      "grad_norm": 0.1889040619134903,
      "learning_rate": 7.602988965362075e-05,
      "loss": 0.6757,
      "step": 4087
    },
    {
      "epoch": 0.840374139171549,
      "grad_norm": 0.18443772196769714,
      "learning_rate": 7.602265515125069e-05,
      "loss": 0.6627,
      "step": 4088
    },
    {
      "epoch": 0.8405797101449275,
      "grad_norm": 0.19531475007534027,
      "learning_rate": 7.601541912053939e-05,
      "loss": 0.6678,
      "step": 4089
    },
    {
      "epoch": 0.8407852811183061,
      "grad_norm": 0.18012624979019165,
      "learning_rate": 7.600818156184338e-05,
      "loss": 0.6605,
      "step": 4090
    },
    {
      "epoch": 0.8409908520916847,
      "grad_norm": 0.16611045598983765,
      "learning_rate": 7.600094247551918e-05,
      "loss": 0.606,
      "step": 4091
    },
    {
      "epoch": 0.8411964230650633,
      "grad_norm": 0.1904737800359726,
      "learning_rate": 7.599370186192345e-05,
      "loss": 0.6825,
      "step": 4092
    },
    {
      "epoch": 0.8414019940384417,
      "grad_norm": 0.1872866153717041,
      "learning_rate": 7.598645972141288e-05,
      "loss": 0.6555,
      "step": 4093
    },
    {
      "epoch": 0.8416075650118203,
      "grad_norm": 0.1912485808134079,
      "learning_rate": 7.59792160543443e-05,
      "loss": 0.667,
      "step": 4094
    },
    {
      "epoch": 0.8418131359851989,
      "grad_norm": 0.18316781520843506,
      "learning_rate": 7.597197086107451e-05,
      "loss": 0.6583,
      "step": 4095
    },
    {
      "epoch": 0.8420187069585775,
      "grad_norm": 0.18488352000713348,
      "learning_rate": 7.596472414196049e-05,
      "loss": 0.6619,
      "step": 4096
    },
    {
      "epoch": 0.842224277931956,
      "grad_norm": 0.16305844485759735,
      "learning_rate": 7.595747589735923e-05,
      "loss": 0.5869,
      "step": 4097
    },
    {
      "epoch": 0.8424298489053346,
      "grad_norm": 0.19764935970306396,
      "learning_rate": 7.595022612762786e-05,
      "loss": 0.6704,
      "step": 4098
    },
    {
      "epoch": 0.8426354198787132,
      "grad_norm": 0.2008553147315979,
      "learning_rate": 7.594297483312348e-05,
      "loss": 0.6928,
      "step": 4099
    },
    {
      "epoch": 0.8428409908520916,
      "grad_norm": 0.19005800783634186,
      "learning_rate": 7.593572201420336e-05,
      "loss": 0.68,
      "step": 4100
    },
    {
      "epoch": 0.8430465618254702,
      "grad_norm": 0.18260590732097626,
      "learning_rate": 7.592846767122481e-05,
      "loss": 0.6452,
      "step": 4101
    },
    {
      "epoch": 0.8432521327988488,
      "grad_norm": 0.24055607616901398,
      "learning_rate": 7.592121180454522e-05,
      "loss": 0.6555,
      "step": 4102
    },
    {
      "epoch": 0.8434577037722274,
      "grad_norm": 0.18779988586902618,
      "learning_rate": 7.591395441452205e-05,
      "loss": 0.6558,
      "step": 4103
    },
    {
      "epoch": 0.8436632747456059,
      "grad_norm": 0.19184498488903046,
      "learning_rate": 7.590669550151284e-05,
      "loss": 0.6737,
      "step": 4104
    },
    {
      "epoch": 0.8438688457189845,
      "grad_norm": 0.17881546914577484,
      "learning_rate": 7.58994350658752e-05,
      "loss": 0.6482,
      "step": 4105
    },
    {
      "epoch": 0.844074416692363,
      "grad_norm": 0.19403071701526642,
      "learning_rate": 7.589217310796682e-05,
      "loss": 0.6316,
      "step": 4106
    },
    {
      "epoch": 0.8442799876657416,
      "grad_norm": 0.18991516530513763,
      "learning_rate": 7.588490962814544e-05,
      "loss": 0.6286,
      "step": 4107
    },
    {
      "epoch": 0.8444855586391201,
      "grad_norm": 0.19792747497558594,
      "learning_rate": 7.587764462676895e-05,
      "loss": 0.6514,
      "step": 4108
    },
    {
      "epoch": 0.8446911296124987,
      "grad_norm": 0.18424390256404877,
      "learning_rate": 7.587037810419521e-05,
      "loss": 0.6726,
      "step": 4109
    },
    {
      "epoch": 0.8448967005858773,
      "grad_norm": 0.16541998088359833,
      "learning_rate": 7.586311006078223e-05,
      "loss": 0.5817,
      "step": 4110
    },
    {
      "epoch": 0.8451022715592559,
      "grad_norm": 0.19858099520206451,
      "learning_rate": 7.585584049688807e-05,
      "loss": 0.6799,
      "step": 4111
    },
    {
      "epoch": 0.8453078425326344,
      "grad_norm": 0.19580329954624176,
      "learning_rate": 7.58485694128709e-05,
      "loss": 0.6626,
      "step": 4112
    },
    {
      "epoch": 0.845513413506013,
      "grad_norm": 0.18652157485485077,
      "learning_rate": 7.584129680908886e-05,
      "loss": 0.6406,
      "step": 4113
    },
    {
      "epoch": 0.8457189844793915,
      "grad_norm": 0.1859186291694641,
      "learning_rate": 7.58340226859003e-05,
      "loss": 0.6477,
      "step": 4114
    },
    {
      "epoch": 0.8459245554527701,
      "grad_norm": 0.1960713267326355,
      "learning_rate": 7.582674704366354e-05,
      "loss": 0.6685,
      "step": 4115
    },
    {
      "epoch": 0.8461301264261486,
      "grad_norm": 0.19311878085136414,
      "learning_rate": 7.581946988273706e-05,
      "loss": 0.6976,
      "step": 4116
    },
    {
      "epoch": 0.8463356973995272,
      "grad_norm": 0.18788793683052063,
      "learning_rate": 7.581219120347933e-05,
      "loss": 0.6545,
      "step": 4117
    },
    {
      "epoch": 0.8465412683729058,
      "grad_norm": 0.1906074583530426,
      "learning_rate": 7.580491100624896e-05,
      "loss": 0.6772,
      "step": 4118
    },
    {
      "epoch": 0.8467468393462843,
      "grad_norm": 0.18752005696296692,
      "learning_rate": 7.579762929140462e-05,
      "loss": 0.672,
      "step": 4119
    },
    {
      "epoch": 0.8469524103196628,
      "grad_norm": 0.1863172948360443,
      "learning_rate": 7.579034605930502e-05,
      "loss": 0.6502,
      "step": 4120
    },
    {
      "epoch": 0.8471579812930414,
      "grad_norm": 0.18836906552314758,
      "learning_rate": 7.578306131030898e-05,
      "loss": 0.6438,
      "step": 4121
    },
    {
      "epoch": 0.84736355226642,
      "grad_norm": 0.1857694834470749,
      "learning_rate": 7.577577504477541e-05,
      "loss": 0.6595,
      "step": 4122
    },
    {
      "epoch": 0.8475691232397985,
      "grad_norm": 0.18018977344036102,
      "learning_rate": 7.576848726306323e-05,
      "loss": 0.6315,
      "step": 4123
    },
    {
      "epoch": 0.8477746942131771,
      "grad_norm": 0.18060006201267242,
      "learning_rate": 7.57611979655315e-05,
      "loss": 0.6764,
      "step": 4124
    },
    {
      "epoch": 0.8479802651865557,
      "grad_norm": 0.18697619438171387,
      "learning_rate": 7.575390715253932e-05,
      "loss": 0.6397,
      "step": 4125
    },
    {
      "epoch": 0.8481858361599343,
      "grad_norm": 0.19681645929813385,
      "learning_rate": 7.574661482444589e-05,
      "loss": 0.663,
      "step": 4126
    },
    {
      "epoch": 0.8483914071333127,
      "grad_norm": 0.18985417485237122,
      "learning_rate": 7.573932098161043e-05,
      "loss": 0.6413,
      "step": 4127
    },
    {
      "epoch": 0.8485969781066913,
      "grad_norm": 0.183248370885849,
      "learning_rate": 7.573202562439232e-05,
      "loss": 0.6521,
      "step": 4128
    },
    {
      "epoch": 0.8488025490800699,
      "grad_norm": 0.17444172501564026,
      "learning_rate": 7.572472875315095e-05,
      "loss": 0.5904,
      "step": 4129
    },
    {
      "epoch": 0.8490081200534485,
      "grad_norm": 0.21605822443962097,
      "learning_rate": 7.57174303682458e-05,
      "loss": 0.6615,
      "step": 4130
    },
    {
      "epoch": 0.849213691026827,
      "grad_norm": 0.20160672068595886,
      "learning_rate": 7.571013047003643e-05,
      "loss": 0.7124,
      "step": 4131
    },
    {
      "epoch": 0.8494192620002056,
      "grad_norm": 0.18523965775966644,
      "learning_rate": 7.570282905888246e-05,
      "loss": 0.6608,
      "step": 4132
    },
    {
      "epoch": 0.8496248329735842,
      "grad_norm": 0.19887828826904297,
      "learning_rate": 7.569552613514362e-05,
      "loss": 0.6699,
      "step": 4133
    },
    {
      "epoch": 0.8498304039469627,
      "grad_norm": 0.19583609700202942,
      "learning_rate": 7.568822169917967e-05,
      "loss": 0.6682,
      "step": 4134
    },
    {
      "epoch": 0.8500359749203412,
      "grad_norm": 0.19429847598075867,
      "learning_rate": 7.568091575135048e-05,
      "loss": 0.6828,
      "step": 4135
    },
    {
      "epoch": 0.8502415458937198,
      "grad_norm": 0.1865924745798111,
      "learning_rate": 7.567360829201597e-05,
      "loss": 0.674,
      "step": 4136
    },
    {
      "epoch": 0.8504471168670984,
      "grad_norm": 0.17295409739017487,
      "learning_rate": 7.566629932153615e-05,
      "loss": 0.5802,
      "step": 4137
    },
    {
      "epoch": 0.8506526878404769,
      "grad_norm": 0.1509198248386383,
      "learning_rate": 7.565898884027107e-05,
      "loss": 0.5835,
      "step": 4138
    },
    {
      "epoch": 0.8508582588138555,
      "grad_norm": 0.2158360481262207,
      "learning_rate": 7.565167684858095e-05,
      "loss": 0.6711,
      "step": 4139
    },
    {
      "epoch": 0.851063829787234,
      "grad_norm": 0.17296075820922852,
      "learning_rate": 7.564436334682594e-05,
      "loss": 0.6029,
      "step": 4140
    },
    {
      "epoch": 0.8512694007606126,
      "grad_norm": 0.21175174415111542,
      "learning_rate": 7.56370483353664e-05,
      "loss": 0.7072,
      "step": 4141
    },
    {
      "epoch": 0.8514749717339911,
      "grad_norm": 0.1445254236459732,
      "learning_rate": 7.562973181456269e-05,
      "loss": 0.5766,
      "step": 4142
    },
    {
      "epoch": 0.8516805427073697,
      "grad_norm": 0.19627566635608673,
      "learning_rate": 7.562241378477526e-05,
      "loss": 0.6652,
      "step": 4143
    },
    {
      "epoch": 0.8518861136807483,
      "grad_norm": 0.22292684018611908,
      "learning_rate": 7.561509424636462e-05,
      "loss": 0.7013,
      "step": 4144
    },
    {
      "epoch": 0.8520916846541269,
      "grad_norm": 0.1842968612909317,
      "learning_rate": 7.560777319969138e-05,
      "loss": 0.6621,
      "step": 4145
    },
    {
      "epoch": 0.8522972556275054,
      "grad_norm": 0.19120851159095764,
      "learning_rate": 7.560045064511622e-05,
      "loss": 0.6508,
      "step": 4146
    },
    {
      "epoch": 0.852502826600884,
      "grad_norm": 0.21807745099067688,
      "learning_rate": 7.559312658299988e-05,
      "loss": 0.6831,
      "step": 4147
    },
    {
      "epoch": 0.8527083975742625,
      "grad_norm": 0.19106024503707886,
      "learning_rate": 7.558580101370318e-05,
      "loss": 0.6636,
      "step": 4148
    },
    {
      "epoch": 0.8529139685476411,
      "grad_norm": 0.1850479245185852,
      "learning_rate": 7.557847393758702e-05,
      "loss": 0.589,
      "step": 4149
    },
    {
      "epoch": 0.8531195395210196,
      "grad_norm": 0.1937406063079834,
      "learning_rate": 7.55711453550124e-05,
      "loss": 0.6401,
      "step": 4150
    },
    {
      "epoch": 0.8533251104943982,
      "grad_norm": 0.12518863379955292,
      "learning_rate": 7.556381526634031e-05,
      "loss": 0.5776,
      "step": 4151
    },
    {
      "epoch": 0.8535306814677768,
      "grad_norm": 0.6598914861679077,
      "learning_rate": 7.555648367193191e-05,
      "loss": 0.6637,
      "step": 4152
    },
    {
      "epoch": 0.8537362524411554,
      "grad_norm": 0.19615043699741364,
      "learning_rate": 7.554915057214837e-05,
      "loss": 0.6883,
      "step": 4153
    },
    {
      "epoch": 0.8539418234145338,
      "grad_norm": 0.18384511768817902,
      "learning_rate": 7.554181596735097e-05,
      "loss": 0.6749,
      "step": 4154
    },
    {
      "epoch": 0.8541473943879124,
      "grad_norm": 0.198414608836174,
      "learning_rate": 7.553447985790105e-05,
      "loss": 0.6878,
      "step": 4155
    },
    {
      "epoch": 0.854352965361291,
      "grad_norm": 0.19876956939697266,
      "learning_rate": 7.552714224416002e-05,
      "loss": 0.6398,
      "step": 4156
    },
    {
      "epoch": 0.8545585363346695,
      "grad_norm": 0.18689413368701935,
      "learning_rate": 7.551980312648939e-05,
      "loss": 0.6765,
      "step": 4157
    },
    {
      "epoch": 0.8547641073080481,
      "grad_norm": 0.1880849003791809,
      "learning_rate": 7.55124625052507e-05,
      "loss": 0.6596,
      "step": 4158
    },
    {
      "epoch": 0.8549696782814267,
      "grad_norm": 0.18960778415203094,
      "learning_rate": 7.550512038080559e-05,
      "loss": 0.6677,
      "step": 4159
    },
    {
      "epoch": 0.8551752492548053,
      "grad_norm": 0.20969745516777039,
      "learning_rate": 7.549777675351581e-05,
      "loss": 0.5811,
      "step": 4160
    },
    {
      "epoch": 0.8553808202281837,
      "grad_norm": 0.1950722187757492,
      "learning_rate": 7.549043162374308e-05,
      "loss": 0.6807,
      "step": 4161
    },
    {
      "epoch": 0.8555863912015623,
      "grad_norm": 0.20414437353610992,
      "learning_rate": 7.54830849918493e-05,
      "loss": 0.6937,
      "step": 4162
    },
    {
      "epoch": 0.8557919621749409,
      "grad_norm": 0.3100520670413971,
      "learning_rate": 7.547573685819643e-05,
      "loss": 0.5698,
      "step": 4163
    },
    {
      "epoch": 0.8559975331483195,
      "grad_norm": 0.186519056558609,
      "learning_rate": 7.546838722314641e-05,
      "loss": 0.6604,
      "step": 4164
    },
    {
      "epoch": 0.856203104121698,
      "grad_norm": 0.19283641874790192,
      "learning_rate": 7.546103608706137e-05,
      "loss": 0.6484,
      "step": 4165
    },
    {
      "epoch": 0.8564086750950766,
      "grad_norm": 0.1958523392677307,
      "learning_rate": 7.545368345030348e-05,
      "loss": 0.6814,
      "step": 4166
    },
    {
      "epoch": 0.8566142460684552,
      "grad_norm": 0.19231447577476501,
      "learning_rate": 7.544632931323492e-05,
      "loss": 0.6768,
      "step": 4167
    },
    {
      "epoch": 0.8568198170418337,
      "grad_norm": 0.18475113809108734,
      "learning_rate": 7.543897367621804e-05,
      "loss": 0.6781,
      "step": 4168
    },
    {
      "epoch": 0.8570253880152122,
      "grad_norm": 0.1537688672542572,
      "learning_rate": 7.543161653961518e-05,
      "loss": 0.6122,
      "step": 4169
    },
    {
      "epoch": 0.8572309589885908,
      "grad_norm": 0.20179788768291473,
      "learning_rate": 7.542425790378882e-05,
      "loss": 0.6563,
      "step": 4170
    },
    {
      "epoch": 0.8574365299619694,
      "grad_norm": 0.1862722784280777,
      "learning_rate": 7.541689776910149e-05,
      "loss": 0.6752,
      "step": 4171
    },
    {
      "epoch": 0.857642100935348,
      "grad_norm": 0.18401017785072327,
      "learning_rate": 7.540953613591576e-05,
      "loss": 0.6828,
      "step": 4172
    },
    {
      "epoch": 0.8578476719087265,
      "grad_norm": 0.18829752504825592,
      "learning_rate": 7.540217300459431e-05,
      "loss": 0.6479,
      "step": 4173
    },
    {
      "epoch": 0.858053242882105,
      "grad_norm": 0.19413256645202637,
      "learning_rate": 7.539480837549991e-05,
      "loss": 0.6429,
      "step": 4174
    },
    {
      "epoch": 0.8582588138554836,
      "grad_norm": 0.19081558287143707,
      "learning_rate": 7.538744224899536e-05,
      "loss": 0.647,
      "step": 4175
    },
    {
      "epoch": 0.8584643848288621,
      "grad_norm": 0.15339916944503784,
      "learning_rate": 7.538007462544356e-05,
      "loss": 0.5791,
      "step": 4176
    },
    {
      "epoch": 0.8586699558022407,
      "grad_norm": 0.12977366149425507,
      "learning_rate": 7.537270550520749e-05,
      "loss": 0.6098,
      "step": 4177
    },
    {
      "epoch": 0.8588755267756193,
      "grad_norm": 0.21286390721797943,
      "learning_rate": 7.536533488865016e-05,
      "loss": 0.6783,
      "step": 4178
    },
    {
      "epoch": 0.8590810977489979,
      "grad_norm": 0.14268797636032104,
      "learning_rate": 7.535796277613473e-05,
      "loss": 0.5743,
      "step": 4179
    },
    {
      "epoch": 0.8592866687223764,
      "grad_norm": 0.19620656967163086,
      "learning_rate": 7.535058916802435e-05,
      "loss": 0.6796,
      "step": 4180
    },
    {
      "epoch": 0.859492239695755,
      "grad_norm": 0.18335068225860596,
      "learning_rate": 7.534321406468231e-05,
      "loss": 0.6621,
      "step": 4181
    },
    {
      "epoch": 0.8596978106691335,
      "grad_norm": 0.19787956774234772,
      "learning_rate": 7.533583746647194e-05,
      "loss": 0.6775,
      "step": 4182
    },
    {
      "epoch": 0.8599033816425121,
      "grad_norm": 0.19326303899288177,
      "learning_rate": 7.532845937375664e-05,
      "loss": 0.6674,
      "step": 4183
    },
    {
      "epoch": 0.8601089526158906,
      "grad_norm": 0.1872076541185379,
      "learning_rate": 7.532107978689988e-05,
      "loss": 0.6777,
      "step": 4184
    },
    {
      "epoch": 0.8603145235892692,
      "grad_norm": 0.18660016357898712,
      "learning_rate": 7.531369870626528e-05,
      "loss": 0.6712,
      "step": 4185
    },
    {
      "epoch": 0.8605200945626478,
      "grad_norm": 0.19512499868869781,
      "learning_rate": 7.53063161322164e-05,
      "loss": 0.6848,
      "step": 4186
    },
    {
      "epoch": 0.8607256655360264,
      "grad_norm": 0.19282682240009308,
      "learning_rate": 7.5298932065117e-05,
      "loss": 0.6611,
      "step": 4187
    },
    {
      "epoch": 0.8609312365094048,
      "grad_norm": 0.2191070318222046,
      "learning_rate": 7.529154650533081e-05,
      "loss": 0.6792,
      "step": 4188
    },
    {
      "epoch": 0.8611368074827834,
      "grad_norm": 0.1931408941745758,
      "learning_rate": 7.528415945322172e-05,
      "loss": 0.6362,
      "step": 4189
    },
    {
      "epoch": 0.861342378456162,
      "grad_norm": 0.18459977209568024,
      "learning_rate": 7.527677090915364e-05,
      "loss": 0.5784,
      "step": 4190
    },
    {
      "epoch": 0.8615479494295405,
      "grad_norm": 0.19997800886631012,
      "learning_rate": 7.526938087349057e-05,
      "loss": 0.677,
      "step": 4191
    },
    {
      "epoch": 0.8617535204029191,
      "grad_norm": 0.19136178493499756,
      "learning_rate": 7.52619893465966e-05,
      "loss": 0.6854,
      "step": 4192
    },
    {
      "epoch": 0.8619590913762977,
      "grad_norm": 0.18970435857772827,
      "learning_rate": 7.525459632883582e-05,
      "loss": 0.674,
      "step": 4193
    },
    {
      "epoch": 0.8621646623496763,
      "grad_norm": 0.21736173331737518,
      "learning_rate": 7.524720182057252e-05,
      "loss": 0.6546,
      "step": 4194
    },
    {
      "epoch": 0.8623702333230547,
      "grad_norm": 0.1582231968641281,
      "learning_rate": 7.523980582217096e-05,
      "loss": 0.5956,
      "step": 4195
    },
    {
      "epoch": 0.8625758042964333,
      "grad_norm": 0.19707003235816956,
      "learning_rate": 7.52324083339955e-05,
      "loss": 0.6682,
      "step": 4196
    },
    {
      "epoch": 0.8627813752698119,
      "grad_norm": 0.19862191379070282,
      "learning_rate": 7.522500935641058e-05,
      "loss": 0.6435,
      "step": 4197
    },
    {
      "epoch": 0.8629869462431905,
      "grad_norm": 0.1881260871887207,
      "learning_rate": 7.521760888978073e-05,
      "loss": 0.6581,
      "step": 4198
    },
    {
      "epoch": 0.863192517216569,
      "grad_norm": 0.1898849755525589,
      "learning_rate": 7.521020693447052e-05,
      "loss": 0.6645,
      "step": 4199
    },
    {
      "epoch": 0.8633980881899476,
      "grad_norm": 0.1787111759185791,
      "learning_rate": 7.520280349084462e-05,
      "loss": 0.6113,
      "step": 4200
    },
    {
      "epoch": 0.8636036591633262,
      "grad_norm": 0.19326132535934448,
      "learning_rate": 7.519539855926777e-05,
      "loss": 0.6772,
      "step": 4201
    },
    {
      "epoch": 0.8638092301367047,
      "grad_norm": 0.18564841151237488,
      "learning_rate": 7.518799214010474e-05,
      "loss": 0.6657,
      "step": 4202
    },
    {
      "epoch": 0.8640148011100832,
      "grad_norm": 0.2385823279619217,
      "learning_rate": 7.518058423372045e-05,
      "loss": 0.5945,
      "step": 4203
    },
    {
      "epoch": 0.8642203720834618,
      "grad_norm": 0.199651300907135,
      "learning_rate": 7.517317484047984e-05,
      "loss": 0.664,
      "step": 4204
    },
    {
      "epoch": 0.8644259430568404,
      "grad_norm": 0.194375678896904,
      "learning_rate": 7.516576396074794e-05,
      "loss": 0.6745,
      "step": 4205
    },
    {
      "epoch": 0.864631514030219,
      "grad_norm": 0.18686725199222565,
      "learning_rate": 7.515835159488984e-05,
      "loss": 0.6897,
      "step": 4206
    },
    {
      "epoch": 0.8648370850035975,
      "grad_norm": 0.18740524351596832,
      "learning_rate": 7.515093774327071e-05,
      "loss": 0.6931,
      "step": 4207
    },
    {
      "epoch": 0.865042655976976,
      "grad_norm": 0.1922253668308258,
      "learning_rate": 7.514352240625581e-05,
      "loss": 0.6467,
      "step": 4208
    },
    {
      "epoch": 0.8652482269503546,
      "grad_norm": 0.19109128415584564,
      "learning_rate": 7.513610558421045e-05,
      "loss": 0.6697,
      "step": 4209
    },
    {
      "epoch": 0.8654537979237331,
      "grad_norm": 0.18134894967079163,
      "learning_rate": 7.512868727750002e-05,
      "loss": 0.6566,
      "step": 4210
    },
    {
      "epoch": 0.8656593688971117,
      "grad_norm": 0.1900303065776825,
      "learning_rate": 7.512126748648999e-05,
      "loss": 0.6987,
      "step": 4211
    },
    {
      "epoch": 0.8658649398704903,
      "grad_norm": 0.19076496362686157,
      "learning_rate": 7.51138462115459e-05,
      "loss": 0.6514,
      "step": 4212
    },
    {
      "epoch": 0.8660705108438689,
      "grad_norm": 0.18519791960716248,
      "learning_rate": 7.510642345303338e-05,
      "loss": 0.6964,
      "step": 4213
    },
    {
      "epoch": 0.8662760818172474,
      "grad_norm": 0.13831019401550293,
      "learning_rate": 7.509899921131805e-05,
      "loss": 0.5829,
      "step": 4214
    },
    {
      "epoch": 0.866481652790626,
      "grad_norm": 0.20118573307991028,
      "learning_rate": 7.509157348676574e-05,
      "loss": 0.6699,
      "step": 4215
    },
    {
      "epoch": 0.8666872237640045,
      "grad_norm": 0.18774531781673431,
      "learning_rate": 7.508414627974225e-05,
      "loss": 0.6612,
      "step": 4216
    },
    {
      "epoch": 0.8668927947373831,
      "grad_norm": 0.17688573896884918,
      "learning_rate": 7.507671759061346e-05,
      "loss": 0.6519,
      "step": 4217
    },
    {
      "epoch": 0.8670983657107616,
      "grad_norm": 0.18357358872890472,
      "learning_rate": 7.50692874197454e-05,
      "loss": 0.6792,
      "step": 4218
    },
    {
      "epoch": 0.8673039366841402,
      "grad_norm": 0.19416451454162598,
      "learning_rate": 7.506185576750409e-05,
      "loss": 0.6708,
      "step": 4219
    },
    {
      "epoch": 0.8675095076575188,
      "grad_norm": 0.18293076753616333,
      "learning_rate": 7.505442263425565e-05,
      "loss": 0.6843,
      "step": 4220
    },
    {
      "epoch": 0.8677150786308974,
      "grad_norm": 0.18310247361660004,
      "learning_rate": 7.504698802036629e-05,
      "loss": 0.6409,
      "step": 4221
    },
    {
      "epoch": 0.8679206496042758,
      "grad_norm": 0.18264919519424438,
      "learning_rate": 7.503955192620225e-05,
      "loss": 0.6709,
      "step": 4222
    },
    {
      "epoch": 0.8681262205776544,
      "grad_norm": 0.19960664212703705,
      "learning_rate": 7.50321143521299e-05,
      "loss": 0.6537,
      "step": 4223
    },
    {
      "epoch": 0.868331791551033,
      "grad_norm": 0.19281069934368134,
      "learning_rate": 7.502467529851565e-05,
      "loss": 0.6657,
      "step": 4224
    },
    {
      "epoch": 0.8685373625244116,
      "grad_norm": 0.19561025500297546,
      "learning_rate": 7.501723476572599e-05,
      "loss": 0.6867,
      "step": 4225
    },
    {
      "epoch": 0.8687429334977901,
      "grad_norm": 0.17898957431316376,
      "learning_rate": 7.500979275412747e-05,
      "loss": 0.6587,
      "step": 4226
    },
    {
      "epoch": 0.8689485044711687,
      "grad_norm": 0.19035303592681885,
      "learning_rate": 7.500234926408671e-05,
      "loss": 0.6719,
      "step": 4227
    },
    {
      "epoch": 0.8691540754445473,
      "grad_norm": 0.1813403069972992,
      "learning_rate": 7.499490429597044e-05,
      "loss": 0.6734,
      "step": 4228
    },
    {
      "epoch": 0.8693596464179257,
      "grad_norm": 0.18334521353244781,
      "learning_rate": 7.498745785014543e-05,
      "loss": 0.6559,
      "step": 4229
    },
    {
      "epoch": 0.8695652173913043,
      "grad_norm": 0.17807736992835999,
      "learning_rate": 7.498000992697854e-05,
      "loss": 0.6318,
      "step": 4230
    },
    {
      "epoch": 0.8697707883646829,
      "grad_norm": 0.18650507926940918,
      "learning_rate": 7.497256052683668e-05,
      "loss": 0.666,
      "step": 4231
    },
    {
      "epoch": 0.8699763593380615,
      "grad_norm": 0.18326011300086975,
      "learning_rate": 7.496510965008686e-05,
      "loss": 0.6587,
      "step": 4232
    },
    {
      "epoch": 0.87018193031144,
      "grad_norm": 0.1905418336391449,
      "learning_rate": 7.495765729709615e-05,
      "loss": 0.6544,
      "step": 4233
    },
    {
      "epoch": 0.8703875012848186,
      "grad_norm": 0.187713161110878,
      "learning_rate": 7.495020346823168e-05,
      "loss": 0.6711,
      "step": 4234
    },
    {
      "epoch": 0.8705930722581972,
      "grad_norm": 0.1464671492576599,
      "learning_rate": 7.494274816386066e-05,
      "loss": 0.5836,
      "step": 4235
    },
    {
      "epoch": 0.8707986432315757,
      "grad_norm": 0.5995880961418152,
      "learning_rate": 7.49352913843504e-05,
      "loss": 0.6806,
      "step": 4236
    },
    {
      "epoch": 0.8710042142049542,
      "grad_norm": 0.18340499699115753,
      "learning_rate": 7.492783313006827e-05,
      "loss": 0.676,
      "step": 4237
    },
    {
      "epoch": 0.8712097851783328,
      "grad_norm": 0.191572368144989,
      "learning_rate": 7.492037340138165e-05,
      "loss": 0.6651,
      "step": 4238
    },
    {
      "epoch": 0.8714153561517114,
      "grad_norm": 0.13379883766174316,
      "learning_rate": 7.49129121986581e-05,
      "loss": 0.5498,
      "step": 4239
    },
    {
      "epoch": 0.87162092712509,
      "grad_norm": 0.19760626554489136,
      "learning_rate": 7.490544952226517e-05,
      "loss": 0.6624,
      "step": 4240
    },
    {
      "epoch": 0.8718264980984685,
      "grad_norm": 0.19867949187755585,
      "learning_rate": 7.489798537257052e-05,
      "loss": 0.6542,
      "step": 4241
    },
    {
      "epoch": 0.872032069071847,
      "grad_norm": 0.13943122327327728,
      "learning_rate": 7.489051974994188e-05,
      "loss": 0.5833,
      "step": 4242
    },
    {
      "epoch": 0.8722376400452256,
      "grad_norm": 0.20543548464775085,
      "learning_rate": 7.488305265474704e-05,
      "loss": 0.6621,
      "step": 4243
    },
    {
      "epoch": 0.8724432110186042,
      "grad_norm": 0.19805829226970673,
      "learning_rate": 7.487558408735387e-05,
      "loss": 0.6489,
      "step": 4244
    },
    {
      "epoch": 0.8726487819919827,
      "grad_norm": 0.1895926296710968,
      "learning_rate": 7.486811404813032e-05,
      "loss": 0.688,
      "step": 4245
    },
    {
      "epoch": 0.8728543529653613,
      "grad_norm": 0.13180671632289886,
      "learning_rate": 7.486064253744436e-05,
      "loss": 0.587,
      "step": 4246
    },
    {
      "epoch": 0.8730599239387399,
      "grad_norm": 0.20886261761188507,
      "learning_rate": 7.485316955566414e-05,
      "loss": 0.6347,
      "step": 4247
    },
    {
      "epoch": 0.8732654949121184,
      "grad_norm": 0.20359115302562714,
      "learning_rate": 7.484569510315778e-05,
      "loss": 0.6872,
      "step": 4248
    },
    {
      "epoch": 0.873471065885497,
      "grad_norm": 0.184517964720726,
      "learning_rate": 7.483821918029351e-05,
      "loss": 0.6556,
      "step": 4249
    },
    {
      "epoch": 0.8736766368588755,
      "grad_norm": 0.1971379816532135,
      "learning_rate": 7.483074178743966e-05,
      "loss": 0.6817,
      "step": 4250
    },
    {
      "epoch": 0.8738822078322541,
      "grad_norm": 0.19668948650360107,
      "learning_rate": 7.482326292496458e-05,
      "loss": 0.6625,
      "step": 4251
    },
    {
      "epoch": 0.8740877788056326,
      "grad_norm": 0.1894627958536148,
      "learning_rate": 7.481578259323674e-05,
      "loss": 0.6445,
      "step": 4252
    },
    {
      "epoch": 0.8742933497790112,
      "grad_norm": 0.1403988003730774,
      "learning_rate": 7.480830079262465e-05,
      "loss": 0.5633,
      "step": 4253
    },
    {
      "epoch": 0.8744989207523898,
      "grad_norm": 0.12436271458864212,
      "learning_rate": 7.48008175234969e-05,
      "loss": 0.5708,
      "step": 4254
    },
    {
      "epoch": 0.8747044917257684,
      "grad_norm": 0.7834026217460632,
      "learning_rate": 7.479333278622216e-05,
      "loss": 0.6563,
      "step": 4255
    },
    {
      "epoch": 0.8749100626991468,
      "grad_norm": 0.1350373923778534,
      "learning_rate": 7.478584658116915e-05,
      "loss": 0.5961,
      "step": 4256
    },
    {
      "epoch": 0.8751156336725254,
      "grad_norm": 0.1937408745288849,
      "learning_rate": 7.477835890870672e-05,
      "loss": 0.6703,
      "step": 4257
    },
    {
      "epoch": 0.875321204645904,
      "grad_norm": 0.13636933267116547,
      "learning_rate": 7.477086976920373e-05,
      "loss": 0.5909,
      "step": 4258
    },
    {
      "epoch": 0.8755267756192826,
      "grad_norm": 0.21809430420398712,
      "learning_rate": 7.476337916302911e-05,
      "loss": 0.6848,
      "step": 4259
    },
    {
      "epoch": 0.8757323465926611,
      "grad_norm": 0.16706953942775726,
      "learning_rate": 7.475588709055195e-05,
      "loss": 0.5596,
      "step": 4260
    },
    {
      "epoch": 0.8759379175660397,
      "grad_norm": 0.19577208161354065,
      "learning_rate": 7.47483935521413e-05,
      "loss": 0.6608,
      "step": 4261
    },
    {
      "epoch": 0.8761434885394183,
      "grad_norm": 0.194346085190773,
      "learning_rate": 7.474089854816633e-05,
      "loss": 0.6508,
      "step": 4262
    },
    {
      "epoch": 0.8763490595127968,
      "grad_norm": 0.20509304106235504,
      "learning_rate": 7.47334020789963e-05,
      "loss": 0.6794,
      "step": 4263
    },
    {
      "epoch": 0.8765546304861753,
      "grad_norm": 0.20143075287342072,
      "learning_rate": 7.472590414500053e-05,
      "loss": 0.691,
      "step": 4264
    },
    {
      "epoch": 0.8767602014595539,
      "grad_norm": 0.2505229711532593,
      "learning_rate": 7.471840474654838e-05,
      "loss": 0.652,
      "step": 4265
    },
    {
      "epoch": 0.8769657724329325,
      "grad_norm": 0.18424780666828156,
      "learning_rate": 7.471090388400936e-05,
      "loss": 0.6396,
      "step": 4266
    },
    {
      "epoch": 0.877171343406311,
      "grad_norm": 0.18971550464630127,
      "learning_rate": 7.470340155775296e-05,
      "loss": 0.6445,
      "step": 4267
    },
    {
      "epoch": 0.8773769143796896,
      "grad_norm": 0.19411668181419373,
      "learning_rate": 7.46958977681488e-05,
      "loss": 0.6377,
      "step": 4268
    },
    {
      "epoch": 0.8775824853530682,
      "grad_norm": 0.1822851151227951,
      "learning_rate": 7.468839251556656e-05,
      "loss": 0.6684,
      "step": 4269
    },
    {
      "epoch": 0.8777880563264467,
      "grad_norm": 0.17239375412464142,
      "learning_rate": 7.468088580037598e-05,
      "loss": 0.5929,
      "step": 4270
    },
    {
      "epoch": 0.8779936272998252,
      "grad_norm": 0.19313600659370422,
      "learning_rate": 7.467337762294689e-05,
      "loss": 0.659,
      "step": 4271
    },
    {
      "epoch": 0.8781991982732038,
      "grad_norm": 0.18807615339756012,
      "learning_rate": 7.466586798364918e-05,
      "loss": 0.6608,
      "step": 4272
    },
    {
      "epoch": 0.8784047692465824,
      "grad_norm": 0.1784089207649231,
      "learning_rate": 7.46583568828528e-05,
      "loss": 0.6781,
      "step": 4273
    },
    {
      "epoch": 0.878610340219961,
      "grad_norm": 0.21919219195842743,
      "learning_rate": 7.46508443209278e-05,
      "loss": 0.6469,
      "step": 4274
    },
    {
      "epoch": 0.8788159111933395,
      "grad_norm": 0.20207509398460388,
      "learning_rate": 7.464333029824429e-05,
      "loss": 0.6928,
      "step": 4275
    },
    {
      "epoch": 0.879021482166718,
      "grad_norm": 0.18525582551956177,
      "learning_rate": 7.463581481517245e-05,
      "loss": 0.6391,
      "step": 4276
    },
    {
      "epoch": 0.8792270531400966,
      "grad_norm": 0.1859021782875061,
      "learning_rate": 7.462829787208254e-05,
      "loss": 0.6515,
      "step": 4277
    },
    {
      "epoch": 0.8794326241134752,
      "grad_norm": 0.1962486058473587,
      "learning_rate": 7.462077946934488e-05,
      "loss": 0.6575,
      "step": 4278
    },
    {
      "epoch": 0.8796381950868537,
      "grad_norm": 0.1927611082792282,
      "learning_rate": 7.461325960732984e-05,
      "loss": 0.6696,
      "step": 4279
    },
    {
      "epoch": 0.8798437660602323,
      "grad_norm": 0.1841474175453186,
      "learning_rate": 7.460573828640791e-05,
      "loss": 0.6796,
      "step": 4280
    },
    {
      "epoch": 0.8800493370336109,
      "grad_norm": 0.17558540403842926,
      "learning_rate": 7.459821550694965e-05,
      "loss": 0.6047,
      "step": 4281
    },
    {
      "epoch": 0.8802549080069895,
      "grad_norm": 0.19254080951213837,
      "learning_rate": 7.459069126932565e-05,
      "loss": 0.6795,
      "step": 4282
    },
    {
      "epoch": 0.8804604789803679,
      "grad_norm": 0.21128569543361664,
      "learning_rate": 7.45831655739066e-05,
      "loss": 0.6753,
      "step": 4283
    },
    {
      "epoch": 0.8806660499537465,
      "grad_norm": 0.18865573406219482,
      "learning_rate": 7.457563842106324e-05,
      "loss": 0.6917,
      "step": 4284
    },
    {
      "epoch": 0.8808716209271251,
      "grad_norm": 0.14653199911117554,
      "learning_rate": 7.456810981116643e-05,
      "loss": 0.5964,
      "step": 4285
    },
    {
      "epoch": 0.8810771919005036,
      "grad_norm": 0.19860735535621643,
      "learning_rate": 7.456057974458704e-05,
      "loss": 0.6534,
      "step": 4286
    },
    {
      "epoch": 0.8812827628738822,
      "grad_norm": 0.1889762133359909,
      "learning_rate": 7.455304822169606e-05,
      "loss": 0.6638,
      "step": 4287
    },
    {
      "epoch": 0.8814883338472608,
      "grad_norm": 0.19104063510894775,
      "learning_rate": 7.454551524286451e-05,
      "loss": 0.6779,
      "step": 4288
    },
    {
      "epoch": 0.8816939048206394,
      "grad_norm": 0.14345437288284302,
      "learning_rate": 7.453798080846353e-05,
      "loss": 0.5678,
      "step": 4289
    },
    {
      "epoch": 0.8818994757940178,
      "grad_norm": 0.1917407065629959,
      "learning_rate": 7.453044491886429e-05,
      "loss": 0.6866,
      "step": 4290
    },
    {
      "epoch": 0.8821050467673964,
      "grad_norm": 0.18986758589744568,
      "learning_rate": 7.452290757443806e-05,
      "loss": 0.6745,
      "step": 4291
    },
    {
      "epoch": 0.882310617740775,
      "grad_norm": 0.18070244789123535,
      "learning_rate": 7.451536877555617e-05,
      "loss": 0.6416,
      "step": 4292
    },
    {
      "epoch": 0.8825161887141536,
      "grad_norm": 0.1812668889760971,
      "learning_rate": 7.450782852259e-05,
      "loss": 0.6547,
      "step": 4293
    },
    {
      "epoch": 0.8827217596875321,
      "grad_norm": 0.19002775847911835,
      "learning_rate": 7.450028681591104e-05,
      "loss": 0.6392,
      "step": 4294
    },
    {
      "epoch": 0.8829273306609107,
      "grad_norm": 0.18384869396686554,
      "learning_rate": 7.449274365589083e-05,
      "loss": 0.6789,
      "step": 4295
    },
    {
      "epoch": 0.8831329016342893,
      "grad_norm": 0.13192251324653625,
      "learning_rate": 7.4485199042901e-05,
      "loss": 0.5635,
      "step": 4296
    },
    {
      "epoch": 0.8833384726076678,
      "grad_norm": 0.20571434497833252,
      "learning_rate": 7.447765297731322e-05,
      "loss": 0.7032,
      "step": 4297
    },
    {
      "epoch": 0.8835440435810463,
      "grad_norm": 0.1892521232366562,
      "learning_rate": 7.447010545949926e-05,
      "loss": 0.6616,
      "step": 4298
    },
    {
      "epoch": 0.8837496145544249,
      "grad_norm": 0.1817133128643036,
      "learning_rate": 7.446255648983095e-05,
      "loss": 0.68,
      "step": 4299
    },
    {
      "epoch": 0.8839551855278035,
      "grad_norm": 0.18332892656326294,
      "learning_rate": 7.445500606868016e-05,
      "loss": 0.6436,
      "step": 4300
    },
    {
      "epoch": 0.8841607565011821,
      "grad_norm": 0.18680675327777863,
      "learning_rate": 7.444745419641893e-05,
      "loss": 0.6678,
      "step": 4301
    },
    {
      "epoch": 0.8843663274745606,
      "grad_norm": 0.18525037169456482,
      "learning_rate": 7.443990087341926e-05,
      "loss": 0.6411,
      "step": 4302
    },
    {
      "epoch": 0.8845718984479392,
      "grad_norm": 0.18258033692836761,
      "learning_rate": 7.443234610005327e-05,
      "loss": 0.6625,
      "step": 4303
    },
    {
      "epoch": 0.8847774694213177,
      "grad_norm": 0.1923125982284546,
      "learning_rate": 7.442478987669315e-05,
      "loss": 0.646,
      "step": 4304
    },
    {
      "epoch": 0.8849830403946962,
      "grad_norm": 0.18216663599014282,
      "learning_rate": 7.441723220371118e-05,
      "loss": 0.6628,
      "step": 4305
    },
    {
      "epoch": 0.8851886113680748,
      "grad_norm": 0.15292415022850037,
      "learning_rate": 7.440967308147966e-05,
      "loss": 0.5989,
      "step": 4306
    },
    {
      "epoch": 0.8853941823414534,
      "grad_norm": 0.187953382730484,
      "learning_rate": 7.440211251037101e-05,
      "loss": 0.6624,
      "step": 4307
    },
    {
      "epoch": 0.885599753314832,
      "grad_norm": 0.1256251335144043,
      "learning_rate": 7.439455049075771e-05,
      "loss": 0.5845,
      "step": 4308
    },
    {
      "epoch": 0.8858053242882105,
      "grad_norm": 0.19565753638744354,
      "learning_rate": 7.438698702301229e-05,
      "loss": 0.674,
      "step": 4309
    },
    {
      "epoch": 0.886010895261589,
      "grad_norm": 0.1811288446187973,
      "learning_rate": 7.437942210750737e-05,
      "loss": 0.6772,
      "step": 4310
    },
    {
      "epoch": 0.8862164662349676,
      "grad_norm": 0.18292637169361115,
      "learning_rate": 7.437185574461564e-05,
      "loss": 0.6611,
      "step": 4311
    },
    {
      "epoch": 0.8864220372083462,
      "grad_norm": 0.18883992731571198,
      "learning_rate": 7.436428793470987e-05,
      "loss": 0.6885,
      "step": 4312
    },
    {
      "epoch": 0.8866276081817247,
      "grad_norm": 0.17563700675964355,
      "learning_rate": 7.435671867816288e-05,
      "loss": 0.6364,
      "step": 4313
    },
    {
      "epoch": 0.8868331791551033,
      "grad_norm": 0.1886730045080185,
      "learning_rate": 7.434914797534758e-05,
      "loss": 0.6734,
      "step": 4314
    },
    {
      "epoch": 0.8870387501284819,
      "grad_norm": 0.18746259808540344,
      "learning_rate": 7.434157582663691e-05,
      "loss": 0.6793,
      "step": 4315
    },
    {
      "epoch": 0.8872443211018605,
      "grad_norm": 0.16091619431972504,
      "learning_rate": 7.433400223240397e-05,
      "loss": 0.6101,
      "step": 4316
    },
    {
      "epoch": 0.8874498920752389,
      "grad_norm": 0.1879081130027771,
      "learning_rate": 7.432642719302184e-05,
      "loss": 0.6706,
      "step": 4317
    },
    {
      "epoch": 0.8876554630486175,
      "grad_norm": 0.1933298110961914,
      "learning_rate": 7.431885070886372e-05,
      "loss": 0.6647,
      "step": 4318
    },
    {
      "epoch": 0.8878610340219961,
      "grad_norm": 0.12698352336883545,
      "learning_rate": 7.431127278030285e-05,
      "loss": 0.5725,
      "step": 4319
    },
    {
      "epoch": 0.8880666049953746,
      "grad_norm": 0.18227995932102203,
      "learning_rate": 7.430369340771258e-05,
      "loss": 0.6751,
      "step": 4320
    },
    {
      "epoch": 0.8882721759687532,
      "grad_norm": 0.12696510553359985,
      "learning_rate": 7.429611259146628e-05,
      "loss": 0.5934,
      "step": 4321
    },
    {
      "epoch": 0.8884777469421318,
      "grad_norm": 0.12385066598653793,
      "learning_rate": 7.428853033193745e-05,
      "loss": 0.5753,
      "step": 4322
    },
    {
      "epoch": 0.8886833179155104,
      "grad_norm": 0.189598947763443,
      "learning_rate": 7.428094662949964e-05,
      "loss": 0.6631,
      "step": 4323
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 0.19134309887886047,
      "learning_rate": 7.427336148452645e-05,
      "loss": 0.6627,
      "step": 4324
    },
    {
      "epoch": 0.8890944598622674,
      "grad_norm": 0.1795106679201126,
      "learning_rate": 7.426577489739155e-05,
      "loss": 0.6591,
      "step": 4325
    },
    {
      "epoch": 0.889300030835646,
      "grad_norm": 0.18666116893291473,
      "learning_rate": 7.425818686846872e-05,
      "loss": 0.6704,
      "step": 4326
    },
    {
      "epoch": 0.8895056018090246,
      "grad_norm": 0.18348322808742523,
      "learning_rate": 7.425059739813177e-05,
      "loss": 0.6872,
      "step": 4327
    },
    {
      "epoch": 0.8897111727824031,
      "grad_norm": 0.18486203253269196,
      "learning_rate": 7.424300648675459e-05,
      "loss": 0.683,
      "step": 4328
    },
    {
      "epoch": 0.8899167437557817,
      "grad_norm": 0.19054512679576874,
      "learning_rate": 7.423541413471117e-05,
      "loss": 0.6541,
      "step": 4329
    },
    {
      "epoch": 0.8901223147291603,
      "grad_norm": 0.18132087588310242,
      "learning_rate": 7.422782034237554e-05,
      "loss": 0.6879,
      "step": 4330
    },
    {
      "epoch": 0.8903278857025388,
      "grad_norm": 0.17876796424388885,
      "learning_rate": 7.422022511012182e-05,
      "loss": 0.6338,
      "step": 4331
    },
    {
      "epoch": 0.8905334566759173,
      "grad_norm": 0.18260298669338226,
      "learning_rate": 7.421262843832417e-05,
      "loss": 0.6436,
      "step": 4332
    },
    {
      "epoch": 0.8907390276492959,
      "grad_norm": 0.19324032962322235,
      "learning_rate": 7.420503032735688e-05,
      "loss": 0.6672,
      "step": 4333
    },
    {
      "epoch": 0.8909445986226745,
      "grad_norm": 0.1886059194803238,
      "learning_rate": 7.419743077759423e-05,
      "loss": 0.6803,
      "step": 4334
    },
    {
      "epoch": 0.8911501695960531,
      "grad_norm": 0.18304765224456787,
      "learning_rate": 7.418982978941065e-05,
      "loss": 0.682,
      "step": 4335
    },
    {
      "epoch": 0.8913557405694316,
      "grad_norm": 0.17993968725204468,
      "learning_rate": 7.418222736318057e-05,
      "loss": 0.5898,
      "step": 4336
    },
    {
      "epoch": 0.8915613115428102,
      "grad_norm": 0.21449178457260132,
      "learning_rate": 7.417462349927855e-05,
      "loss": 0.6657,
      "step": 4337
    },
    {
      "epoch": 0.8917668825161887,
      "grad_norm": 0.1957646608352661,
      "learning_rate": 7.41670181980792e-05,
      "loss": 0.6752,
      "step": 4338
    },
    {
      "epoch": 0.8919724534895672,
      "grad_norm": 0.1868593990802765,
      "learning_rate": 7.415941145995719e-05,
      "loss": 0.7023,
      "step": 4339
    },
    {
      "epoch": 0.8921780244629458,
      "grad_norm": 0.17802853882312775,
      "learning_rate": 7.415180328528726e-05,
      "loss": 0.6407,
      "step": 4340
    },
    {
      "epoch": 0.8923835954363244,
      "grad_norm": 0.1869519203901291,
      "learning_rate": 7.414419367444425e-05,
      "loss": 0.6797,
      "step": 4341
    },
    {
      "epoch": 0.892589166409703,
      "grad_norm": 0.1857430785894394,
      "learning_rate": 7.413658262780301e-05,
      "loss": 0.6507,
      "step": 4342
    },
    {
      "epoch": 0.8927947373830815,
      "grad_norm": 0.18577779829502106,
      "learning_rate": 7.412897014573856e-05,
      "loss": 0.6426,
      "step": 4343
    },
    {
      "epoch": 0.89300030835646,
      "grad_norm": 0.18945308029651642,
      "learning_rate": 7.412135622862588e-05,
      "loss": 0.6654,
      "step": 4344
    },
    {
      "epoch": 0.8932058793298386,
      "grad_norm": 0.19126087427139282,
      "learning_rate": 7.41137408768401e-05,
      "loss": 0.6727,
      "step": 4345
    },
    {
      "epoch": 0.8934114503032172,
      "grad_norm": 0.18092653155326843,
      "learning_rate": 7.410612409075639e-05,
      "loss": 0.6423,
      "step": 4346
    },
    {
      "epoch": 0.8936170212765957,
      "grad_norm": 0.18795473873615265,
      "learning_rate": 7.409850587074997e-05,
      "loss": 0.671,
      "step": 4347
    },
    {
      "epoch": 0.8938225922499743,
      "grad_norm": 0.18189306557178497,
      "learning_rate": 7.409088621719618e-05,
      "loss": 0.6605,
      "step": 4348
    },
    {
      "epoch": 0.8940281632233529,
      "grad_norm": 0.532122015953064,
      "learning_rate": 7.40832651304704e-05,
      "loss": 0.7076,
      "step": 4349
    },
    {
      "epoch": 0.8942337341967315,
      "grad_norm": 0.18601365387439728,
      "learning_rate": 7.407564261094808e-05,
      "loss": 0.6822,
      "step": 4350
    },
    {
      "epoch": 0.8944393051701099,
      "grad_norm": 0.17646148800849915,
      "learning_rate": 7.406801865900474e-05,
      "loss": 0.5773,
      "step": 4351
    },
    {
      "epoch": 0.8946448761434885,
      "grad_norm": 0.19108296930789948,
      "learning_rate": 7.406039327501599e-05,
      "loss": 0.6699,
      "step": 4352
    },
    {
      "epoch": 0.8948504471168671,
      "grad_norm": 0.13074934482574463,
      "learning_rate": 7.40527664593575e-05,
      "loss": 0.5759,
      "step": 4353
    },
    {
      "epoch": 0.8950560180902457,
      "grad_norm": 0.2011304795742035,
      "learning_rate": 7.4045138212405e-05,
      "loss": 0.6647,
      "step": 4354
    },
    {
      "epoch": 0.8952615890636242,
      "grad_norm": 0.20265452563762665,
      "learning_rate": 7.403750853453428e-05,
      "loss": 0.6872,
      "step": 4355
    },
    {
      "epoch": 0.8954671600370028,
      "grad_norm": 0.14710208773612976,
      "learning_rate": 7.402987742612124e-05,
      "loss": 0.5707,
      "step": 4356
    },
    {
      "epoch": 0.8956727310103814,
      "grad_norm": 0.1870591640472412,
      "learning_rate": 7.402224488754184e-05,
      "loss": 0.6863,
      "step": 4357
    },
    {
      "epoch": 0.8958783019837598,
      "grad_norm": 0.18606425821781158,
      "learning_rate": 7.401461091917206e-05,
      "loss": 0.6825,
      "step": 4358
    },
    {
      "epoch": 0.8960838729571384,
      "grad_norm": 0.18178561329841614,
      "learning_rate": 7.400697552138803e-05,
      "loss": 0.6685,
      "step": 4359
    },
    {
      "epoch": 0.896289443930517,
      "grad_norm": 0.1832335740327835,
      "learning_rate": 7.399933869456589e-05,
      "loss": 0.6756,
      "step": 4360
    },
    {
      "epoch": 0.8964950149038956,
      "grad_norm": 0.18786631524562836,
      "learning_rate": 7.399170043908187e-05,
      "loss": 0.6464,
      "step": 4361
    },
    {
      "epoch": 0.8967005858772741,
      "grad_norm": 0.18036015331745148,
      "learning_rate": 7.398406075531228e-05,
      "loss": 0.6493,
      "step": 4362
    },
    {
      "epoch": 0.8969061568506527,
      "grad_norm": 0.19510389864444733,
      "learning_rate": 7.39764196436335e-05,
      "loss": 0.6499,
      "step": 4363
    },
    {
      "epoch": 0.8971117278240313,
      "grad_norm": 0.18855442106723785,
      "learning_rate": 7.396877710442194e-05,
      "loss": 0.6618,
      "step": 4364
    },
    {
      "epoch": 0.8973172987974098,
      "grad_norm": 0.1755952090024948,
      "learning_rate": 7.396113313805416e-05,
      "loss": 0.5859,
      "step": 4365
    },
    {
      "epoch": 0.8975228697707883,
      "grad_norm": 0.19632591307163239,
      "learning_rate": 7.395348774490668e-05,
      "loss": 0.6806,
      "step": 4366
    },
    {
      "epoch": 0.8977284407441669,
      "grad_norm": 0.1848839372396469,
      "learning_rate": 7.394584092535622e-05,
      "loss": 0.6589,
      "step": 4367
    },
    {
      "epoch": 0.8979340117175455,
      "grad_norm": 0.1884489208459854,
      "learning_rate": 7.393819267977945e-05,
      "loss": 0.6858,
      "step": 4368
    },
    {
      "epoch": 0.8981395826909241,
      "grad_norm": 0.1883459985256195,
      "learning_rate": 7.393054300855318e-05,
      "loss": 0.6714,
      "step": 4369
    },
    {
      "epoch": 0.8983451536643026,
      "grad_norm": 0.18213316798210144,
      "learning_rate": 7.392289191205428e-05,
      "loss": 0.6601,
      "step": 4370
    },
    {
      "epoch": 0.8985507246376812,
      "grad_norm": 0.18287204205989838,
      "learning_rate": 7.391523939065969e-05,
      "loss": 0.6714,
      "step": 4371
    },
    {
      "epoch": 0.8987562956110597,
      "grad_norm": 0.18707792460918427,
      "learning_rate": 7.390758544474639e-05,
      "loss": 0.6407,
      "step": 4372
    },
    {
      "epoch": 0.8989618665844383,
      "grad_norm": 0.18532080948352814,
      "learning_rate": 7.389993007469148e-05,
      "loss": 0.6813,
      "step": 4373
    },
    {
      "epoch": 0.8991674375578168,
      "grad_norm": 0.17980536818504333,
      "learning_rate": 7.38922732808721e-05,
      "loss": 0.6335,
      "step": 4374
    },
    {
      "epoch": 0.8993730085311954,
      "grad_norm": 0.18949337303638458,
      "learning_rate": 7.388461506366544e-05,
      "loss": 0.6959,
      "step": 4375
    },
    {
      "epoch": 0.899578579504574,
      "grad_norm": 0.18386761844158173,
      "learning_rate": 7.387695542344881e-05,
      "loss": 0.6337,
      "step": 4376
    },
    {
      "epoch": 0.8997841504779525,
      "grad_norm": 0.18090958893299103,
      "learning_rate": 7.386929436059956e-05,
      "loss": 0.6445,
      "step": 4377
    },
    {
      "epoch": 0.899989721451331,
      "grad_norm": 0.18790413439273834,
      "learning_rate": 7.386163187549511e-05,
      "loss": 0.6622,
      "step": 4378
    },
    {
      "epoch": 0.9001952924247096,
      "grad_norm": 0.18693870306015015,
      "learning_rate": 7.385396796851296e-05,
      "loss": 0.6711,
      "step": 4379
    },
    {
      "epoch": 0.9004008633980882,
      "grad_norm": 0.18476144969463348,
      "learning_rate": 7.384630264003067e-05,
      "loss": 0.6642,
      "step": 4380
    },
    {
      "epoch": 0.9006064343714667,
      "grad_norm": 0.18623842298984528,
      "learning_rate": 7.383863589042587e-05,
      "loss": 0.6242,
      "step": 4381
    },
    {
      "epoch": 0.9008120053448453,
      "grad_norm": 0.14655017852783203,
      "learning_rate": 7.383096772007628e-05,
      "loss": 0.5558,
      "step": 4382
    },
    {
      "epoch": 0.9010175763182239,
      "grad_norm": 0.18449489772319794,
      "learning_rate": 7.382329812935963e-05,
      "loss": 0.6603,
      "step": 4383
    },
    {
      "epoch": 0.9012231472916025,
      "grad_norm": 0.1364215761423111,
      "learning_rate": 7.381562711865385e-05,
      "loss": 0.5671,
      "step": 4384
    },
    {
      "epoch": 0.9014287182649809,
      "grad_norm": 0.19321440160274506,
      "learning_rate": 7.380795468833679e-05,
      "loss": 0.6826,
      "step": 4385
    },
    {
      "epoch": 0.9016342892383595,
      "grad_norm": 0.18807579576969147,
      "learning_rate": 7.380028083878644e-05,
      "loss": 0.6982,
      "step": 4386
    },
    {
      "epoch": 0.9018398602117381,
      "grad_norm": 0.18062882125377655,
      "learning_rate": 7.379260557038088e-05,
      "loss": 0.6676,
      "step": 4387
    },
    {
      "epoch": 0.9020454311851167,
      "grad_norm": 0.14082865417003632,
      "learning_rate": 7.37849288834982e-05,
      "loss": 0.6026,
      "step": 4388
    },
    {
      "epoch": 0.9022510021584952,
      "grad_norm": 0.1912989616394043,
      "learning_rate": 7.377725077851663e-05,
      "loss": 0.6711,
      "step": 4389
    },
    {
      "epoch": 0.9024565731318738,
      "grad_norm": 0.12428473681211472,
      "learning_rate": 7.376957125581441e-05,
      "loss": 0.5805,
      "step": 4390
    },
    {
      "epoch": 0.9026621441052524,
      "grad_norm": 0.1931021362543106,
      "learning_rate": 7.376189031576991e-05,
      "loss": 0.6652,
      "step": 4391
    },
    {
      "epoch": 0.902867715078631,
      "grad_norm": 0.1896105408668518,
      "learning_rate": 7.375420795876148e-05,
      "loss": 0.6592,
      "step": 4392
    },
    {
      "epoch": 0.9030732860520094,
      "grad_norm": 0.18101376295089722,
      "learning_rate": 7.374652418516761e-05,
      "loss": 0.6803,
      "step": 4393
    },
    {
      "epoch": 0.903278857025388,
      "grad_norm": 0.18925289809703827,
      "learning_rate": 7.373883899536688e-05,
      "loss": 0.6599,
      "step": 4394
    },
    {
      "epoch": 0.9034844279987666,
      "grad_norm": 0.18771770596504211,
      "learning_rate": 7.373115238973786e-05,
      "loss": 0.6866,
      "step": 4395
    },
    {
      "epoch": 0.9036899989721451,
      "grad_norm": 0.18801310658454895,
      "learning_rate": 7.372346436865927e-05,
      "loss": 0.6602,
      "step": 4396
    },
    {
      "epoch": 0.9038955699455237,
      "grad_norm": 0.1810484528541565,
      "learning_rate": 7.371577493250983e-05,
      "loss": 0.6377,
      "step": 4397
    },
    {
      "epoch": 0.9041011409189023,
      "grad_norm": 0.18624310195446014,
      "learning_rate": 7.370808408166838e-05,
      "loss": 0.6655,
      "step": 4398
    },
    {
      "epoch": 0.9043067118922808,
      "grad_norm": 0.1851394772529602,
      "learning_rate": 7.37003918165138e-05,
      "loss": 0.6622,
      "step": 4399
    },
    {
      "epoch": 0.9045122828656593,
      "grad_norm": 0.20104296505451202,
      "learning_rate": 7.369269813742507e-05,
      "loss": 0.6727,
      "step": 4400
    },
    {
      "epoch": 0.9047178538390379,
      "grad_norm": 0.15082360804080963,
      "learning_rate": 7.368500304478121e-05,
      "loss": 0.5995,
      "step": 4401
    },
    {
      "epoch": 0.9049234248124165,
      "grad_norm": 0.13055920600891113,
      "learning_rate": 7.367730653896132e-05,
      "loss": 0.5763,
      "step": 4402
    },
    {
      "epoch": 0.9051289957857951,
      "grad_norm": 0.1956562101840973,
      "learning_rate": 7.366960862034458e-05,
      "loss": 0.6743,
      "step": 4403
    },
    {
      "epoch": 0.9053345667591736,
      "grad_norm": 0.1876806765794754,
      "learning_rate": 7.366190928931021e-05,
      "loss": 0.6862,
      "step": 4404
    },
    {
      "epoch": 0.9055401377325522,
      "grad_norm": 0.1496850550174713,
      "learning_rate": 7.365420854623755e-05,
      "loss": 0.5858,
      "step": 4405
    },
    {
      "epoch": 0.9057457087059307,
      "grad_norm": 0.14271092414855957,
      "learning_rate": 7.364650639150596e-05,
      "loss": 0.6152,
      "step": 4406
    },
    {
      "epoch": 0.9059512796793093,
      "grad_norm": 0.20220176875591278,
      "learning_rate": 7.36388028254949e-05,
      "loss": 0.6771,
      "step": 4407
    },
    {
      "epoch": 0.9061568506526878,
      "grad_norm": 0.13460181653499603,
      "learning_rate": 7.363109784858388e-05,
      "loss": 0.5904,
      "step": 4408
    },
    {
      "epoch": 0.9063624216260664,
      "grad_norm": 0.13429884612560272,
      "learning_rate": 7.362339146115248e-05,
      "loss": 0.5729,
      "step": 4409
    },
    {
      "epoch": 0.906567992599445,
      "grad_norm": 0.18408654630184174,
      "learning_rate": 7.361568366358038e-05,
      "loss": 0.6534,
      "step": 4410
    },
    {
      "epoch": 0.9067735635728236,
      "grad_norm": 0.20259039103984833,
      "learning_rate": 7.360797445624729e-05,
      "loss": 0.6585,
      "step": 4411
    },
    {
      "epoch": 0.906979134546202,
      "grad_norm": 0.18721166253089905,
      "learning_rate": 7.360026383953301e-05,
      "loss": 0.6825,
      "step": 4412
    },
    {
      "epoch": 0.9071847055195806,
      "grad_norm": 0.18604475259780884,
      "learning_rate": 7.359255181381741e-05,
      "loss": 0.6372,
      "step": 4413
    },
    {
      "epoch": 0.9073902764929592,
      "grad_norm": 0.1993558555841446,
      "learning_rate": 7.358483837948043e-05,
      "loss": 0.653,
      "step": 4414
    },
    {
      "epoch": 0.9075958474663377,
      "grad_norm": 0.18966707587242126,
      "learning_rate": 7.357712353690205e-05,
      "loss": 0.6598,
      "step": 4415
    },
    {
      "epoch": 0.9078014184397163,
      "grad_norm": 0.18439289927482605,
      "learning_rate": 7.35694072864624e-05,
      "loss": 0.6923,
      "step": 4416
    },
    {
      "epoch": 0.9080069894130949,
      "grad_norm": 0.1838844269514084,
      "learning_rate": 7.356168962854155e-05,
      "loss": 0.6617,
      "step": 4417
    },
    {
      "epoch": 0.9082125603864735,
      "grad_norm": 0.18378853797912598,
      "learning_rate": 7.355397056351975e-05,
      "loss": 0.6939,
      "step": 4418
    },
    {
      "epoch": 0.9084181313598519,
      "grad_norm": 0.1807030588388443,
      "learning_rate": 7.354625009177729e-05,
      "loss": 0.6425,
      "step": 4419
    },
    {
      "epoch": 0.9086237023332305,
      "grad_norm": 0.18497875332832336,
      "learning_rate": 7.353852821369452e-05,
      "loss": 0.682,
      "step": 4420
    },
    {
      "epoch": 0.9088292733066091,
      "grad_norm": 0.18819686770439148,
      "learning_rate": 7.353080492965184e-05,
      "loss": 0.6772,
      "step": 4421
    },
    {
      "epoch": 0.9090348442799877,
      "grad_norm": 0.19138747453689575,
      "learning_rate": 7.352308024002977e-05,
      "loss": 0.5944,
      "step": 4422
    },
    {
      "epoch": 0.9092404152533662,
      "grad_norm": 0.1951584368944168,
      "learning_rate": 7.351535414520884e-05,
      "loss": 0.6523,
      "step": 4423
    },
    {
      "epoch": 0.9094459862267448,
      "grad_norm": 0.19077381491661072,
      "learning_rate": 7.350762664556969e-05,
      "loss": 0.6364,
      "step": 4424
    },
    {
      "epoch": 0.9096515572001234,
      "grad_norm": 0.18306457996368408,
      "learning_rate": 7.349989774149302e-05,
      "loss": 0.6616,
      "step": 4425
    },
    {
      "epoch": 0.909857128173502,
      "grad_norm": 0.18449269235134125,
      "learning_rate": 7.349216743335961e-05,
      "loss": 0.6431,
      "step": 4426
    },
    {
      "epoch": 0.9100626991468804,
      "grad_norm": 0.18515408039093018,
      "learning_rate": 7.348443572155027e-05,
      "loss": 0.634,
      "step": 4427
    },
    {
      "epoch": 0.910268270120259,
      "grad_norm": 0.16887053847312927,
      "learning_rate": 7.347670260644592e-05,
      "loss": 0.5846,
      "step": 4428
    },
    {
      "epoch": 0.9104738410936376,
      "grad_norm": 0.19056186079978943,
      "learning_rate": 7.346896808842753e-05,
      "loss": 0.6496,
      "step": 4429
    },
    {
      "epoch": 0.9106794120670162,
      "grad_norm": 0.19287076592445374,
      "learning_rate": 7.346123216787616e-05,
      "loss": 0.6689,
      "step": 4430
    },
    {
      "epoch": 0.9108849830403947,
      "grad_norm": 0.18666431307792664,
      "learning_rate": 7.34534948451729e-05,
      "loss": 0.6639,
      "step": 4431
    },
    {
      "epoch": 0.9110905540137733,
      "grad_norm": 0.1829727292060852,
      "learning_rate": 7.344575612069893e-05,
      "loss": 0.6354,
      "step": 4432
    },
    {
      "epoch": 0.9112961249871518,
      "grad_norm": 0.19133751094341278,
      "learning_rate": 7.343801599483554e-05,
      "loss": 0.6718,
      "step": 4433
    },
    {
      "epoch": 0.9115016959605303,
      "grad_norm": 0.19330398738384247,
      "learning_rate": 7.3430274467964e-05,
      "loss": 0.6725,
      "step": 4434
    },
    {
      "epoch": 0.9117072669339089,
      "grad_norm": 0.18251781165599823,
      "learning_rate": 7.342253154046571e-05,
      "loss": 0.6553,
      "step": 4435
    },
    {
      "epoch": 0.9119128379072875,
      "grad_norm": 0.1795288473367691,
      "learning_rate": 7.341478721272215e-05,
      "loss": 0.6338,
      "step": 4436
    },
    {
      "epoch": 0.9121184088806661,
      "grad_norm": 0.19127197563648224,
      "learning_rate": 7.340704148511483e-05,
      "loss": 0.6715,
      "step": 4437
    },
    {
      "epoch": 0.9123239798540446,
      "grad_norm": 0.1778208166360855,
      "learning_rate": 7.339929435802536e-05,
      "loss": 0.6374,
      "step": 4438
    },
    {
      "epoch": 0.9125295508274232,
      "grad_norm": 0.18795600533485413,
      "learning_rate": 7.339154583183538e-05,
      "loss": 0.6714,
      "step": 4439
    },
    {
      "epoch": 0.9127351218008017,
      "grad_norm": 0.1836930364370346,
      "learning_rate": 7.338379590692665e-05,
      "loss": 0.6638,
      "step": 4440
    },
    {
      "epoch": 0.9129406927741803,
      "grad_norm": 0.1537027806043625,
      "learning_rate": 7.337604458368095e-05,
      "loss": 0.5768,
      "step": 4441
    },
    {
      "epoch": 0.9131462637475588,
      "grad_norm": 0.1962456852197647,
      "learning_rate": 7.336829186248018e-05,
      "loss": 0.679,
      "step": 4442
    },
    {
      "epoch": 0.9133518347209374,
      "grad_norm": 0.185762420296669,
      "learning_rate": 7.336053774370626e-05,
      "loss": 0.6342,
      "step": 4443
    },
    {
      "epoch": 0.913557405694316,
      "grad_norm": 0.18311749398708344,
      "learning_rate": 7.33527822277412e-05,
      "loss": 0.6502,
      "step": 4444
    },
    {
      "epoch": 0.9137629766676946,
      "grad_norm": 0.17960374057292938,
      "learning_rate": 7.334502531496707e-05,
      "loss": 0.6496,
      "step": 4445
    },
    {
      "epoch": 0.913968547641073,
      "grad_norm": 0.17319880425930023,
      "learning_rate": 7.333726700576603e-05,
      "loss": 0.6354,
      "step": 4446
    },
    {
      "epoch": 0.9141741186144516,
      "grad_norm": 0.19266557693481445,
      "learning_rate": 7.332950730052029e-05,
      "loss": 0.66,
      "step": 4447
    },
    {
      "epoch": 0.9143796895878302,
      "grad_norm": 0.18681232631206512,
      "learning_rate": 7.332174619961215e-05,
      "loss": 0.6807,
      "step": 4448
    },
    {
      "epoch": 0.9145852605612088,
      "grad_norm": 0.14952509105205536,
      "learning_rate": 7.331398370342393e-05,
      "loss": 0.5697,
      "step": 4449
    },
    {
      "epoch": 0.9147908315345873,
      "grad_norm": 0.18955743312835693,
      "learning_rate": 7.33062198123381e-05,
      "loss": 0.6537,
      "step": 4450
    },
    {
      "epoch": 0.9149964025079659,
      "grad_norm": 0.18778184056282043,
      "learning_rate": 7.32984545267371e-05,
      "loss": 0.6738,
      "step": 4451
    },
    {
      "epoch": 0.9152019734813445,
      "grad_norm": 0.12955501675605774,
      "learning_rate": 7.329068784700352e-05,
      "loss": 0.5692,
      "step": 4452
    },
    {
      "epoch": 0.9154075444547229,
      "grad_norm": 0.12300048768520355,
      "learning_rate": 7.328291977351998e-05,
      "loss": 0.5731,
      "step": 4453
    },
    {
      "epoch": 0.9156131154281015,
      "grad_norm": 0.1227407306432724,
      "learning_rate": 7.327515030666918e-05,
      "loss": 0.5563,
      "step": 4454
    },
    {
      "epoch": 0.9158186864014801,
      "grad_norm": 0.1943395435810089,
      "learning_rate": 7.326737944683387e-05,
      "loss": 0.6464,
      "step": 4455
    },
    {
      "epoch": 0.9160242573748587,
      "grad_norm": 0.1225886195898056,
      "learning_rate": 7.32596071943969e-05,
      "loss": 0.5903,
      "step": 4456
    },
    {
      "epoch": 0.9162298283482372,
      "grad_norm": 0.19221986830234528,
      "learning_rate": 7.325183354974119e-05,
      "loss": 0.6723,
      "step": 4457
    },
    {
      "epoch": 0.9164353993216158,
      "grad_norm": 0.12794283032417297,
      "learning_rate": 7.324405851324967e-05,
      "loss": 0.5684,
      "step": 4458
    },
    {
      "epoch": 0.9166409702949944,
      "grad_norm": 0.18591567873954773,
      "learning_rate": 7.32362820853054e-05,
      "loss": 0.6615,
      "step": 4459
    },
    {
      "epoch": 0.916846541268373,
      "grad_norm": 0.1794724315404892,
      "learning_rate": 7.32285042662915e-05,
      "loss": 0.66,
      "step": 4460
    },
    {
      "epoch": 0.9170521122417514,
      "grad_norm": 0.17996345460414886,
      "learning_rate": 7.322072505659111e-05,
      "loss": 0.6703,
      "step": 4461
    },
    {
      "epoch": 0.91725768321513,
      "grad_norm": 0.1860412210226059,
      "learning_rate": 7.321294445658754e-05,
      "loss": 0.6633,
      "step": 4462
    },
    {
      "epoch": 0.9174632541885086,
      "grad_norm": 0.18460632860660553,
      "learning_rate": 7.320516246666401e-05,
      "loss": 0.6719,
      "step": 4463
    },
    {
      "epoch": 0.9176688251618872,
      "grad_norm": 0.179931178689003,
      "learning_rate": 7.3197379087204e-05,
      "loss": 0.6874,
      "step": 4464
    },
    {
      "epoch": 0.9178743961352657,
      "grad_norm": 0.17799624800682068,
      "learning_rate": 7.31895943185909e-05,
      "loss": 0.6829,
      "step": 4465
    },
    {
      "epoch": 0.9180799671086443,
      "grad_norm": 0.17857834696769714,
      "learning_rate": 7.318180816120825e-05,
      "loss": 0.6732,
      "step": 4466
    },
    {
      "epoch": 0.9182855380820228,
      "grad_norm": 0.18361206352710724,
      "learning_rate": 7.317402061543963e-05,
      "loss": 0.6628,
      "step": 4467
    },
    {
      "epoch": 0.9184911090554013,
      "grad_norm": 0.18038511276245117,
      "learning_rate": 7.316623168166869e-05,
      "loss": 0.65,
      "step": 4468
    },
    {
      "epoch": 0.9186966800287799,
      "grad_norm": 0.18144308030605316,
      "learning_rate": 7.315844136027917e-05,
      "loss": 0.6874,
      "step": 4469
    },
    {
      "epoch": 0.9189022510021585,
      "grad_norm": 0.18093526363372803,
      "learning_rate": 7.315064965165486e-05,
      "loss": 0.6514,
      "step": 4470
    },
    {
      "epoch": 0.9191078219755371,
      "grad_norm": 0.15748950839042664,
      "learning_rate": 7.314285655617962e-05,
      "loss": 0.5854,
      "step": 4471
    },
    {
      "epoch": 0.9193133929489156,
      "grad_norm": 0.18608981370925903,
      "learning_rate": 7.313506207423738e-05,
      "loss": 0.6583,
      "step": 4472
    },
    {
      "epoch": 0.9195189639222942,
      "grad_norm": 0.13079427182674408,
      "learning_rate": 7.312726620621211e-05,
      "loss": 0.5866,
      "step": 4473
    },
    {
      "epoch": 0.9197245348956727,
      "grad_norm": 0.1949155479669571,
      "learning_rate": 7.311946895248793e-05,
      "loss": 0.6501,
      "step": 4474
    },
    {
      "epoch": 0.9199301058690513,
      "grad_norm": 0.19749537110328674,
      "learning_rate": 7.311167031344894e-05,
      "loss": 0.6782,
      "step": 4475
    },
    {
      "epoch": 0.9201356768424298,
      "grad_norm": 0.18449652194976807,
      "learning_rate": 7.310387028947934e-05,
      "loss": 0.6683,
      "step": 4476
    },
    {
      "epoch": 0.9203412478158084,
      "grad_norm": 0.18260683119297028,
      "learning_rate": 7.309606888096341e-05,
      "loss": 0.6541,
      "step": 4477
    },
    {
      "epoch": 0.920546818789187,
      "grad_norm": 0.16052718460559845,
      "learning_rate": 7.308826608828548e-05,
      "loss": 0.5706,
      "step": 4478
    },
    {
      "epoch": 0.9207523897625656,
      "grad_norm": 0.1838260293006897,
      "learning_rate": 7.308046191182998e-05,
      "loss": 0.6577,
      "step": 4479
    },
    {
      "epoch": 0.920957960735944,
      "grad_norm": 0.12468434125185013,
      "learning_rate": 7.307265635198135e-05,
      "loss": 0.5989,
      "step": 4480
    },
    {
      "epoch": 0.9211635317093226,
      "grad_norm": 0.12467863410711288,
      "learning_rate": 7.306484940912416e-05,
      "loss": 0.5734,
      "step": 4481
    },
    {
      "epoch": 0.9213691026827012,
      "grad_norm": 0.2048860639333725,
      "learning_rate": 7.305704108364301e-05,
      "loss": 0.6777,
      "step": 4482
    },
    {
      "epoch": 0.9215746736560798,
      "grad_norm": 0.12955395877361298,
      "learning_rate": 7.304923137592258e-05,
      "loss": 0.5742,
      "step": 4483
    },
    {
      "epoch": 0.9217802446294583,
      "grad_norm": 0.18409843742847443,
      "learning_rate": 7.304142028634764e-05,
      "loss": 0.6323,
      "step": 4484
    },
    {
      "epoch": 0.9219858156028369,
      "grad_norm": 0.1324310153722763,
      "learning_rate": 7.303360781530299e-05,
      "loss": 0.5826,
      "step": 4485
    },
    {
      "epoch": 0.9221913865762155,
      "grad_norm": 0.18649353086948395,
      "learning_rate": 7.30257939631735e-05,
      "loss": 0.6743,
      "step": 4486
    },
    {
      "epoch": 0.9223969575495939,
      "grad_norm": 0.19196631014347076,
      "learning_rate": 7.301797873034412e-05,
      "loss": 0.6578,
      "step": 4487
    },
    {
      "epoch": 0.9226025285229725,
      "grad_norm": 0.13730254769325256,
      "learning_rate": 7.301016211719992e-05,
      "loss": 0.5787,
      "step": 4488
    },
    {
      "epoch": 0.9228080994963511,
      "grad_norm": 0.17747841775417328,
      "learning_rate": 7.300234412412593e-05,
      "loss": 0.6616,
      "step": 4489
    },
    {
      "epoch": 0.9230136704697297,
      "grad_norm": 0.1930990219116211,
      "learning_rate": 7.299452475150732e-05,
      "loss": 0.6509,
      "step": 4490
    },
    {
      "epoch": 0.9232192414431082,
      "grad_norm": 0.18891580402851105,
      "learning_rate": 7.298670399972933e-05,
      "loss": 0.6808,
      "step": 4491
    },
    {
      "epoch": 0.9234248124164868,
      "grad_norm": 0.1751311719417572,
      "learning_rate": 7.297888186917724e-05,
      "loss": 0.649,
      "step": 4492
    },
    {
      "epoch": 0.9236303833898654,
      "grad_norm": 0.37240174412727356,
      "learning_rate": 7.297105836023642e-05,
      "loss": 0.6677,
      "step": 4493
    },
    {
      "epoch": 0.923835954363244,
      "grad_norm": 0.1758231371641159,
      "learning_rate": 7.296323347329228e-05,
      "loss": 0.6484,
      "step": 4494
    },
    {
      "epoch": 0.9240415253366224,
      "grad_norm": 0.18870992958545685,
      "learning_rate": 7.295540720873034e-05,
      "loss": 0.6792,
      "step": 4495
    },
    {
      "epoch": 0.924247096310001,
      "grad_norm": 0.17921528220176697,
      "learning_rate": 7.294757956693616e-05,
      "loss": 0.6595,
      "step": 4496
    },
    {
      "epoch": 0.9244526672833796,
      "grad_norm": 0.18485888838768005,
      "learning_rate": 7.293975054829534e-05,
      "loss": 0.6875,
      "step": 4497
    },
    {
      "epoch": 0.9246582382567582,
      "grad_norm": 0.1897556483745575,
      "learning_rate": 7.293192015319359e-05,
      "loss": 0.6486,
      "step": 4498
    },
    {
      "epoch": 0.9248638092301367,
      "grad_norm": 0.18818815052509308,
      "learning_rate": 7.29240883820167e-05,
      "loss": 0.6567,
      "step": 4499
    },
    {
      "epoch": 0.9250693802035153,
      "grad_norm": 0.1831316202878952,
      "learning_rate": 7.291625523515051e-05,
      "loss": 0.6784,
      "step": 4500
    },
    {
      "epoch": 0.9252749511768938,
      "grad_norm": 0.18603403866291046,
      "learning_rate": 7.290842071298088e-05,
      "loss": 0.6519,
      "step": 4501
    },
    {
      "epoch": 0.9254805221502724,
      "grad_norm": 0.18271493911743164,
      "learning_rate": 7.290058481589381e-05,
      "loss": 0.6522,
      "step": 4502
    },
    {
      "epoch": 0.9256860931236509,
      "grad_norm": 0.1795085072517395,
      "learning_rate": 7.289274754427536e-05,
      "loss": 0.6418,
      "step": 4503
    },
    {
      "epoch": 0.9258916640970295,
      "grad_norm": 0.18269407749176025,
      "learning_rate": 7.288490889851158e-05,
      "loss": 0.6724,
      "step": 4504
    },
    {
      "epoch": 0.9260972350704081,
      "grad_norm": 0.18335239589214325,
      "learning_rate": 7.287706887898867e-05,
      "loss": 0.6758,
      "step": 4505
    },
    {
      "epoch": 0.9263028060437866,
      "grad_norm": 0.17889541387557983,
      "learning_rate": 7.28692274860929e-05,
      "loss": 0.6228,
      "step": 4506
    },
    {
      "epoch": 0.9265083770171652,
      "grad_norm": 0.1919565200805664,
      "learning_rate": 7.286138472021053e-05,
      "loss": 0.6629,
      "step": 4507
    },
    {
      "epoch": 0.9267139479905437,
      "grad_norm": 0.162271648645401,
      "learning_rate": 7.285354058172796e-05,
      "loss": 0.5823,
      "step": 4508
    },
    {
      "epoch": 0.9269195189639223,
      "grad_norm": 0.18953320384025574,
      "learning_rate": 7.284569507103164e-05,
      "loss": 0.687,
      "step": 4509
    },
    {
      "epoch": 0.9271250899373008,
      "grad_norm": 0.18748800456523895,
      "learning_rate": 7.283784818850807e-05,
      "loss": 0.6741,
      "step": 4510
    },
    {
      "epoch": 0.9273306609106794,
      "grad_norm": 0.20176881551742554,
      "learning_rate": 7.282999993454383e-05,
      "loss": 0.6658,
      "step": 4511
    },
    {
      "epoch": 0.927536231884058,
      "grad_norm": 0.19243447482585907,
      "learning_rate": 7.282215030952558e-05,
      "loss": 0.6633,
      "step": 4512
    },
    {
      "epoch": 0.9277418028574366,
      "grad_norm": 0.18939968943595886,
      "learning_rate": 7.281429931384001e-05,
      "loss": 0.6858,
      "step": 4513
    },
    {
      "epoch": 0.927947373830815,
      "grad_norm": 0.1850508600473404,
      "learning_rate": 7.280644694787393e-05,
      "loss": 0.6459,
      "step": 4514
    },
    {
      "epoch": 0.9281529448041936,
      "grad_norm": 0.18251655995845795,
      "learning_rate": 7.279859321201418e-05,
      "loss": 0.6619,
      "step": 4515
    },
    {
      "epoch": 0.9283585157775722,
      "grad_norm": 0.18301190435886383,
      "learning_rate": 7.279073810664767e-05,
      "loss": 0.6507,
      "step": 4516
    },
    {
      "epoch": 0.9285640867509508,
      "grad_norm": 0.15676529705524445,
      "learning_rate": 7.278288163216138e-05,
      "loss": 0.5846,
      "step": 4517
    },
    {
      "epoch": 0.9287696577243293,
      "grad_norm": 0.18805831670761108,
      "learning_rate": 7.277502378894237e-05,
      "loss": 0.6531,
      "step": 4518
    },
    {
      "epoch": 0.9289752286977079,
      "grad_norm": 0.1892201006412506,
      "learning_rate": 7.276716457737776e-05,
      "loss": 0.659,
      "step": 4519
    },
    {
      "epoch": 0.9291807996710865,
      "grad_norm": 0.13629117608070374,
      "learning_rate": 7.275930399785473e-05,
      "loss": 0.569,
      "step": 4520
    },
    {
      "epoch": 0.929386370644465,
      "grad_norm": 0.20433087646961212,
      "learning_rate": 7.275144205076053e-05,
      "loss": 0.6686,
      "step": 4521
    },
    {
      "epoch": 0.9295919416178435,
      "grad_norm": 0.1851327121257782,
      "learning_rate": 7.274357873648252e-05,
      "loss": 0.6472,
      "step": 4522
    },
    {
      "epoch": 0.9297975125912221,
      "grad_norm": 0.19075118005275726,
      "learning_rate": 7.273571405540802e-05,
      "loss": 0.6702,
      "step": 4523
    },
    {
      "epoch": 0.9300030835646007,
      "grad_norm": 0.1823331117630005,
      "learning_rate": 7.272784800792457e-05,
      "loss": 0.6637,
      "step": 4524
    },
    {
      "epoch": 0.9302086545379792,
      "grad_norm": 0.18252402544021606,
      "learning_rate": 7.271998059441962e-05,
      "loss": 0.6553,
      "step": 4525
    },
    {
      "epoch": 0.9304142255113578,
      "grad_norm": 0.18108795583248138,
      "learning_rate": 7.27121118152808e-05,
      "loss": 0.6487,
      "step": 4526
    },
    {
      "epoch": 0.9306197964847364,
      "grad_norm": 0.18125282227993011,
      "learning_rate": 7.270424167089574e-05,
      "loss": 0.6674,
      "step": 4527
    },
    {
      "epoch": 0.930825367458115,
      "grad_norm": 0.17340825498104095,
      "learning_rate": 7.269637016165218e-05,
      "loss": 0.6521,
      "step": 4528
    },
    {
      "epoch": 0.9310309384314934,
      "grad_norm": 0.17793837189674377,
      "learning_rate": 7.268849728793794e-05,
      "loss": 0.6443,
      "step": 4529
    },
    {
      "epoch": 0.931236509404872,
      "grad_norm": 0.1863885074853897,
      "learning_rate": 7.268062305014085e-05,
      "loss": 0.6374,
      "step": 4530
    },
    {
      "epoch": 0.9314420803782506,
      "grad_norm": 0.1790206879377365,
      "learning_rate": 7.267274744864883e-05,
      "loss": 0.6463,
      "step": 4531
    },
    {
      "epoch": 0.9316476513516292,
      "grad_norm": 0.19194868206977844,
      "learning_rate": 7.266487048384987e-05,
      "loss": 0.6575,
      "step": 4532
    },
    {
      "epoch": 0.9318532223250077,
      "grad_norm": 0.17925460636615753,
      "learning_rate": 7.265699215613208e-05,
      "loss": 0.655,
      "step": 4533
    },
    {
      "epoch": 0.9320587932983863,
      "grad_norm": 0.18405981361865997,
      "learning_rate": 7.264911246588353e-05,
      "loss": 0.6661,
      "step": 4534
    },
    {
      "epoch": 0.9322643642717648,
      "grad_norm": 0.15504823625087738,
      "learning_rate": 7.264123141349245e-05,
      "loss": 0.5726,
      "step": 4535
    },
    {
      "epoch": 0.9324699352451434,
      "grad_norm": 0.1932215392589569,
      "learning_rate": 7.26333489993471e-05,
      "loss": 0.659,
      "step": 4536
    },
    {
      "epoch": 0.9326755062185219,
      "grad_norm": 0.182255357503891,
      "learning_rate": 7.262546522383579e-05,
      "loss": 0.6792,
      "step": 4537
    },
    {
      "epoch": 0.9328810771919005,
      "grad_norm": 0.1837291121482849,
      "learning_rate": 7.261758008734693e-05,
      "loss": 0.6816,
      "step": 4538
    },
    {
      "epoch": 0.9330866481652791,
      "grad_norm": 0.1409105658531189,
      "learning_rate": 7.2609693590269e-05,
      "loss": 0.5832,
      "step": 4539
    },
    {
      "epoch": 0.9332922191386577,
      "grad_norm": 0.19682304561138153,
      "learning_rate": 7.260180573299049e-05,
      "loss": 0.6693,
      "step": 4540
    },
    {
      "epoch": 0.9334977901120362,
      "grad_norm": 0.1264413744211197,
      "learning_rate": 7.259391651590005e-05,
      "loss": 0.5933,
      "step": 4541
    },
    {
      "epoch": 0.9337033610854147,
      "grad_norm": 0.1842966377735138,
      "learning_rate": 7.258602593938629e-05,
      "loss": 0.6619,
      "step": 4542
    },
    {
      "epoch": 0.9339089320587933,
      "grad_norm": 0.18830621242523193,
      "learning_rate": 7.257813400383798e-05,
      "loss": 0.6614,
      "step": 4543
    },
    {
      "epoch": 0.9341145030321718,
      "grad_norm": 0.17995837330818176,
      "learning_rate": 7.257024070964391e-05,
      "loss": 0.6535,
      "step": 4544
    },
    {
      "epoch": 0.9343200740055504,
      "grad_norm": 0.1838386356830597,
      "learning_rate": 7.256234605719294e-05,
      "loss": 0.6598,
      "step": 4545
    },
    {
      "epoch": 0.934525644978929,
      "grad_norm": 0.18245835602283478,
      "learning_rate": 7.2554450046874e-05,
      "loss": 0.6377,
      "step": 4546
    },
    {
      "epoch": 0.9347312159523076,
      "grad_norm": 0.18414458632469177,
      "learning_rate": 7.254655267907611e-05,
      "loss": 0.6616,
      "step": 4547
    },
    {
      "epoch": 0.934936786925686,
      "grad_norm": 0.14779187738895416,
      "learning_rate": 7.253865395418832e-05,
      "loss": 0.574,
      "step": 4548
    },
    {
      "epoch": 0.9351423578990646,
      "grad_norm": 0.13919095695018768,
      "learning_rate": 7.253075387259975e-05,
      "loss": 0.5738,
      "step": 4549
    },
    {
      "epoch": 0.9353479288724432,
      "grad_norm": 0.20152714848518372,
      "learning_rate": 7.252285243469962e-05,
      "loss": 0.656,
      "step": 4550
    },
    {
      "epoch": 0.9355534998458218,
      "grad_norm": 0.20961932837963104,
      "learning_rate": 7.251494964087721e-05,
      "loss": 0.6724,
      "step": 4551
    },
    {
      "epoch": 0.9357590708192003,
      "grad_norm": 0.1847916692495346,
      "learning_rate": 7.25070454915218e-05,
      "loss": 0.6601,
      "step": 4552
    },
    {
      "epoch": 0.9359646417925789,
      "grad_norm": 0.1776532083749771,
      "learning_rate": 7.249913998702287e-05,
      "loss": 0.645,
      "step": 4553
    },
    {
      "epoch": 0.9361702127659575,
      "grad_norm": 0.18173301219940186,
      "learning_rate": 7.249123312776982e-05,
      "loss": 0.6983,
      "step": 4554
    },
    {
      "epoch": 0.936375783739336,
      "grad_norm": 0.18323563039302826,
      "learning_rate": 7.24833249141522e-05,
      "loss": 0.6603,
      "step": 4555
    },
    {
      "epoch": 0.9365813547127145,
      "grad_norm": 0.18385376036167145,
      "learning_rate": 7.247541534655962e-05,
      "loss": 0.6551,
      "step": 4556
    },
    {
      "epoch": 0.9367869256860931,
      "grad_norm": 0.18663281202316284,
      "learning_rate": 7.246750442538176e-05,
      "loss": 0.6562,
      "step": 4557
    },
    {
      "epoch": 0.9369924966594717,
      "grad_norm": 0.18781627714633942,
      "learning_rate": 7.245959215100834e-05,
      "loss": 0.6772,
      "step": 4558
    },
    {
      "epoch": 0.9371980676328503,
      "grad_norm": 0.18314847350120544,
      "learning_rate": 7.245167852382915e-05,
      "loss": 0.6523,
      "step": 4559
    },
    {
      "epoch": 0.9374036386062288,
      "grad_norm": 0.18462207913398743,
      "learning_rate": 7.244376354423408e-05,
      "loss": 0.6716,
      "step": 4560
    },
    {
      "epoch": 0.9376092095796074,
      "grad_norm": 0.18789240717887878,
      "learning_rate": 7.243584721261302e-05,
      "loss": 0.6672,
      "step": 4561
    },
    {
      "epoch": 0.937814780552986,
      "grad_norm": 0.18339639902114868,
      "learning_rate": 7.242792952935604e-05,
      "loss": 0.6526,
      "step": 4562
    },
    {
      "epoch": 0.9380203515263644,
      "grad_norm": 0.18866628408432007,
      "learning_rate": 7.242001049485314e-05,
      "loss": 0.6739,
      "step": 4563
    },
    {
      "epoch": 0.938225922499743,
      "grad_norm": 0.18578583002090454,
      "learning_rate": 7.241209010949452e-05,
      "loss": 0.6485,
      "step": 4564
    },
    {
      "epoch": 0.9384314934731216,
      "grad_norm": 0.18551675975322723,
      "learning_rate": 7.240416837367032e-05,
      "loss": 0.6537,
      "step": 4565
    },
    {
      "epoch": 0.9386370644465002,
      "grad_norm": 0.1823461502790451,
      "learning_rate": 7.239624528777082e-05,
      "loss": 0.6626,
      "step": 4566
    },
    {
      "epoch": 0.9388426354198787,
      "grad_norm": 0.18426772952079773,
      "learning_rate": 7.23883208521864e-05,
      "loss": 0.6314,
      "step": 4567
    },
    {
      "epoch": 0.9390482063932573,
      "grad_norm": 0.19278199970722198,
      "learning_rate": 7.23803950673074e-05,
      "loss": 0.6813,
      "step": 4568
    },
    {
      "epoch": 0.9392537773666358,
      "grad_norm": 0.17879322171211243,
      "learning_rate": 7.23724679335243e-05,
      "loss": 0.6412,
      "step": 4569
    },
    {
      "epoch": 0.9394593483400144,
      "grad_norm": 0.18191079795360565,
      "learning_rate": 7.236453945122767e-05,
      "loss": 0.6825,
      "step": 4570
    },
    {
      "epoch": 0.9396649193133929,
      "grad_norm": 0.19142381846904755,
      "learning_rate": 7.235660962080805e-05,
      "loss": 0.6717,
      "step": 4571
    },
    {
      "epoch": 0.9398704902867715,
      "grad_norm": 0.18709653615951538,
      "learning_rate": 7.234867844265617e-05,
      "loss": 0.6483,
      "step": 4572
    },
    {
      "epoch": 0.9400760612601501,
      "grad_norm": 0.18491537868976593,
      "learning_rate": 7.234074591716271e-05,
      "loss": 0.6614,
      "step": 4573
    },
    {
      "epoch": 0.9402816322335287,
      "grad_norm": 0.2287359982728958,
      "learning_rate": 7.233281204471851e-05,
      "loss": 0.5824,
      "step": 4574
    },
    {
      "epoch": 0.9404872032069072,
      "grad_norm": 0.1951487511396408,
      "learning_rate": 7.232487682571439e-05,
      "loss": 0.6553,
      "step": 4575
    },
    {
      "epoch": 0.9406927741802857,
      "grad_norm": 0.19920621812343597,
      "learning_rate": 7.231694026054133e-05,
      "loss": 0.6497,
      "step": 4576
    },
    {
      "epoch": 0.9408983451536643,
      "grad_norm": 0.15709790587425232,
      "learning_rate": 7.230900234959028e-05,
      "loss": 0.5685,
      "step": 4577
    },
    {
      "epoch": 0.9411039161270429,
      "grad_norm": 0.19238202273845673,
      "learning_rate": 7.230106309325234e-05,
      "loss": 0.6771,
      "step": 4578
    },
    {
      "epoch": 0.9413094871004214,
      "grad_norm": 0.1886894553899765,
      "learning_rate": 7.229312249191862e-05,
      "loss": 0.6278,
      "step": 4579
    },
    {
      "epoch": 0.9415150580738,
      "grad_norm": 0.19003844261169434,
      "learning_rate": 7.228518054598032e-05,
      "loss": 0.6583,
      "step": 4580
    },
    {
      "epoch": 0.9417206290471786,
      "grad_norm": 0.15929581224918365,
      "learning_rate": 7.227723725582871e-05,
      "loss": 0.5738,
      "step": 4581
    },
    {
      "epoch": 0.941926200020557,
      "grad_norm": 0.19181537628173828,
      "learning_rate": 7.226929262185511e-05,
      "loss": 0.6692,
      "step": 4582
    },
    {
      "epoch": 0.9421317709939356,
      "grad_norm": 0.19717134535312653,
      "learning_rate": 7.226134664445093e-05,
      "loss": 0.665,
      "step": 4583
    },
    {
      "epoch": 0.9423373419673142,
      "grad_norm": 0.17591415345668793,
      "learning_rate": 7.22533993240076e-05,
      "loss": 0.6358,
      "step": 4584
    },
    {
      "epoch": 0.9425429129406928,
      "grad_norm": 0.18756897747516632,
      "learning_rate": 7.224545066091669e-05,
      "loss": 0.6755,
      "step": 4585
    },
    {
      "epoch": 0.9427484839140713,
      "grad_norm": 0.18418292701244354,
      "learning_rate": 7.223750065556977e-05,
      "loss": 0.6498,
      "step": 4586
    },
    {
      "epoch": 0.9429540548874499,
      "grad_norm": 0.14689591526985168,
      "learning_rate": 7.222954930835849e-05,
      "loss": 0.5795,
      "step": 4587
    },
    {
      "epoch": 0.9431596258608285,
      "grad_norm": 0.18386992812156677,
      "learning_rate": 7.222159661967459e-05,
      "loss": 0.6699,
      "step": 4588
    },
    {
      "epoch": 0.943365196834207,
      "grad_norm": 0.1894700974225998,
      "learning_rate": 7.221364258990985e-05,
      "loss": 0.6571,
      "step": 4589
    },
    {
      "epoch": 0.9435707678075855,
      "grad_norm": 0.17809130251407623,
      "learning_rate": 7.220568721945614e-05,
      "loss": 0.6409,
      "step": 4590
    },
    {
      "epoch": 0.9437763387809641,
      "grad_norm": 0.18572141230106354,
      "learning_rate": 7.219773050870537e-05,
      "loss": 0.6774,
      "step": 4591
    },
    {
      "epoch": 0.9439819097543427,
      "grad_norm": 0.1781856119632721,
      "learning_rate": 7.218977245804955e-05,
      "loss": 0.6939,
      "step": 4592
    },
    {
      "epoch": 0.9441874807277213,
      "grad_norm": 0.1840573400259018,
      "learning_rate": 7.218181306788074e-05,
      "loss": 0.6654,
      "step": 4593
    },
    {
      "epoch": 0.9443930517010998,
      "grad_norm": 0.1829008311033249,
      "learning_rate": 7.217385233859102e-05,
      "loss": 0.6673,
      "step": 4594
    },
    {
      "epoch": 0.9445986226744784,
      "grad_norm": 0.18518169224262238,
      "learning_rate": 7.216589027057262e-05,
      "loss": 0.6902,
      "step": 4595
    },
    {
      "epoch": 0.944804193647857,
      "grad_norm": 0.18205900490283966,
      "learning_rate": 7.215792686421779e-05,
      "loss": 0.6773,
      "step": 4596
    },
    {
      "epoch": 0.9450097646212354,
      "grad_norm": 0.14691917598247528,
      "learning_rate": 7.214996211991883e-05,
      "loss": 0.5941,
      "step": 4597
    },
    {
      "epoch": 0.945215335594614,
      "grad_norm": 0.18329864740371704,
      "learning_rate": 7.214199603806812e-05,
      "loss": 0.6699,
      "step": 4598
    },
    {
      "epoch": 0.9454209065679926,
      "grad_norm": 0.19199949502944946,
      "learning_rate": 7.213402861905814e-05,
      "loss": 0.6787,
      "step": 4599
    },
    {
      "epoch": 0.9456264775413712,
      "grad_norm": 0.15589165687561035,
      "learning_rate": 7.21260598632814e-05,
      "loss": 0.592,
      "step": 4600
    },
    {
      "epoch": 0.9458320485147497,
      "grad_norm": 0.12858957052230835,
      "learning_rate": 7.211808977113046e-05,
      "loss": 0.5699,
      "step": 4601
    },
    {
      "epoch": 0.9460376194881283,
      "grad_norm": 0.19628196954727173,
      "learning_rate": 7.2110118342998e-05,
      "loss": 0.6516,
      "step": 4602
    },
    {
      "epoch": 0.9462431904615068,
      "grad_norm": 0.1841566562652588,
      "learning_rate": 7.210214557927672e-05,
      "loss": 0.6473,
      "step": 4603
    },
    {
      "epoch": 0.9464487614348854,
      "grad_norm": 0.17483435571193695,
      "learning_rate": 7.20941714803594e-05,
      "loss": 0.6634,
      "step": 4604
    },
    {
      "epoch": 0.9466543324082639,
      "grad_norm": 0.18066710233688354,
      "learning_rate": 7.20861960466389e-05,
      "loss": 0.6443,
      "step": 4605
    },
    {
      "epoch": 0.9468599033816425,
      "grad_norm": 0.18473312258720398,
      "learning_rate": 7.207821927850811e-05,
      "loss": 0.6632,
      "step": 4606
    },
    {
      "epoch": 0.9470654743550211,
      "grad_norm": 0.18283243477344513,
      "learning_rate": 7.207024117636002e-05,
      "loss": 0.6703,
      "step": 4607
    },
    {
      "epoch": 0.9472710453283997,
      "grad_norm": 0.16648972034454346,
      "learning_rate": 7.206226174058766e-05,
      "loss": 0.5717,
      "step": 4608
    },
    {
      "epoch": 0.9474766163017782,
      "grad_norm": 0.19748379290103912,
      "learning_rate": 7.205428097158419e-05,
      "loss": 0.6838,
      "step": 4609
    },
    {
      "epoch": 0.9476821872751567,
      "grad_norm": 0.19934770464897156,
      "learning_rate": 7.204629886974271e-05,
      "loss": 0.6729,
      "step": 4610
    },
    {
      "epoch": 0.9478877582485353,
      "grad_norm": 0.19454436004161835,
      "learning_rate": 7.203831543545651e-05,
      "loss": 0.6693,
      "step": 4611
    },
    {
      "epoch": 0.9480933292219139,
      "grad_norm": 0.18130190670490265,
      "learning_rate": 7.203033066911889e-05,
      "loss": 0.6533,
      "step": 4612
    },
    {
      "epoch": 0.9482989001952924,
      "grad_norm": 0.17981968820095062,
      "learning_rate": 7.202234457112322e-05,
      "loss": 0.6574,
      "step": 4613
    },
    {
      "epoch": 0.948504471168671,
      "grad_norm": 0.1938825398683548,
      "learning_rate": 7.201435714186294e-05,
      "loss": 0.6517,
      "step": 4614
    },
    {
      "epoch": 0.9487100421420496,
      "grad_norm": 0.18407849967479706,
      "learning_rate": 7.200636838173153e-05,
      "loss": 0.6561,
      "step": 4615
    },
    {
      "epoch": 0.948915613115428,
      "grad_norm": 0.186232790350914,
      "learning_rate": 7.199837829112259e-05,
      "loss": 0.6417,
      "step": 4616
    },
    {
      "epoch": 0.9491211840888066,
      "grad_norm": 0.14791084825992584,
      "learning_rate": 7.199038687042973e-05,
      "loss": 0.591,
      "step": 4617
    },
    {
      "epoch": 0.9493267550621852,
      "grad_norm": 0.1812361627817154,
      "learning_rate": 7.198239412004667e-05,
      "loss": 0.6669,
      "step": 4618
    },
    {
      "epoch": 0.9495323260355638,
      "grad_norm": 0.18593737483024597,
      "learning_rate": 7.197440004036716e-05,
      "loss": 0.6999,
      "step": 4619
    },
    {
      "epoch": 0.9497378970089423,
      "grad_norm": 0.17995983362197876,
      "learning_rate": 7.196640463178506e-05,
      "loss": 0.6708,
      "step": 4620
    },
    {
      "epoch": 0.9499434679823209,
      "grad_norm": 0.19636765122413635,
      "learning_rate": 7.195840789469422e-05,
      "loss": 0.6667,
      "step": 4621
    },
    {
      "epoch": 0.9501490389556995,
      "grad_norm": 0.18465958535671234,
      "learning_rate": 7.195040982948865e-05,
      "loss": 0.6646,
      "step": 4622
    },
    {
      "epoch": 0.950354609929078,
      "grad_norm": 0.1823161542415619,
      "learning_rate": 7.194241043656234e-05,
      "loss": 0.636,
      "step": 4623
    },
    {
      "epoch": 0.9505601809024565,
      "grad_norm": 0.19029709696769714,
      "learning_rate": 7.19344097163094e-05,
      "loss": 0.6731,
      "step": 4624
    },
    {
      "epoch": 0.9507657518758351,
      "grad_norm": 0.19934682548046112,
      "learning_rate": 7.192640766912397e-05,
      "loss": 0.6384,
      "step": 4625
    },
    {
      "epoch": 0.9509713228492137,
      "grad_norm": 0.14476045966148376,
      "learning_rate": 7.19184042954003e-05,
      "loss": 0.5821,
      "step": 4626
    },
    {
      "epoch": 0.9511768938225923,
      "grad_norm": 0.1817658394575119,
      "learning_rate": 7.191039959553266e-05,
      "loss": 0.6815,
      "step": 4627
    },
    {
      "epoch": 0.9513824647959708,
      "grad_norm": 0.1834515631198883,
      "learning_rate": 7.190239356991542e-05,
      "loss": 0.6393,
      "step": 4628
    },
    {
      "epoch": 0.9515880357693494,
      "grad_norm": 0.18767185509204865,
      "learning_rate": 7.189438621894298e-05,
      "loss": 0.6657,
      "step": 4629
    },
    {
      "epoch": 0.951793606742728,
      "grad_norm": 0.18882089853286743,
      "learning_rate": 7.188637754300984e-05,
      "loss": 0.6531,
      "step": 4630
    },
    {
      "epoch": 0.9519991777161065,
      "grad_norm": 0.18064305186271667,
      "learning_rate": 7.187836754251055e-05,
      "loss": 0.6739,
      "step": 4631
    },
    {
      "epoch": 0.952204748689485,
      "grad_norm": 0.18906618654727936,
      "learning_rate": 7.187035621783972e-05,
      "loss": 0.68,
      "step": 4632
    },
    {
      "epoch": 0.9524103196628636,
      "grad_norm": 0.1903999000787735,
      "learning_rate": 7.186234356939204e-05,
      "loss": 0.6503,
      "step": 4633
    },
    {
      "epoch": 0.9526158906362422,
      "grad_norm": 0.184392049908638,
      "learning_rate": 7.185432959756222e-05,
      "loss": 0.6723,
      "step": 4634
    },
    {
      "epoch": 0.9528214616096207,
      "grad_norm": 0.19594880938529968,
      "learning_rate": 7.184631430274512e-05,
      "loss": 0.6487,
      "step": 4635
    },
    {
      "epoch": 0.9530270325829993,
      "grad_norm": 0.1459794044494629,
      "learning_rate": 7.183829768533558e-05,
      "loss": 0.5766,
      "step": 4636
    },
    {
      "epoch": 0.9532326035563778,
      "grad_norm": 0.19931526482105255,
      "learning_rate": 7.183027974572856e-05,
      "loss": 0.6702,
      "step": 4637
    },
    {
      "epoch": 0.9534381745297564,
      "grad_norm": 0.18936146795749664,
      "learning_rate": 7.182226048431907e-05,
      "loss": 0.6409,
      "step": 4638
    },
    {
      "epoch": 0.9536437455031349,
      "grad_norm": 0.12762728333473206,
      "learning_rate": 7.181423990150215e-05,
      "loss": 0.5624,
      "step": 4639
    },
    {
      "epoch": 0.9538493164765135,
      "grad_norm": 0.1938938945531845,
      "learning_rate": 7.180621799767298e-05,
      "loss": 0.6835,
      "step": 4640
    },
    {
      "epoch": 0.9540548874498921,
      "grad_norm": 0.1908787190914154,
      "learning_rate": 7.179819477322673e-05,
      "loss": 0.679,
      "step": 4641
    },
    {
      "epoch": 0.9542604584232707,
      "grad_norm": 0.17859888076782227,
      "learning_rate": 7.179017022855868e-05,
      "loss": 0.6604,
      "step": 4642
    },
    {
      "epoch": 0.9544660293966492,
      "grad_norm": 0.14399871230125427,
      "learning_rate": 7.178214436406416e-05,
      "loss": 0.5768,
      "step": 4643
    },
    {
      "epoch": 0.9546716003700277,
      "grad_norm": 0.19949081540107727,
      "learning_rate": 7.177411718013858e-05,
      "loss": 0.6536,
      "step": 4644
    },
    {
      "epoch": 0.9548771713434063,
      "grad_norm": 0.12567096948623657,
      "learning_rate": 7.176608867717738e-05,
      "loss": 0.579,
      "step": 4645
    },
    {
      "epoch": 0.9550827423167849,
      "grad_norm": 0.1978704035282135,
      "learning_rate": 7.175805885557608e-05,
      "loss": 0.654,
      "step": 4646
    },
    {
      "epoch": 0.9552883132901634,
      "grad_norm": 0.1830187439918518,
      "learning_rate": 7.175002771573031e-05,
      "loss": 0.665,
      "step": 4647
    },
    {
      "epoch": 0.955493884263542,
      "grad_norm": 0.14475895464420319,
      "learning_rate": 7.17419952580357e-05,
      "loss": 0.5824,
      "step": 4648
    },
    {
      "epoch": 0.9556994552369206,
      "grad_norm": 0.2026558667421341,
      "learning_rate": 7.173396148288796e-05,
      "loss": 0.6604,
      "step": 4649
    },
    {
      "epoch": 0.9559050262102992,
      "grad_norm": 0.18734343349933624,
      "learning_rate": 7.172592639068291e-05,
      "loss": 0.6658,
      "step": 4650
    },
    {
      "epoch": 0.9561105971836776,
      "grad_norm": 0.18206274509429932,
      "learning_rate": 7.171788998181637e-05,
      "loss": 0.6371,
      "step": 4651
    },
    {
      "epoch": 0.9563161681570562,
      "grad_norm": 0.18837840855121613,
      "learning_rate": 7.170985225668428e-05,
      "loss": 0.6306,
      "step": 4652
    },
    {
      "epoch": 0.9565217391304348,
      "grad_norm": 0.19700245559215546,
      "learning_rate": 7.17018132156826e-05,
      "loss": 0.6645,
      "step": 4653
    },
    {
      "epoch": 0.9567273101038133,
      "grad_norm": 0.18174946308135986,
      "learning_rate": 7.169377285920738e-05,
      "loss": 0.6657,
      "step": 4654
    },
    {
      "epoch": 0.9569328810771919,
      "grad_norm": 0.1869078427553177,
      "learning_rate": 7.168573118765476e-05,
      "loss": 0.6752,
      "step": 4655
    },
    {
      "epoch": 0.9571384520505705,
      "grad_norm": 0.19436730444431305,
      "learning_rate": 7.167768820142088e-05,
      "loss": 0.6694,
      "step": 4656
    },
    {
      "epoch": 0.957344023023949,
      "grad_norm": 0.18894408643245697,
      "learning_rate": 7.166964390090199e-05,
      "loss": 0.6644,
      "step": 4657
    },
    {
      "epoch": 0.9575495939973275,
      "grad_norm": 0.18464897572994232,
      "learning_rate": 7.16615982864944e-05,
      "loss": 0.6457,
      "step": 4658
    },
    {
      "epoch": 0.9577551649707061,
      "grad_norm": 0.1893334686756134,
      "learning_rate": 7.16535513585945e-05,
      "loss": 0.6692,
      "step": 4659
    },
    {
      "epoch": 0.9579607359440847,
      "grad_norm": 0.151536226272583,
      "learning_rate": 7.164550311759869e-05,
      "loss": 0.5774,
      "step": 4660
    },
    {
      "epoch": 0.9581663069174633,
      "grad_norm": 0.20720963180065155,
      "learning_rate": 7.163745356390347e-05,
      "loss": 0.6608,
      "step": 4661
    },
    {
      "epoch": 0.9583718778908418,
      "grad_norm": 0.18656425178050995,
      "learning_rate": 7.162940269790543e-05,
      "loss": 0.6502,
      "step": 4662
    },
    {
      "epoch": 0.9585774488642204,
      "grad_norm": 0.18301479518413544,
      "learning_rate": 7.162135052000116e-05,
      "loss": 0.6854,
      "step": 4663
    },
    {
      "epoch": 0.958783019837599,
      "grad_norm": 0.14167705178260803,
      "learning_rate": 7.161329703058742e-05,
      "loss": 0.5932,
      "step": 4664
    },
    {
      "epoch": 0.9589885908109775,
      "grad_norm": 0.13432294130325317,
      "learning_rate": 7.16052422300609e-05,
      "loss": 0.5758,
      "step": 4665
    },
    {
      "epoch": 0.959194161784356,
      "grad_norm": 0.2055593878030777,
      "learning_rate": 7.159718611881845e-05,
      "loss": 0.6646,
      "step": 4666
    },
    {
      "epoch": 0.9593997327577346,
      "grad_norm": 0.19777736067771912,
      "learning_rate": 7.158912869725695e-05,
      "loss": 0.6821,
      "step": 4667
    },
    {
      "epoch": 0.9596053037311132,
      "grad_norm": 0.18612885475158691,
      "learning_rate": 7.158106996577336e-05,
      "loss": 0.6758,
      "step": 4668
    },
    {
      "epoch": 0.9598108747044918,
      "grad_norm": 0.1979762464761734,
      "learning_rate": 7.15730099247647e-05,
      "loss": 0.6779,
      "step": 4669
    },
    {
      "epoch": 0.9600164456778703,
      "grad_norm": 0.1957666128873825,
      "learning_rate": 7.156494857462803e-05,
      "loss": 0.657,
      "step": 4670
    },
    {
      "epoch": 0.9602220166512488,
      "grad_norm": 0.16183792054653168,
      "learning_rate": 7.155688591576051e-05,
      "loss": 0.5905,
      "step": 4671
    },
    {
      "epoch": 0.9604275876246274,
      "grad_norm": 0.181317538022995,
      "learning_rate": 7.154882194855936e-05,
      "loss": 0.633,
      "step": 4672
    },
    {
      "epoch": 0.9606331585980059,
      "grad_norm": 0.1878432035446167,
      "learning_rate": 7.154075667342183e-05,
      "loss": 0.6703,
      "step": 4673
    },
    {
      "epoch": 0.9608387295713845,
      "grad_norm": 0.19090843200683594,
      "learning_rate": 7.153269009074528e-05,
      "loss": 0.6737,
      "step": 4674
    },
    {
      "epoch": 0.9610443005447631,
      "grad_norm": 0.18672534823417664,
      "learning_rate": 7.15246222009271e-05,
      "loss": 0.6585,
      "step": 4675
    },
    {
      "epoch": 0.9612498715181417,
      "grad_norm": 0.18867382407188416,
      "learning_rate": 7.151655300436475e-05,
      "loss": 0.6403,
      "step": 4676
    },
    {
      "epoch": 0.9614554424915202,
      "grad_norm": 0.18556974828243256,
      "learning_rate": 7.150848250145578e-05,
      "loss": 0.6543,
      "step": 4677
    },
    {
      "epoch": 0.9616610134648987,
      "grad_norm": 0.18414060771465302,
      "learning_rate": 7.150041069259777e-05,
      "loss": 0.6671,
      "step": 4678
    },
    {
      "epoch": 0.9618665844382773,
      "grad_norm": 0.14137166738510132,
      "learning_rate": 7.14923375781884e-05,
      "loss": 0.5742,
      "step": 4679
    },
    {
      "epoch": 0.9620721554116559,
      "grad_norm": 0.19371961057186127,
      "learning_rate": 7.148426315862537e-05,
      "loss": 0.6423,
      "step": 4680
    },
    {
      "epoch": 0.9622777263850344,
      "grad_norm": 0.1935972273349762,
      "learning_rate": 7.147618743430648e-05,
      "loss": 0.6896,
      "step": 4681
    },
    {
      "epoch": 0.962483297358413,
      "grad_norm": 0.19424404203891754,
      "learning_rate": 7.14681104056296e-05,
      "loss": 0.6634,
      "step": 4682
    },
    {
      "epoch": 0.9626888683317916,
      "grad_norm": 0.18401269614696503,
      "learning_rate": 7.146003207299263e-05,
      "loss": 0.6301,
      "step": 4683
    },
    {
      "epoch": 0.9628944393051702,
      "grad_norm": 0.18967941403388977,
      "learning_rate": 7.145195243679354e-05,
      "loss": 0.6583,
      "step": 4684
    },
    {
      "epoch": 0.9631000102785486,
      "grad_norm": 0.18623065948486328,
      "learning_rate": 7.14438714974304e-05,
      "loss": 0.6411,
      "step": 4685
    },
    {
      "epoch": 0.9633055812519272,
      "grad_norm": 0.18971066176891327,
      "learning_rate": 7.14357892553013e-05,
      "loss": 0.6237,
      "step": 4686
    },
    {
      "epoch": 0.9635111522253058,
      "grad_norm": 0.13411302864551544,
      "learning_rate": 7.142770571080443e-05,
      "loss": 0.5835,
      "step": 4687
    },
    {
      "epoch": 0.9637167231986844,
      "grad_norm": 0.19216755032539368,
      "learning_rate": 7.141962086433802e-05,
      "loss": 0.6423,
      "step": 4688
    },
    {
      "epoch": 0.9639222941720629,
      "grad_norm": 0.18625061213970184,
      "learning_rate": 7.141153471630038e-05,
      "loss": 0.6641,
      "step": 4689
    },
    {
      "epoch": 0.9641278651454415,
      "grad_norm": 0.19605115056037903,
      "learning_rate": 7.140344726708988e-05,
      "loss": 0.6713,
      "step": 4690
    },
    {
      "epoch": 0.96433343611882,
      "grad_norm": 0.18507269024848938,
      "learning_rate": 7.139535851710492e-05,
      "loss": 0.6626,
      "step": 4691
    },
    {
      "epoch": 0.9645390070921985,
      "grad_norm": 0.1852700561285019,
      "learning_rate": 7.138726846674403e-05,
      "loss": 0.6751,
      "step": 4692
    },
    {
      "epoch": 0.9647445780655771,
      "grad_norm": 0.17961280047893524,
      "learning_rate": 7.137917711640575e-05,
      "loss": 0.6648,
      "step": 4693
    },
    {
      "epoch": 0.9649501490389557,
      "grad_norm": 0.20353473722934723,
      "learning_rate": 7.137108446648873e-05,
      "loss": 0.6485,
      "step": 4694
    },
    {
      "epoch": 0.9651557200123343,
      "grad_norm": 0.18588435649871826,
      "learning_rate": 7.136299051739162e-05,
      "loss": 0.6377,
      "step": 4695
    },
    {
      "epoch": 0.9653612909857128,
      "grad_norm": 0.14005832374095917,
      "learning_rate": 7.135489526951318e-05,
      "loss": 0.5717,
      "step": 4696
    },
    {
      "epoch": 0.9655668619590914,
      "grad_norm": 0.18969693779945374,
      "learning_rate": 7.134679872325224e-05,
      "loss": 0.6724,
      "step": 4697
    },
    {
      "epoch": 0.96577243293247,
      "grad_norm": 0.12744298577308655,
      "learning_rate": 7.133870087900768e-05,
      "loss": 0.5729,
      "step": 4698
    },
    {
      "epoch": 0.9659780039058485,
      "grad_norm": 0.18571147322654724,
      "learning_rate": 7.133060173717842e-05,
      "loss": 0.6547,
      "step": 4699
    },
    {
      "epoch": 0.966183574879227,
      "grad_norm": 0.19381268322467804,
      "learning_rate": 7.13225012981635e-05,
      "loss": 0.672,
      "step": 4700
    },
    {
      "epoch": 0.9663891458526056,
      "grad_norm": 0.18004442751407623,
      "learning_rate": 7.131439956236194e-05,
      "loss": 0.6923,
      "step": 4701
    },
    {
      "epoch": 0.9665947168259842,
      "grad_norm": 0.18902912735939026,
      "learning_rate": 7.130629653017293e-05,
      "loss": 0.6709,
      "step": 4702
    },
    {
      "epoch": 0.9668002877993628,
      "grad_norm": 0.1331816166639328,
      "learning_rate": 7.129819220199566e-05,
      "loss": 0.5755,
      "step": 4703
    },
    {
      "epoch": 0.9670058587727413,
      "grad_norm": 0.1306556910276413,
      "learning_rate": 7.129008657822936e-05,
      "loss": 0.5504,
      "step": 4704
    },
    {
      "epoch": 0.9672114297461198,
      "grad_norm": 0.12396678328514099,
      "learning_rate": 7.128197965927337e-05,
      "loss": 0.5786,
      "step": 4705
    },
    {
      "epoch": 0.9674170007194984,
      "grad_norm": 0.2061123251914978,
      "learning_rate": 7.127387144552709e-05,
      "loss": 0.6777,
      "step": 4706
    },
    {
      "epoch": 0.967622571692877,
      "grad_norm": 0.1253053843975067,
      "learning_rate": 7.126576193738997e-05,
      "loss": 0.5862,
      "step": 4707
    },
    {
      "epoch": 0.9678281426662555,
      "grad_norm": 0.1296505630016327,
      "learning_rate": 7.125765113526151e-05,
      "loss": 0.5758,
      "step": 4708
    },
    {
      "epoch": 0.9680337136396341,
      "grad_norm": 0.1793881356716156,
      "learning_rate": 7.124953903954132e-05,
      "loss": 0.6242,
      "step": 4709
    },
    {
      "epoch": 0.9682392846130127,
      "grad_norm": 0.13087224960327148,
      "learning_rate": 7.124142565062903e-05,
      "loss": 0.5745,
      "step": 4710
    },
    {
      "epoch": 0.9684448555863912,
      "grad_norm": 0.12415716052055359,
      "learning_rate": 7.123331096892434e-05,
      "loss": 0.5658,
      "step": 4711
    },
    {
      "epoch": 0.9686504265597697,
      "grad_norm": 0.18544363975524902,
      "learning_rate": 7.122519499482706e-05,
      "loss": 0.6601,
      "step": 4712
    },
    {
      "epoch": 0.9688559975331483,
      "grad_norm": 0.17584609985351562,
      "learning_rate": 7.121707772873699e-05,
      "loss": 0.6448,
      "step": 4713
    },
    {
      "epoch": 0.9690615685065269,
      "grad_norm": 0.18083997070789337,
      "learning_rate": 7.120895917105402e-05,
      "loss": 0.6701,
      "step": 4714
    },
    {
      "epoch": 0.9692671394799054,
      "grad_norm": 0.17472274601459503,
      "learning_rate": 7.120083932217815e-05,
      "loss": 0.6957,
      "step": 4715
    },
    {
      "epoch": 0.969472710453284,
      "grad_norm": 0.15184062719345093,
      "learning_rate": 7.119271818250936e-05,
      "loss": 0.5817,
      "step": 4716
    },
    {
      "epoch": 0.9696782814266626,
      "grad_norm": 0.18855705857276917,
      "learning_rate": 7.11845957524478e-05,
      "loss": 0.6749,
      "step": 4717
    },
    {
      "epoch": 0.9698838524000412,
      "grad_norm": 0.18199525773525238,
      "learning_rate": 7.117647203239358e-05,
      "loss": 0.6665,
      "step": 4718
    },
    {
      "epoch": 0.9700894233734196,
      "grad_norm": 0.1904478669166565,
      "learning_rate": 7.116834702274693e-05,
      "loss": 0.6339,
      "step": 4719
    },
    {
      "epoch": 0.9702949943467982,
      "grad_norm": 0.18044617772102356,
      "learning_rate": 7.116022072390815e-05,
      "loss": 0.6575,
      "step": 4720
    },
    {
      "epoch": 0.9705005653201768,
      "grad_norm": 0.17925746738910675,
      "learning_rate": 7.115209313627755e-05,
      "loss": 0.6639,
      "step": 4721
    },
    {
      "epoch": 0.9707061362935554,
      "grad_norm": 0.18334949016571045,
      "learning_rate": 7.114396426025557e-05,
      "loss": 0.6716,
      "step": 4722
    },
    {
      "epoch": 0.9709117072669339,
      "grad_norm": 0.17840418219566345,
      "learning_rate": 7.113583409624265e-05,
      "loss": 0.6672,
      "step": 4723
    },
    {
      "epoch": 0.9711172782403125,
      "grad_norm": 0.14346054196357727,
      "learning_rate": 7.112770264463936e-05,
      "loss": 0.6005,
      "step": 4724
    },
    {
      "epoch": 0.971322849213691,
      "grad_norm": 0.20740103721618652,
      "learning_rate": 7.111956990584626e-05,
      "loss": 0.6906,
      "step": 4725
    },
    {
      "epoch": 0.9715284201870696,
      "grad_norm": 0.1770005226135254,
      "learning_rate": 7.111143588026406e-05,
      "loss": 0.6421,
      "step": 4726
    },
    {
      "epoch": 0.9717339911604481,
      "grad_norm": 0.18892593681812286,
      "learning_rate": 7.110330056829344e-05,
      "loss": 0.6357,
      "step": 4727
    },
    {
      "epoch": 0.9719395621338267,
      "grad_norm": 0.18768368661403656,
      "learning_rate": 7.109516397033522e-05,
      "loss": 0.6538,
      "step": 4728
    },
    {
      "epoch": 0.9721451331072053,
      "grad_norm": 0.1885116994380951,
      "learning_rate": 7.108702608679022e-05,
      "loss": 0.6792,
      "step": 4729
    },
    {
      "epoch": 0.9723507040805838,
      "grad_norm": 0.19139648973941803,
      "learning_rate": 7.10788869180594e-05,
      "loss": 0.6307,
      "step": 4730
    },
    {
      "epoch": 0.9725562750539624,
      "grad_norm": 0.18306319415569305,
      "learning_rate": 7.107074646454368e-05,
      "loss": 0.6564,
      "step": 4731
    },
    {
      "epoch": 0.972761846027341,
      "grad_norm": 0.18829376995563507,
      "learning_rate": 7.106260472664417e-05,
      "loss": 0.6439,
      "step": 4732
    },
    {
      "epoch": 0.9729674170007195,
      "grad_norm": 0.18569877743721008,
      "learning_rate": 7.105446170476193e-05,
      "loss": 0.6301,
      "step": 4733
    },
    {
      "epoch": 0.973172987974098,
      "grad_norm": 0.17895673215389252,
      "learning_rate": 7.104631739929814e-05,
      "loss": 0.6752,
      "step": 4734
    },
    {
      "epoch": 0.9733785589474766,
      "grad_norm": 0.14281636476516724,
      "learning_rate": 7.103817181065402e-05,
      "loss": 0.5585,
      "step": 4735
    },
    {
      "epoch": 0.9735841299208552,
      "grad_norm": 0.18822631239891052,
      "learning_rate": 7.103002493923089e-05,
      "loss": 0.6773,
      "step": 4736
    },
    {
      "epoch": 0.9737897008942338,
      "grad_norm": 0.19597260653972626,
      "learning_rate": 7.102187678543009e-05,
      "loss": 0.6525,
      "step": 4737
    },
    {
      "epoch": 0.9739952718676123,
      "grad_norm": 0.18155749142169952,
      "learning_rate": 7.101372734965306e-05,
      "loss": 0.6369,
      "step": 4738
    },
    {
      "epoch": 0.9742008428409908,
      "grad_norm": 0.1390804648399353,
      "learning_rate": 7.100557663230125e-05,
      "loss": 0.5831,
      "step": 4739
    },
    {
      "epoch": 0.9744064138143694,
      "grad_norm": 0.19495947659015656,
      "learning_rate": 7.099742463377626e-05,
      "loss": 0.6545,
      "step": 4740
    },
    {
      "epoch": 0.974611984787748,
      "grad_norm": 0.12120307236909866,
      "learning_rate": 7.098927135447965e-05,
      "loss": 0.5725,
      "step": 4741
    },
    {
      "epoch": 0.9748175557611265,
      "grad_norm": 0.18559974431991577,
      "learning_rate": 7.09811167948131e-05,
      "loss": 0.6441,
      "step": 4742
    },
    {
      "epoch": 0.9750231267345051,
      "grad_norm": 0.18373870849609375,
      "learning_rate": 7.097296095517838e-05,
      "loss": 0.6765,
      "step": 4743
    },
    {
      "epoch": 0.9752286977078837,
      "grad_norm": 0.13721033930778503,
      "learning_rate": 7.096480383597725e-05,
      "loss": 0.5717,
      "step": 4744
    },
    {
      "epoch": 0.9754342686812622,
      "grad_norm": 0.25049659609794617,
      "learning_rate": 7.095664543761162e-05,
      "loss": 0.653,
      "step": 4745
    },
    {
      "epoch": 0.9756398396546407,
      "grad_norm": 0.17938856780529022,
      "learning_rate": 7.094848576048339e-05,
      "loss": 0.6455,
      "step": 4746
    },
    {
      "epoch": 0.9758454106280193,
      "grad_norm": 0.18572570383548737,
      "learning_rate": 7.094032480499454e-05,
      "loss": 0.634,
      "step": 4747
    },
    {
      "epoch": 0.9760509816013979,
      "grad_norm": 0.18931305408477783,
      "learning_rate": 7.093216257154713e-05,
      "loss": 0.6397,
      "step": 4748
    },
    {
      "epoch": 0.9762565525747764,
      "grad_norm": 0.178866446018219,
      "learning_rate": 7.092399906054328e-05,
      "loss": 0.6501,
      "step": 4749
    },
    {
      "epoch": 0.976462123548155,
      "grad_norm": 0.13158805668354034,
      "learning_rate": 7.091583427238515e-05,
      "loss": 0.5743,
      "step": 4750
    },
    {
      "epoch": 0.9766676945215336,
      "grad_norm": 0.18385621905326843,
      "learning_rate": 7.090766820747502e-05,
      "loss": 0.6433,
      "step": 4751
    },
    {
      "epoch": 0.9768732654949122,
      "grad_norm": 0.18304161727428436,
      "learning_rate": 7.089950086621515e-05,
      "loss": 0.6304,
      "step": 4752
    },
    {
      "epoch": 0.9770788364682906,
      "grad_norm": 0.14201928675174713,
      "learning_rate": 7.089133224900794e-05,
      "loss": 0.5821,
      "step": 4753
    },
    {
      "epoch": 0.9772844074416692,
      "grad_norm": 0.18627387285232544,
      "learning_rate": 7.08831623562558e-05,
      "loss": 0.6627,
      "step": 4754
    },
    {
      "epoch": 0.9774899784150478,
      "grad_norm": 0.18866147100925446,
      "learning_rate": 7.087499118836123e-05,
      "loss": 0.6627,
      "step": 4755
    },
    {
      "epoch": 0.9776955493884264,
      "grad_norm": 0.1349857598543167,
      "learning_rate": 7.086681874572677e-05,
      "loss": 0.5733,
      "step": 4756
    },
    {
      "epoch": 0.9779011203618049,
      "grad_norm": 0.18248964846134186,
      "learning_rate": 7.085864502875506e-05,
      "loss": 0.6549,
      "step": 4757
    },
    {
      "epoch": 0.9781066913351835,
      "grad_norm": 0.188977912068367,
      "learning_rate": 7.085047003784879e-05,
      "loss": 0.6531,
      "step": 4758
    },
    {
      "epoch": 0.978312262308562,
      "grad_norm": 0.1410411149263382,
      "learning_rate": 7.084229377341068e-05,
      "loss": 0.5773,
      "step": 4759
    },
    {
      "epoch": 0.9785178332819406,
      "grad_norm": 0.18209992349147797,
      "learning_rate": 7.083411623584352e-05,
      "loss": 0.6653,
      "step": 4760
    },
    {
      "epoch": 0.9787234042553191,
      "grad_norm": 0.18571458756923676,
      "learning_rate": 7.082593742555023e-05,
      "loss": 0.6621,
      "step": 4761
    },
    {
      "epoch": 0.9789289752286977,
      "grad_norm": 0.18278199434280396,
      "learning_rate": 7.08177573429337e-05,
      "loss": 0.6688,
      "step": 4762
    },
    {
      "epoch": 0.9791345462020763,
      "grad_norm": 0.17872901260852814,
      "learning_rate": 7.080957598839693e-05,
      "loss": 0.6442,
      "step": 4763
    },
    {
      "epoch": 0.9793401171754548,
      "grad_norm": 0.17393019795417786,
      "learning_rate": 7.080139336234299e-05,
      "loss": 0.6474,
      "step": 4764
    },
    {
      "epoch": 0.9795456881488334,
      "grad_norm": 0.18381252884864807,
      "learning_rate": 7.0793209465175e-05,
      "loss": 0.6469,
      "step": 4765
    },
    {
      "epoch": 0.979751259122212,
      "grad_norm": 0.18060103058815002,
      "learning_rate": 7.078502429729614e-05,
      "loss": 0.6635,
      "step": 4766
    },
    {
      "epoch": 0.9799568300955905,
      "grad_norm": 0.18748174607753754,
      "learning_rate": 7.077683785910964e-05,
      "loss": 0.6695,
      "step": 4767
    },
    {
      "epoch": 0.980162401068969,
      "grad_norm": 0.18352623283863068,
      "learning_rate": 7.076865015101882e-05,
      "loss": 0.6475,
      "step": 4768
    },
    {
      "epoch": 0.9803679720423476,
      "grad_norm": 0.14373265206813812,
      "learning_rate": 7.076046117342705e-05,
      "loss": 0.5666,
      "step": 4769
    },
    {
      "epoch": 0.9805735430157262,
      "grad_norm": 0.18376867473125458,
      "learning_rate": 7.075227092673777e-05,
      "loss": 0.6542,
      "step": 4770
    },
    {
      "epoch": 0.9807791139891048,
      "grad_norm": 0.1273968666791916,
      "learning_rate": 7.074407941135447e-05,
      "loss": 0.5939,
      "step": 4771
    },
    {
      "epoch": 0.9809846849624833,
      "grad_norm": 0.19144566357135773,
      "learning_rate": 7.073588662768069e-05,
      "loss": 0.655,
      "step": 4772
    },
    {
      "epoch": 0.9811902559358618,
      "grad_norm": 0.18799123167991638,
      "learning_rate": 7.072769257612007e-05,
      "loss": 0.6726,
      "step": 4773
    },
    {
      "epoch": 0.9813958269092404,
      "grad_norm": 0.19798782467842102,
      "learning_rate": 7.071949725707628e-05,
      "loss": 0.6438,
      "step": 4774
    },
    {
      "epoch": 0.981601397882619,
      "grad_norm": 0.18581277132034302,
      "learning_rate": 7.07113006709531e-05,
      "loss": 0.6562,
      "step": 4775
    },
    {
      "epoch": 0.9818069688559975,
      "grad_norm": 0.1861695498228073,
      "learning_rate": 7.070310281815429e-05,
      "loss": 0.6693,
      "step": 4776
    },
    {
      "epoch": 0.9820125398293761,
      "grad_norm": 0.15388214588165283,
      "learning_rate": 7.069490369908374e-05,
      "loss": 0.5852,
      "step": 4777
    },
    {
      "epoch": 0.9822181108027547,
      "grad_norm": 0.19053393602371216,
      "learning_rate": 7.068670331414539e-05,
      "loss": 0.6512,
      "step": 4778
    },
    {
      "epoch": 0.9824236817761333,
      "grad_norm": 0.19945350289344788,
      "learning_rate": 7.067850166374322e-05,
      "loss": 0.6898,
      "step": 4779
    },
    {
      "epoch": 0.9826292527495117,
      "grad_norm": 0.12717384099960327,
      "learning_rate": 7.067029874828131e-05,
      "loss": 0.5656,
      "step": 4780
    },
    {
      "epoch": 0.9828348237228903,
      "grad_norm": 0.18999631702899933,
      "learning_rate": 7.066209456816373e-05,
      "loss": 0.6775,
      "step": 4781
    },
    {
      "epoch": 0.9830403946962689,
      "grad_norm": 0.1788676530122757,
      "learning_rate": 7.065388912379472e-05,
      "loss": 0.6573,
      "step": 4782
    },
    {
      "epoch": 0.9832459656696474,
      "grad_norm": 0.1846441626548767,
      "learning_rate": 7.06456824155785e-05,
      "loss": 0.6285,
      "step": 4783
    },
    {
      "epoch": 0.983451536643026,
      "grad_norm": 0.18069574236869812,
      "learning_rate": 7.063747444391937e-05,
      "loss": 0.6477,
      "step": 4784
    },
    {
      "epoch": 0.9836571076164046,
      "grad_norm": 0.18519815802574158,
      "learning_rate": 7.062926520922171e-05,
      "loss": 0.6372,
      "step": 4785
    },
    {
      "epoch": 0.9838626785897832,
      "grad_norm": 0.1890534907579422,
      "learning_rate": 7.062105471188993e-05,
      "loss": 0.6727,
      "step": 4786
    },
    {
      "epoch": 0.9840682495631616,
      "grad_norm": 0.14361847937107086,
      "learning_rate": 7.061284295232854e-05,
      "loss": 0.5637,
      "step": 4787
    },
    {
      "epoch": 0.9842738205365402,
      "grad_norm": 0.20357996225357056,
      "learning_rate": 7.060462993094209e-05,
      "loss": 0.643,
      "step": 4788
    },
    {
      "epoch": 0.9844793915099188,
      "grad_norm": 0.12664301693439484,
      "learning_rate": 7.059641564813521e-05,
      "loss": 0.5653,
      "step": 4789
    },
    {
      "epoch": 0.9846849624832974,
      "grad_norm": 0.11784827709197998,
      "learning_rate": 7.058820010431256e-05,
      "loss": 0.5801,
      "step": 4790
    },
    {
      "epoch": 0.9848905334566759,
      "grad_norm": 0.19485826790332794,
      "learning_rate": 7.057998329987889e-05,
      "loss": 0.6846,
      "step": 4791
    },
    {
      "epoch": 0.9850961044300545,
      "grad_norm": 0.19157935678958893,
      "learning_rate": 7.057176523523901e-05,
      "loss": 0.6641,
      "step": 4792
    },
    {
      "epoch": 0.985301675403433,
      "grad_norm": 0.17738407850265503,
      "learning_rate": 7.056354591079778e-05,
      "loss": 0.646,
      "step": 4793
    },
    {
      "epoch": 0.9855072463768116,
      "grad_norm": 0.18637309968471527,
      "learning_rate": 7.055532532696012e-05,
      "loss": 0.6406,
      "step": 4794
    },
    {
      "epoch": 0.9857128173501901,
      "grad_norm": 0.18436288833618164,
      "learning_rate": 7.054710348413103e-05,
      "loss": 0.6349,
      "step": 4795
    },
    {
      "epoch": 0.9859183883235687,
      "grad_norm": 0.1875494122505188,
      "learning_rate": 7.053888038271555e-05,
      "loss": 0.6585,
      "step": 4796
    },
    {
      "epoch": 0.9861239592969473,
      "grad_norm": 0.18584869801998138,
      "learning_rate": 7.053065602311882e-05,
      "loss": 0.6729,
      "step": 4797
    },
    {
      "epoch": 0.9863295302703259,
      "grad_norm": 0.1785837858915329,
      "learning_rate": 7.052243040574597e-05,
      "loss": 0.6571,
      "step": 4798
    },
    {
      "epoch": 0.9865351012437044,
      "grad_norm": 0.18055270612239838,
      "learning_rate": 7.051420353100228e-05,
      "loss": 0.6732,
      "step": 4799
    },
    {
      "epoch": 0.986740672217083,
      "grad_norm": 0.17629997432231903,
      "learning_rate": 7.050597539929304e-05,
      "loss": 0.6463,
      "step": 4800
    },
    {
      "epoch": 0.9869462431904615,
      "grad_norm": 0.17916452884674072,
      "learning_rate": 7.049774601102361e-05,
      "loss": 0.664,
      "step": 4801
    },
    {
      "epoch": 0.98715181416384,
      "grad_norm": 0.17072445154190063,
      "learning_rate": 7.04895153665994e-05,
      "loss": 0.6522,
      "step": 4802
    },
    {
      "epoch": 0.9873573851372186,
      "grad_norm": 0.18947453796863556,
      "learning_rate": 7.048128346642591e-05,
      "loss": 0.6475,
      "step": 4803
    },
    {
      "epoch": 0.9875629561105972,
      "grad_norm": 0.18113267421722412,
      "learning_rate": 7.047305031090869e-05,
      "loss": 0.6505,
      "step": 4804
    },
    {
      "epoch": 0.9877685270839758,
      "grad_norm": 0.18333598971366882,
      "learning_rate": 7.046481590045331e-05,
      "loss": 0.6697,
      "step": 4805
    },
    {
      "epoch": 0.9879740980573543,
      "grad_norm": 0.17988578975200653,
      "learning_rate": 7.045658023546551e-05,
      "loss": 0.6637,
      "step": 4806
    },
    {
      "epoch": 0.9881796690307328,
      "grad_norm": 0.18621614575386047,
      "learning_rate": 7.044834331635098e-05,
      "loss": 0.6762,
      "step": 4807
    },
    {
      "epoch": 0.9883852400041114,
      "grad_norm": 0.17191414535045624,
      "learning_rate": 7.04401051435155e-05,
      "loss": 0.6615,
      "step": 4808
    },
    {
      "epoch": 0.98859081097749,
      "grad_norm": 0.1875917762517929,
      "learning_rate": 7.043186571736496e-05,
      "loss": 0.6757,
      "step": 4809
    },
    {
      "epoch": 0.9887963819508685,
      "grad_norm": 0.18261222541332245,
      "learning_rate": 7.042362503830527e-05,
      "loss": 0.635,
      "step": 4810
    },
    {
      "epoch": 0.9890019529242471,
      "grad_norm": 0.18588493764400482,
      "learning_rate": 7.04153831067424e-05,
      "loss": 0.6719,
      "step": 4811
    },
    {
      "epoch": 0.9892075238976257,
      "grad_norm": 0.1783093363046646,
      "learning_rate": 7.040713992308239e-05,
      "loss": 0.6538,
      "step": 4812
    },
    {
      "epoch": 0.9894130948710043,
      "grad_norm": 0.18314149975776672,
      "learning_rate": 7.039889548773136e-05,
      "loss": 0.6912,
      "step": 4813
    },
    {
      "epoch": 0.9896186658443827,
      "grad_norm": 0.18791064620018005,
      "learning_rate": 7.039064980109544e-05,
      "loss": 0.6627,
      "step": 4814
    },
    {
      "epoch": 0.9898242368177613,
      "grad_norm": 0.18856315314769745,
      "learning_rate": 7.038240286358089e-05,
      "loss": 0.5847,
      "step": 4815
    },
    {
      "epoch": 0.9900298077911399,
      "grad_norm": 0.19757792353630066,
      "learning_rate": 7.0374154675594e-05,
      "loss": 0.6815,
      "step": 4816
    },
    {
      "epoch": 0.9902353787645185,
      "grad_norm": 0.18688839673995972,
      "learning_rate": 7.036590523754109e-05,
      "loss": 0.6686,
      "step": 4817
    },
    {
      "epoch": 0.990440949737897,
      "grad_norm": 0.1844862699508667,
      "learning_rate": 7.035765454982861e-05,
      "loss": 0.6518,
      "step": 4818
    },
    {
      "epoch": 0.9906465207112756,
      "grad_norm": 0.18571245670318604,
      "learning_rate": 7.0349402612863e-05,
      "loss": 0.642,
      "step": 4819
    },
    {
      "epoch": 0.9908520916846542,
      "grad_norm": 0.1918804794549942,
      "learning_rate": 7.034114942705081e-05,
      "loss": 0.6427,
      "step": 4820
    },
    {
      "epoch": 0.9910576626580326,
      "grad_norm": 0.19276823103427887,
      "learning_rate": 7.033289499279863e-05,
      "loss": 0.6943,
      "step": 4821
    },
    {
      "epoch": 0.9912632336314112,
      "grad_norm": 0.18441876769065857,
      "learning_rate": 7.032463931051311e-05,
      "loss": 0.6596,
      "step": 4822
    },
    {
      "epoch": 0.9914688046047898,
      "grad_norm": 0.1893150806427002,
      "learning_rate": 7.031638238060099e-05,
      "loss": 0.6599,
      "step": 4823
    },
    {
      "epoch": 0.9916743755781684,
      "grad_norm": 0.18663519620895386,
      "learning_rate": 7.030812420346902e-05,
      "loss": 0.6508,
      "step": 4824
    },
    {
      "epoch": 0.9918799465515469,
      "grad_norm": 0.18189288675785065,
      "learning_rate": 7.029986477952409e-05,
      "loss": 0.5656,
      "step": 4825
    },
    {
      "epoch": 0.9920855175249255,
      "grad_norm": 0.1928027868270874,
      "learning_rate": 7.029160410917305e-05,
      "loss": 0.6758,
      "step": 4826
    },
    {
      "epoch": 0.992291088498304,
      "grad_norm": 0.19040422141551971,
      "learning_rate": 7.028334219282291e-05,
      "loss": 0.6546,
      "step": 4827
    },
    {
      "epoch": 0.9924966594716826,
      "grad_norm": 0.15369752049446106,
      "learning_rate": 7.027507903088066e-05,
      "loss": 0.5874,
      "step": 4828
    },
    {
      "epoch": 0.9927022304450611,
      "grad_norm": 0.13231946527957916,
      "learning_rate": 7.026681462375339e-05,
      "loss": 0.5761,
      "step": 4829
    },
    {
      "epoch": 0.9929078014184397,
      "grad_norm": 0.1998869776725769,
      "learning_rate": 7.025854897184828e-05,
      "loss": 0.6637,
      "step": 4830
    },
    {
      "epoch": 0.9931133723918183,
      "grad_norm": 0.18532314896583557,
      "learning_rate": 7.025028207557251e-05,
      "loss": 0.6492,
      "step": 4831
    },
    {
      "epoch": 0.9933189433651969,
      "grad_norm": 0.1902119517326355,
      "learning_rate": 7.024201393533337e-05,
      "loss": 0.6405,
      "step": 4832
    },
    {
      "epoch": 0.9935245143385754,
      "grad_norm": 0.17781443893909454,
      "learning_rate": 7.023374455153817e-05,
      "loss": 0.6644,
      "step": 4833
    },
    {
      "epoch": 0.993730085311954,
      "grad_norm": 0.1855769008398056,
      "learning_rate": 7.022547392459434e-05,
      "loss": 0.6642,
      "step": 4834
    },
    {
      "epoch": 0.9939356562853325,
      "grad_norm": 0.18379683792591095,
      "learning_rate": 7.02172020549093e-05,
      "loss": 0.6377,
      "step": 4835
    },
    {
      "epoch": 0.9941412272587111,
      "grad_norm": 0.17909419536590576,
      "learning_rate": 7.020892894289058e-05,
      "loss": 0.6393,
      "step": 4836
    },
    {
      "epoch": 0.9943467982320896,
      "grad_norm": 0.17869077622890472,
      "learning_rate": 7.020065458894575e-05,
      "loss": 0.6718,
      "step": 4837
    },
    {
      "epoch": 0.9945523692054682,
      "grad_norm": 0.18221206963062286,
      "learning_rate": 7.019237899348247e-05,
      "loss": 0.6812,
      "step": 4838
    },
    {
      "epoch": 0.9947579401788468,
      "grad_norm": 0.1849188208580017,
      "learning_rate": 7.018410215690841e-05,
      "loss": 0.586,
      "step": 4839
    },
    {
      "epoch": 0.9949635111522253,
      "grad_norm": 0.2003324180841446,
      "learning_rate": 7.017582407963136e-05,
      "loss": 0.6561,
      "step": 4840
    },
    {
      "epoch": 0.9951690821256038,
      "grad_norm": 0.19074849784374237,
      "learning_rate": 7.016754476205913e-05,
      "loss": 0.6509,
      "step": 4841
    },
    {
      "epoch": 0.9953746530989824,
      "grad_norm": 0.14551801979541779,
      "learning_rate": 7.01592642045996e-05,
      "loss": 0.5873,
      "step": 4842
    },
    {
      "epoch": 0.995580224072361,
      "grad_norm": 0.1880098134279251,
      "learning_rate": 7.01509824076607e-05,
      "loss": 0.6588,
      "step": 4843
    },
    {
      "epoch": 0.9957857950457395,
      "grad_norm": 0.18471471965312958,
      "learning_rate": 7.014269937165048e-05,
      "loss": 0.6426,
      "step": 4844
    },
    {
      "epoch": 0.9959913660191181,
      "grad_norm": 1.0276774168014526,
      "learning_rate": 7.013441509697696e-05,
      "loss": 0.6583,
      "step": 4845
    },
    {
      "epoch": 0.9961969369924967,
      "grad_norm": 0.13645489513874054,
      "learning_rate": 7.01261295840483e-05,
      "loss": 0.5675,
      "step": 4846
    },
    {
      "epoch": 0.9964025079658753,
      "grad_norm": 0.12980090081691742,
      "learning_rate": 7.011784283327266e-05,
      "loss": 0.5932,
      "step": 4847
    },
    {
      "epoch": 0.9966080789392537,
      "grad_norm": 0.21611304581165314,
      "learning_rate": 7.010955484505831e-05,
      "loss": 0.6966,
      "step": 4848
    },
    {
      "epoch": 0.9968136499126323,
      "grad_norm": 0.21331064403057098,
      "learning_rate": 7.010126561981356e-05,
      "loss": 0.6875,
      "step": 4849
    },
    {
      "epoch": 0.9970192208860109,
      "grad_norm": 0.19772310554981232,
      "learning_rate": 7.009297515794678e-05,
      "loss": 0.6665,
      "step": 4850
    },
    {
      "epoch": 0.9972247918593895,
      "grad_norm": 0.20528611540794373,
      "learning_rate": 7.008468345986637e-05,
      "loss": 0.6648,
      "step": 4851
    },
    {
      "epoch": 0.997430362832768,
      "grad_norm": 0.2074098140001297,
      "learning_rate": 7.007639052598088e-05,
      "loss": 0.6817,
      "step": 4852
    },
    {
      "epoch": 0.9976359338061466,
      "grad_norm": 0.19538044929504395,
      "learning_rate": 7.006809635669882e-05,
      "loss": 0.6414,
      "step": 4853
    },
    {
      "epoch": 0.9978415047795252,
      "grad_norm": 0.189046248793602,
      "learning_rate": 7.005980095242883e-05,
      "loss": 0.5861,
      "step": 4854
    },
    {
      "epoch": 0.9980470757529037,
      "grad_norm": 0.14991891384124756,
      "learning_rate": 7.005150431357957e-05,
      "loss": 0.5907,
      "step": 4855
    },
    {
      "epoch": 0.9982526467262822,
      "grad_norm": 0.24315035343170166,
      "learning_rate": 7.004320644055979e-05,
      "loss": 0.6664,
      "step": 4856
    },
    {
      "epoch": 0.9984582176996608,
      "grad_norm": 0.183399498462677,
      "learning_rate": 7.003490733377827e-05,
      "loss": 0.5846,
      "step": 4857
    },
    {
      "epoch": 0.9986637886730394,
      "grad_norm": 0.20931483805179596,
      "learning_rate": 7.002660699364389e-05,
      "loss": 0.6624,
      "step": 4858
    },
    {
      "epoch": 0.9988693596464179,
      "grad_norm": 0.19488368928432465,
      "learning_rate": 7.001830542056555e-05,
      "loss": 0.6757,
      "step": 4859
    },
    {
      "epoch": 0.9990749306197965,
      "grad_norm": 0.16465352475643158,
      "learning_rate": 7.001000261495223e-05,
      "loss": 0.5584,
      "step": 4860
    },
    {
      "epoch": 0.999280501593175,
      "grad_norm": 0.17341670393943787,
      "learning_rate": 7.0001698577213e-05,
      "loss": 0.5857,
      "step": 4861
    },
    {
      "epoch": 0.9994860725665536,
      "grad_norm": 0.21987827122211456,
      "learning_rate": 6.99933933077569e-05,
      "loss": 0.6423,
      "step": 4862
    },
    {
      "epoch": 0.9996916435399321,
      "grad_norm": 0.21325050294399261,
      "learning_rate": 6.998508680699317e-05,
      "loss": 0.6558,
      "step": 4863
    },
    {
      "epoch": 0.9998972145133107,
      "grad_norm": 0.1891472041606903,
      "learning_rate": 6.997677907533099e-05,
      "loss": 0.6461,
      "step": 4864
    },
    {
      "epoch": 1.0001027854866893,
      "grad_norm": 0.20316524803638458,
      "learning_rate": 6.996847011317963e-05,
      "loss": 0.5995,
      "step": 4865
    },
    {
      "epoch": 1.0003083564600679,
      "grad_norm": 0.271843820810318,
      "learning_rate": 6.996015992094846e-05,
      "loss": 0.5709,
      "step": 4866
    },
    {
      "epoch": 1.0005139274334465,
      "grad_norm": 0.22854308784008026,
      "learning_rate": 6.995184849904686e-05,
      "loss": 0.5628,
      "step": 4867
    },
    {
      "epoch": 1.000719498406825,
      "grad_norm": 0.20615056157112122,
      "learning_rate": 6.994353584788431e-05,
      "loss": 0.5559,
      "step": 4868
    },
    {
      "epoch": 1.0009250693802034,
      "grad_norm": 0.24276702105998993,
      "learning_rate": 6.993522196787035e-05,
      "loss": 0.5764,
      "step": 4869
    },
    {
      "epoch": 1.001130640353582,
      "grad_norm": 0.28377482295036316,
      "learning_rate": 6.992690685941454e-05,
      "loss": 0.5666,
      "step": 4870
    },
    {
      "epoch": 1.0013362113269606,
      "grad_norm": 0.2509450912475586,
      "learning_rate": 6.991859052292654e-05,
      "loss": 0.5716,
      "step": 4871
    },
    {
      "epoch": 1.0015417823003392,
      "grad_norm": 0.20262686908245087,
      "learning_rate": 6.991027295881606e-05,
      "loss": 0.5314,
      "step": 4872
    },
    {
      "epoch": 1.0017473532737178,
      "grad_norm": 0.1771395355463028,
      "learning_rate": 6.990195416749287e-05,
      "loss": 0.5826,
      "step": 4873
    },
    {
      "epoch": 1.0019529242470964,
      "grad_norm": 0.22669513523578644,
      "learning_rate": 6.989363414936676e-05,
      "loss": 0.5785,
      "step": 4874
    },
    {
      "epoch": 1.002158495220475,
      "grad_norm": 0.18329079449176788,
      "learning_rate": 6.988531290484768e-05,
      "loss": 0.5626,
      "step": 4875
    },
    {
      "epoch": 1.0023640661938533,
      "grad_norm": 0.17352893948554993,
      "learning_rate": 6.987699043434552e-05,
      "loss": 0.5549,
      "step": 4876
    },
    {
      "epoch": 1.002569637167232,
      "grad_norm": 0.2029443383216858,
      "learning_rate": 6.986866673827032e-05,
      "loss": 0.5679,
      "step": 4877
    },
    {
      "epoch": 1.0027752081406105,
      "grad_norm": 0.21238186955451965,
      "learning_rate": 6.986034181703216e-05,
      "loss": 0.579,
      "step": 4878
    },
    {
      "epoch": 1.002980779113989,
      "grad_norm": 0.20517666637897491,
      "learning_rate": 6.985201567104115e-05,
      "loss": 0.5578,
      "step": 4879
    },
    {
      "epoch": 1.0031863500873677,
      "grad_norm": 0.221823588013649,
      "learning_rate": 6.984368830070747e-05,
      "loss": 0.5559,
      "step": 4880
    },
    {
      "epoch": 1.0033919210607463,
      "grad_norm": 0.21827368438243866,
      "learning_rate": 6.98353597064414e-05,
      "loss": 0.5811,
      "step": 4881
    },
    {
      "epoch": 1.0035974920341248,
      "grad_norm": 0.20785865187644958,
      "learning_rate": 6.982702988865326e-05,
      "loss": 0.5226,
      "step": 4882
    },
    {
      "epoch": 1.0038030630075034,
      "grad_norm": 0.18137192726135254,
      "learning_rate": 6.981869884775336e-05,
      "loss": 0.5244,
      "step": 4883
    },
    {
      "epoch": 1.0040086339808818,
      "grad_norm": 0.18444296717643738,
      "learning_rate": 6.981036658415218e-05,
      "loss": 0.5603,
      "step": 4884
    },
    {
      "epoch": 1.0042142049542604,
      "grad_norm": 0.22535867989063263,
      "learning_rate": 6.980203309826021e-05,
      "loss": 0.5684,
      "step": 4885
    },
    {
      "epoch": 1.004419775927639,
      "grad_norm": 0.21289990842342377,
      "learning_rate": 6.979369839048799e-05,
      "loss": 0.5214,
      "step": 4886
    },
    {
      "epoch": 1.0046253469010176,
      "grad_norm": 0.16380147635936737,
      "learning_rate": 6.978536246124615e-05,
      "loss": 0.5145,
      "step": 4887
    },
    {
      "epoch": 1.0048309178743962,
      "grad_norm": 0.170881450176239,
      "learning_rate": 6.977702531094534e-05,
      "loss": 0.5329,
      "step": 4888
    },
    {
      "epoch": 1.0050364888477747,
      "grad_norm": 0.17499133944511414,
      "learning_rate": 6.976868693999629e-05,
      "loss": 0.5228,
      "step": 4889
    },
    {
      "epoch": 1.0052420598211533,
      "grad_norm": 0.20944778621196747,
      "learning_rate": 6.976034734880981e-05,
      "loss": 0.5465,
      "step": 4890
    },
    {
      "epoch": 1.005447630794532,
      "grad_norm": 0.20664618909358978,
      "learning_rate": 6.975200653779674e-05,
      "loss": 0.5645,
      "step": 4891
    },
    {
      "epoch": 1.0056532017679103,
      "grad_norm": 0.20452165603637695,
      "learning_rate": 6.974366450736801e-05,
      "loss": 0.536,
      "step": 4892
    },
    {
      "epoch": 1.0058587727412889,
      "grad_norm": 0.20522767305374146,
      "learning_rate": 6.973532125793457e-05,
      "loss": 0.5524,
      "step": 4893
    },
    {
      "epoch": 1.0060643437146675,
      "grad_norm": 0.20214490592479706,
      "learning_rate": 6.972697678990747e-05,
      "loss": 0.5829,
      "step": 4894
    },
    {
      "epoch": 1.006269914688046,
      "grad_norm": 0.19097676873207092,
      "learning_rate": 6.971863110369778e-05,
      "loss": 0.5589,
      "step": 4895
    },
    {
      "epoch": 1.0064754856614246,
      "grad_norm": 0.19728168845176697,
      "learning_rate": 6.97102841997167e-05,
      "loss": 0.5546,
      "step": 4896
    },
    {
      "epoch": 1.0066810566348032,
      "grad_norm": 0.1733403503894806,
      "learning_rate": 6.97019360783754e-05,
      "loss": 0.5264,
      "step": 4897
    },
    {
      "epoch": 1.0068866276081818,
      "grad_norm": 0.17879877984523773,
      "learning_rate": 6.969358674008516e-05,
      "loss": 0.5623,
      "step": 4898
    },
    {
      "epoch": 1.0070921985815602,
      "grad_norm": 0.19583040475845337,
      "learning_rate": 6.968523618525733e-05,
      "loss": 0.5773,
      "step": 4899
    },
    {
      "epoch": 1.0072977695549388,
      "grad_norm": 0.193648099899292,
      "learning_rate": 6.967688441430328e-05,
      "loss": 0.576,
      "step": 4900
    },
    {
      "epoch": 1.0075033405283174,
      "grad_norm": 0.1968041956424713,
      "learning_rate": 6.966853142763448e-05,
      "loss": 0.5513,
      "step": 4901
    },
    {
      "epoch": 1.007708911501696,
      "grad_norm": 0.196999654173851,
      "learning_rate": 6.966017722566246e-05,
      "loss": 0.5576,
      "step": 4902
    },
    {
      "epoch": 1.0079144824750745,
      "grad_norm": 0.19729197025299072,
      "learning_rate": 6.965182180879873e-05,
      "loss": 0.5689,
      "step": 4903
    },
    {
      "epoch": 1.0081200534484531,
      "grad_norm": 0.20436379313468933,
      "learning_rate": 6.964346517745498e-05,
      "loss": 0.5766,
      "step": 4904
    },
    {
      "epoch": 1.0083256244218317,
      "grad_norm": 0.19463692605495453,
      "learning_rate": 6.963510733204288e-05,
      "loss": 0.5477,
      "step": 4905
    },
    {
      "epoch": 1.0085311953952103,
      "grad_norm": 0.19440148770809174,
      "learning_rate": 6.962674827297418e-05,
      "loss": 0.5578,
      "step": 4906
    },
    {
      "epoch": 1.0087367663685887,
      "grad_norm": 0.19789615273475647,
      "learning_rate": 6.961838800066072e-05,
      "loss": 0.5559,
      "step": 4907
    },
    {
      "epoch": 1.0089423373419673,
      "grad_norm": 0.19245871901512146,
      "learning_rate": 6.961002651551432e-05,
      "loss": 0.5484,
      "step": 4908
    },
    {
      "epoch": 1.0091479083153458,
      "grad_norm": 0.19907300174236298,
      "learning_rate": 6.960166381794697e-05,
      "loss": 0.5343,
      "step": 4909
    },
    {
      "epoch": 1.0093534792887244,
      "grad_norm": 0.19037161767482758,
      "learning_rate": 6.959329990837061e-05,
      "loss": 0.5389,
      "step": 4910
    },
    {
      "epoch": 1.009559050262103,
      "grad_norm": 0.18873098492622375,
      "learning_rate": 6.958493478719733e-05,
      "loss": 0.5582,
      "step": 4911
    },
    {
      "epoch": 1.0097646212354816,
      "grad_norm": 0.19992291927337646,
      "learning_rate": 6.95765684548392e-05,
      "loss": 0.5746,
      "step": 4912
    },
    {
      "epoch": 1.0099701922088602,
      "grad_norm": 0.2006637305021286,
      "learning_rate": 6.956820091170844e-05,
      "loss": 0.5731,
      "step": 4913
    },
    {
      "epoch": 1.0101757631822386,
      "grad_norm": 0.1814371794462204,
      "learning_rate": 6.955983215821724e-05,
      "loss": 0.5409,
      "step": 4914
    },
    {
      "epoch": 1.0103813341556172,
      "grad_norm": 0.16987626254558563,
      "learning_rate": 6.955146219477788e-05,
      "loss": 0.5778,
      "step": 4915
    },
    {
      "epoch": 1.0105869051289957,
      "grad_norm": 0.19764257967472076,
      "learning_rate": 6.954309102180276e-05,
      "loss": 0.5729,
      "step": 4916
    },
    {
      "epoch": 1.0107924761023743,
      "grad_norm": 0.19731703400611877,
      "learning_rate": 6.953471863970424e-05,
      "loss": 0.5507,
      "step": 4917
    },
    {
      "epoch": 1.010998047075753,
      "grad_norm": 0.18993428349494934,
      "learning_rate": 6.952634504889484e-05,
      "loss": 0.5448,
      "step": 4918
    },
    {
      "epoch": 1.0112036180491315,
      "grad_norm": 0.1911395788192749,
      "learning_rate": 6.951797024978703e-05,
      "loss": 0.5319,
      "step": 4919
    },
    {
      "epoch": 1.01140918902251,
      "grad_norm": 0.2100227028131485,
      "learning_rate": 6.950959424279342e-05,
      "loss": 0.5865,
      "step": 4920
    },
    {
      "epoch": 1.0116147599958887,
      "grad_norm": 0.1891854703426361,
      "learning_rate": 6.950121702832666e-05,
      "loss": 0.5353,
      "step": 4921
    },
    {
      "epoch": 1.011820330969267,
      "grad_norm": 0.16457346081733704,
      "learning_rate": 6.949283860679946e-05,
      "loss": 0.519,
      "step": 4922
    },
    {
      "epoch": 1.0120259019426456,
      "grad_norm": 0.13976171612739563,
      "learning_rate": 6.948445897862458e-05,
      "loss": 0.5277,
      "step": 4923
    },
    {
      "epoch": 1.0122314729160242,
      "grad_norm": 0.17471906542778015,
      "learning_rate": 6.947607814421486e-05,
      "loss": 0.5693,
      "step": 4924
    },
    {
      "epoch": 1.0124370438894028,
      "grad_norm": 0.22914856672286987,
      "learning_rate": 6.946769610398316e-05,
      "loss": 0.5756,
      "step": 4925
    },
    {
      "epoch": 1.0126426148627814,
      "grad_norm": 0.20704926550388336,
      "learning_rate": 6.945931285834242e-05,
      "loss": 0.5726,
      "step": 4926
    },
    {
      "epoch": 1.01284818583616,
      "grad_norm": 0.19101639091968536,
      "learning_rate": 6.945092840770567e-05,
      "loss": 0.556,
      "step": 4927
    },
    {
      "epoch": 1.0130537568095386,
      "grad_norm": 0.2030901461839676,
      "learning_rate": 6.944254275248597e-05,
      "loss": 0.5723,
      "step": 4928
    },
    {
      "epoch": 1.013259327782917,
      "grad_norm": 0.24590568244457245,
      "learning_rate": 6.943415589309642e-05,
      "loss": 0.551,
      "step": 4929
    },
    {
      "epoch": 1.0134648987562955,
      "grad_norm": 0.1951897293329239,
      "learning_rate": 6.942576782995022e-05,
      "loss": 0.5712,
      "step": 4930
    },
    {
      "epoch": 1.0136704697296741,
      "grad_norm": 0.19376187026500702,
      "learning_rate": 6.94173785634606e-05,
      "loss": 0.5565,
      "step": 4931
    },
    {
      "epoch": 1.0138760407030527,
      "grad_norm": 0.20647601783275604,
      "learning_rate": 6.940898809404086e-05,
      "loss": 0.5822,
      "step": 4932
    },
    {
      "epoch": 1.0140816116764313,
      "grad_norm": 0.1940944641828537,
      "learning_rate": 6.940059642210438e-05,
      "loss": 0.5529,
      "step": 4933
    },
    {
      "epoch": 1.0142871826498099,
      "grad_norm": 0.18651318550109863,
      "learning_rate": 6.939220354806455e-05,
      "loss": 0.519,
      "step": 4934
    },
    {
      "epoch": 1.0144927536231885,
      "grad_norm": 0.17096173763275146,
      "learning_rate": 6.938380947233487e-05,
      "loss": 0.5716,
      "step": 4935
    },
    {
      "epoch": 1.014698324596567,
      "grad_norm": 0.20484822988510132,
      "learning_rate": 6.937541419532885e-05,
      "loss": 0.569,
      "step": 4936
    },
    {
      "epoch": 1.0149038955699454,
      "grad_norm": 2.4529898166656494,
      "learning_rate": 6.936701771746012e-05,
      "loss": 0.5871,
      "step": 4937
    },
    {
      "epoch": 1.015109466543324,
      "grad_norm": 0.1707240641117096,
      "learning_rate": 6.935862003914231e-05,
      "loss": 0.5322,
      "step": 4938
    },
    {
      "epoch": 1.0153150375167026,
      "grad_norm": 0.20285925269126892,
      "learning_rate": 6.935022116078915e-05,
      "loss": 0.5767,
      "step": 4939
    },
    {
      "epoch": 1.0155206084900812,
      "grad_norm": 0.23825408518314362,
      "learning_rate": 6.93418210828144e-05,
      "loss": 0.5537,
      "step": 4940
    },
    {
      "epoch": 1.0157261794634598,
      "grad_norm": 0.25726380944252014,
      "learning_rate": 6.93334198056319e-05,
      "loss": 0.5728,
      "step": 4941
    },
    {
      "epoch": 1.0159317504368384,
      "grad_norm": 0.2844366431236267,
      "learning_rate": 6.932501732965554e-05,
      "loss": 0.5752,
      "step": 4942
    },
    {
      "epoch": 1.016137321410217,
      "grad_norm": 0.24454839527606964,
      "learning_rate": 6.931661365529926e-05,
      "loss": 0.5687,
      "step": 4943
    },
    {
      "epoch": 1.0163428923835955,
      "grad_norm": 0.2527025043964386,
      "learning_rate": 6.930820878297711e-05,
      "loss": 0.5439,
      "step": 4944
    },
    {
      "epoch": 1.016548463356974,
      "grad_norm": 0.5170005559921265,
      "learning_rate": 6.92998027131031e-05,
      "loss": 0.5863,
      "step": 4945
    },
    {
      "epoch": 1.0167540343303525,
      "grad_norm": 0.2004466950893402,
      "learning_rate": 6.92913954460914e-05,
      "loss": 0.542,
      "step": 4946
    },
    {
      "epoch": 1.016959605303731,
      "grad_norm": 0.2018880397081375,
      "learning_rate": 6.928298698235619e-05,
      "loss": 0.5909,
      "step": 4947
    },
    {
      "epoch": 1.0171651762771097,
      "grad_norm": 0.21628795564174652,
      "learning_rate": 6.927457732231169e-05,
      "loss": 0.5622,
      "step": 4948
    },
    {
      "epoch": 1.0173707472504883,
      "grad_norm": 0.21719391644001007,
      "learning_rate": 6.926616646637225e-05,
      "loss": 0.5624,
      "step": 4949
    },
    {
      "epoch": 1.0175763182238668,
      "grad_norm": 0.20705457031726837,
      "learning_rate": 6.92577544149522e-05,
      "loss": 0.56,
      "step": 4950
    },
    {
      "epoch": 1.0177818891972454,
      "grad_norm": 0.1947423666715622,
      "learning_rate": 6.924934116846596e-05,
      "loss": 0.5193,
      "step": 4951
    },
    {
      "epoch": 1.0179874601706238,
      "grad_norm": 0.1868080198764801,
      "learning_rate": 6.924092672732802e-05,
      "loss": 0.5699,
      "step": 4952
    },
    {
      "epoch": 1.0181930311440024,
      "grad_norm": 0.2158852070569992,
      "learning_rate": 6.923251109195293e-05,
      "loss": 0.5611,
      "step": 4953
    },
    {
      "epoch": 1.018398602117381,
      "grad_norm": 0.17527857422828674,
      "learning_rate": 6.922409426275528e-05,
      "loss": 0.5361,
      "step": 4954
    },
    {
      "epoch": 1.0186041730907596,
      "grad_norm": 0.16154874861240387,
      "learning_rate": 6.921567624014973e-05,
      "loss": 0.5337,
      "step": 4955
    },
    {
      "epoch": 1.0188097440641382,
      "grad_norm": 0.18655456602573395,
      "learning_rate": 6.920725702455099e-05,
      "loss": 0.5684,
      "step": 4956
    },
    {
      "epoch": 1.0190153150375167,
      "grad_norm": 0.22478148341178894,
      "learning_rate": 6.919883661637383e-05,
      "loss": 0.5722,
      "step": 4957
    },
    {
      "epoch": 1.0192208860108953,
      "grad_norm": 0.20847651362419128,
      "learning_rate": 6.919041501603313e-05,
      "loss": 0.5891,
      "step": 4958
    },
    {
      "epoch": 1.019426456984274,
      "grad_norm": 0.17430467903614044,
      "learning_rate": 6.918199222394373e-05,
      "loss": 0.5449,
      "step": 4959
    },
    {
      "epoch": 1.0196320279576523,
      "grad_norm": 0.17865034937858582,
      "learning_rate": 6.917356824052059e-05,
      "loss": 0.54,
      "step": 4960
    },
    {
      "epoch": 1.0198375989310309,
      "grad_norm": 0.19386602938175201,
      "learning_rate": 6.916514306617874e-05,
      "loss": 0.5582,
      "step": 4961
    },
    {
      "epoch": 1.0200431699044095,
      "grad_norm": 0.1756899505853653,
      "learning_rate": 6.915671670133324e-05,
      "loss": 0.521,
      "step": 4962
    },
    {
      "epoch": 1.020248740877788,
      "grad_norm": 0.16583296656608582,
      "learning_rate": 6.914828914639922e-05,
      "loss": 0.5647,
      "step": 4963
    },
    {
      "epoch": 1.0204543118511666,
      "grad_norm": 0.19850464165210724,
      "learning_rate": 6.913986040179185e-05,
      "loss": 0.5415,
      "step": 4964
    },
    {
      "epoch": 1.0206598828245452,
      "grad_norm": 0.2507860064506531,
      "learning_rate": 6.913143046792639e-05,
      "loss": 0.5441,
      "step": 4965
    },
    {
      "epoch": 1.0208654537979238,
      "grad_norm": 0.19658030569553375,
      "learning_rate": 6.912299934521814e-05,
      "loss": 0.5782,
      "step": 4966
    },
    {
      "epoch": 1.0210710247713024,
      "grad_norm": 0.19466283917427063,
      "learning_rate": 6.911456703408246e-05,
      "loss": 0.5552,
      "step": 4967
    },
    {
      "epoch": 1.0212765957446808,
      "grad_norm": 0.16281276941299438,
      "learning_rate": 6.910613353493479e-05,
      "loss": 0.5291,
      "step": 4968
    },
    {
      "epoch": 1.0214821667180594,
      "grad_norm": 0.1634058803319931,
      "learning_rate": 6.909769884819057e-05,
      "loss": 0.5497,
      "step": 4969
    },
    {
      "epoch": 1.021687737691438,
      "grad_norm": 0.1930556446313858,
      "learning_rate": 6.908926297426537e-05,
      "loss": 0.5608,
      "step": 4970
    },
    {
      "epoch": 1.0218933086648165,
      "grad_norm": 0.19795656204223633,
      "learning_rate": 6.908082591357478e-05,
      "loss": 0.5729,
      "step": 4971
    },
    {
      "epoch": 1.0220988796381951,
      "grad_norm": 0.19776557385921478,
      "learning_rate": 6.907238766653445e-05,
      "loss": 0.5634,
      "step": 4972
    },
    {
      "epoch": 1.0223044506115737,
      "grad_norm": 0.19151826202869415,
      "learning_rate": 6.90639482335601e-05,
      "loss": 0.571,
      "step": 4973
    },
    {
      "epoch": 1.0225100215849523,
      "grad_norm": 0.18954800069332123,
      "learning_rate": 6.905550761506747e-05,
      "loss": 0.5519,
      "step": 4974
    },
    {
      "epoch": 1.0227155925583307,
      "grad_norm": 0.19335106015205383,
      "learning_rate": 6.904706581147243e-05,
      "loss": 0.5452,
      "step": 4975
    },
    {
      "epoch": 1.0229211635317093,
      "grad_norm": 0.20168174803256989,
      "learning_rate": 6.903862282319087e-05,
      "loss": 0.5838,
      "step": 4976
    },
    {
      "epoch": 1.0231267345050878,
      "grad_norm": 0.20087262988090515,
      "learning_rate": 6.90301786506387e-05,
      "loss": 0.5656,
      "step": 4977
    },
    {
      "epoch": 1.0233323054784664,
      "grad_norm": 0.1917273849248886,
      "learning_rate": 6.902173329423195e-05,
      "loss": 0.5679,
      "step": 4978
    },
    {
      "epoch": 1.023537876451845,
      "grad_norm": 0.1951013058423996,
      "learning_rate": 6.901328675438669e-05,
      "loss": 0.5635,
      "step": 4979
    },
    {
      "epoch": 1.0237434474252236,
      "grad_norm": 0.20168475806713104,
      "learning_rate": 6.9004839031519e-05,
      "loss": 0.5826,
      "step": 4980
    },
    {
      "epoch": 1.0239490183986022,
      "grad_norm": 0.19177857041358948,
      "learning_rate": 6.899639012604512e-05,
      "loss": 0.5675,
      "step": 4981
    },
    {
      "epoch": 1.0241545893719808,
      "grad_norm": 0.15916599333286285,
      "learning_rate": 6.898794003838124e-05,
      "loss": 0.5457,
      "step": 4982
    },
    {
      "epoch": 1.0243601603453591,
      "grad_norm": 0.1605004519224167,
      "learning_rate": 6.897948876894369e-05,
      "loss": 0.5663,
      "step": 4983
    },
    {
      "epoch": 1.0245657313187377,
      "grad_norm": 0.19485965371131897,
      "learning_rate": 6.897103631814878e-05,
      "loss": 0.5683,
      "step": 4984
    },
    {
      "epoch": 1.0247713022921163,
      "grad_norm": 0.1926756501197815,
      "learning_rate": 6.896258268641298e-05,
      "loss": 0.5525,
      "step": 4985
    },
    {
      "epoch": 1.024976873265495,
      "grad_norm": 0.19675122201442719,
      "learning_rate": 6.895412787415272e-05,
      "loss": 0.5811,
      "step": 4986
    },
    {
      "epoch": 1.0251824442388735,
      "grad_norm": 0.19753362238407135,
      "learning_rate": 6.894567188178454e-05,
      "loss": 0.582,
      "step": 4987
    },
    {
      "epoch": 1.025388015212252,
      "grad_norm": 0.195309117436409,
      "learning_rate": 6.893721470972502e-05,
      "loss": 0.5446,
      "step": 4988
    },
    {
      "epoch": 1.0255935861856307,
      "grad_norm": 0.19395774602890015,
      "learning_rate": 6.892875635839081e-05,
      "loss": 0.574,
      "step": 4989
    },
    {
      "epoch": 1.025799157159009,
      "grad_norm": 0.1611412912607193,
      "learning_rate": 6.892029682819864e-05,
      "loss": 0.5342,
      "step": 4990
    },
    {
      "epoch": 1.0260047281323876,
      "grad_norm": 0.1334841102361679,
      "learning_rate": 6.891183611956523e-05,
      "loss": 0.5458,
      "step": 4991
    },
    {
      "epoch": 1.0262102991057662,
      "grad_norm": 0.18414229154586792,
      "learning_rate": 6.890337423290743e-05,
      "loss": 0.5658,
      "step": 4992
    },
    {
      "epoch": 1.0264158700791448,
      "grad_norm": 0.20537151396274567,
      "learning_rate": 6.88949111686421e-05,
      "loss": 0.6111,
      "step": 4993
    },
    {
      "epoch": 1.0266214410525234,
      "grad_norm": 0.18854451179504395,
      "learning_rate": 6.88864469271862e-05,
      "loss": 0.5655,
      "step": 4994
    },
    {
      "epoch": 1.026827012025902,
      "grad_norm": 0.19057627022266388,
      "learning_rate": 6.887798150895667e-05,
      "loss": 0.5645,
      "step": 4995
    },
    {
      "epoch": 1.0270325829992806,
      "grad_norm": 0.22320972383022308,
      "learning_rate": 6.886951491437062e-05,
      "loss": 0.5688,
      "step": 4996
    },
    {
      "epoch": 1.0272381539726592,
      "grad_norm": 0.2112189084291458,
      "learning_rate": 6.886104714384512e-05,
      "loss": 0.5599,
      "step": 4997
    },
    {
      "epoch": 1.0274437249460375,
      "grad_norm": 0.1889009028673172,
      "learning_rate": 6.885257819779736e-05,
      "loss": 0.5472,
      "step": 4998
    },
    {
      "epoch": 1.0276492959194161,
      "grad_norm": 0.18562033772468567,
      "learning_rate": 6.884410807664456e-05,
      "loss": 0.5478,
      "step": 4999
    },
    {
      "epoch": 1.0278548668927947,
      "grad_norm": 0.1892947107553482,
      "learning_rate": 6.8835636780804e-05,
      "loss": 0.5561,
      "step": 5000
    },
    {
      "epoch": 1.0280604378661733,
      "grad_norm": 0.19414404034614563,
      "learning_rate": 6.882716431069303e-05,
      "loss": 0.5769,
      "step": 5001
    },
    {
      "epoch": 1.0282660088395519,
      "grad_norm": 0.194126158952713,
      "learning_rate": 6.881869066672904e-05,
      "loss": 0.5609,
      "step": 5002
    },
    {
      "epoch": 1.0284715798129305,
      "grad_norm": 0.1930353194475174,
      "learning_rate": 6.881021584932949e-05,
      "loss": 0.57,
      "step": 5003
    },
    {
      "epoch": 1.028677150786309,
      "grad_norm": 0.18623441457748413,
      "learning_rate": 6.88017398589119e-05,
      "loss": 0.5429,
      "step": 5004
    },
    {
      "epoch": 1.0288827217596874,
      "grad_norm": 0.1921243667602539,
      "learning_rate": 6.879326269589382e-05,
      "loss": 0.5579,
      "step": 5005
    },
    {
      "epoch": 1.029088292733066,
      "grad_norm": 0.18247570097446442,
      "learning_rate": 6.87847843606929e-05,
      "loss": 0.5402,
      "step": 5006
    },
    {
      "epoch": 1.0292938637064446,
      "grad_norm": 0.17088961601257324,
      "learning_rate": 6.877630485372684e-05,
      "loss": 0.5483,
      "step": 5007
    },
    {
      "epoch": 1.0294994346798232,
      "grad_norm": 0.20109815895557404,
      "learning_rate": 6.876782417541334e-05,
      "loss": 0.5541,
      "step": 5008
    },
    {
      "epoch": 1.0297050056532018,
      "grad_norm": 0.19609639048576355,
      "learning_rate": 6.875934232617027e-05,
      "loss": 0.5629,
      "step": 5009
    },
    {
      "epoch": 1.0299105766265804,
      "grad_norm": 0.19312147796154022,
      "learning_rate": 6.875085930641543e-05,
      "loss": 0.5603,
      "step": 5010
    },
    {
      "epoch": 1.030116147599959,
      "grad_norm": 0.1975242644548416,
      "learning_rate": 6.874237511656677e-05,
      "loss": 0.5763,
      "step": 5011
    },
    {
      "epoch": 1.0303217185733375,
      "grad_norm": 0.1999368965625763,
      "learning_rate": 6.873388975704225e-05,
      "loss": 0.5884,
      "step": 5012
    },
    {
      "epoch": 1.030527289546716,
      "grad_norm": 0.16335703432559967,
      "learning_rate": 6.872540322825994e-05,
      "loss": 0.5181,
      "step": 5013
    },
    {
      "epoch": 1.0307328605200945,
      "grad_norm": 0.17105185985565186,
      "learning_rate": 6.871691553063788e-05,
      "loss": 0.566,
      "step": 5014
    },
    {
      "epoch": 1.030938431493473,
      "grad_norm": 0.21174640953540802,
      "learning_rate": 6.870842666459425e-05,
      "loss": 0.5851,
      "step": 5015
    },
    {
      "epoch": 1.0311440024668517,
      "grad_norm": 0.19627945125102997,
      "learning_rate": 6.869993663054725e-05,
      "loss": 0.5655,
      "step": 5016
    },
    {
      "epoch": 1.0313495734402303,
      "grad_norm": 0.18857216835021973,
      "learning_rate": 6.869144542891517e-05,
      "loss": 0.5448,
      "step": 5017
    },
    {
      "epoch": 1.0315551444136088,
      "grad_norm": 0.16696830093860626,
      "learning_rate": 6.868295306011628e-05,
      "loss": 0.5241,
      "step": 5018
    },
    {
      "epoch": 1.0317607153869874,
      "grad_norm": 0.16772493720054626,
      "learning_rate": 6.867445952456899e-05,
      "loss": 0.5759,
      "step": 5019
    },
    {
      "epoch": 1.0319662863603658,
      "grad_norm": 0.19662131369113922,
      "learning_rate": 6.866596482269175e-05,
      "loss": 0.5647,
      "step": 5020
    },
    {
      "epoch": 1.0321718573337444,
      "grad_norm": 0.20151005685329437,
      "learning_rate": 6.8657468954903e-05,
      "loss": 0.5577,
      "step": 5021
    },
    {
      "epoch": 1.032377428307123,
      "grad_norm": 0.16064363718032837,
      "learning_rate": 6.864897192162136e-05,
      "loss": 0.5182,
      "step": 5022
    },
    {
      "epoch": 1.0325829992805016,
      "grad_norm": 0.16647809743881226,
      "learning_rate": 6.864047372326539e-05,
      "loss": 0.5572,
      "step": 5023
    },
    {
      "epoch": 1.0327885702538802,
      "grad_norm": 0.18600209057331085,
      "learning_rate": 6.86319743602538e-05,
      "loss": 0.5426,
      "step": 5024
    },
    {
      "epoch": 1.0329941412272587,
      "grad_norm": 0.15970858931541443,
      "learning_rate": 6.862347383300529e-05,
      "loss": 0.5119,
      "step": 5025
    },
    {
      "epoch": 1.0331997122006373,
      "grad_norm": 0.16188785433769226,
      "learning_rate": 6.861497214193861e-05,
      "loss": 0.5732,
      "step": 5026
    },
    {
      "epoch": 1.033405283174016,
      "grad_norm": 0.19583800435066223,
      "learning_rate": 6.860646928747265e-05,
      "loss": 0.5387,
      "step": 5027
    },
    {
      "epoch": 1.0336108541473943,
      "grad_norm": 0.19050170481204987,
      "learning_rate": 6.859796527002627e-05,
      "loss": 0.5715,
      "step": 5028
    },
    {
      "epoch": 1.0338164251207729,
      "grad_norm": 0.19282078742980957,
      "learning_rate": 6.858946009001844e-05,
      "loss": 0.5717,
      "step": 5029
    },
    {
      "epoch": 1.0340219960941515,
      "grad_norm": 0.19418777525424957,
      "learning_rate": 6.858095374786818e-05,
      "loss": 0.558,
      "step": 5030
    },
    {
      "epoch": 1.03422756706753,
      "grad_norm": 0.2037775069475174,
      "learning_rate": 6.857244624399455e-05,
      "loss": 0.5487,
      "step": 5031
    },
    {
      "epoch": 1.0344331380409086,
      "grad_norm": 0.20054981112480164,
      "learning_rate": 6.856393757881665e-05,
      "loss": 0.5565,
      "step": 5032
    },
    {
      "epoch": 1.0346387090142872,
      "grad_norm": 0.2035524547100067,
      "learning_rate": 6.855542775275369e-05,
      "loss": 0.5952,
      "step": 5033
    },
    {
      "epoch": 1.0348442799876658,
      "grad_norm": 0.1957496851682663,
      "learning_rate": 6.854691676622492e-05,
      "loss": 0.5563,
      "step": 5034
    },
    {
      "epoch": 1.0350498509610444,
      "grad_norm": 0.21326994895935059,
      "learning_rate": 6.853840461964961e-05,
      "loss": 0.5745,
      "step": 5035
    },
    {
      "epoch": 1.0352554219344228,
      "grad_norm": 0.16329696774482727,
      "learning_rate": 6.852989131344712e-05,
      "loss": 0.5331,
      "step": 5036
    },
    {
      "epoch": 1.0354609929078014,
      "grad_norm": 0.16014549136161804,
      "learning_rate": 6.852137684803686e-05,
      "loss": 0.5432,
      "step": 5037
    },
    {
      "epoch": 1.03566656388118,
      "grad_norm": 0.164669468998909,
      "learning_rate": 6.851286122383831e-05,
      "loss": 0.5325,
      "step": 5038
    },
    {
      "epoch": 1.0358721348545585,
      "grad_norm": 0.16093246638774872,
      "learning_rate": 6.850434444127098e-05,
      "loss": 0.5639,
      "step": 5039
    },
    {
      "epoch": 1.0360777058279371,
      "grad_norm": 0.20291577279567719,
      "learning_rate": 6.849582650075445e-05,
      "loss": 0.5414,
      "step": 5040
    },
    {
      "epoch": 1.0362832768013157,
      "grad_norm": 0.16935724020004272,
      "learning_rate": 6.848730740270839e-05,
      "loss": 0.5082,
      "step": 5041
    },
    {
      "epoch": 1.0364888477746943,
      "grad_norm": 0.1641445755958557,
      "learning_rate": 6.847878714755244e-05,
      "loss": 0.5472,
      "step": 5042
    },
    {
      "epoch": 1.0366944187480727,
      "grad_norm": 0.196893572807312,
      "learning_rate": 6.847026573570642e-05,
      "loss": 0.5856,
      "step": 5043
    },
    {
      "epoch": 1.0368999897214513,
      "grad_norm": 0.19081740081310272,
      "learning_rate": 6.846174316759007e-05,
      "loss": 0.5622,
      "step": 5044
    },
    {
      "epoch": 1.0371055606948298,
      "grad_norm": 0.16471846401691437,
      "learning_rate": 6.845321944362332e-05,
      "loss": 0.5485,
      "step": 5045
    },
    {
      "epoch": 1.0373111316682084,
      "grad_norm": 0.16739195585250854,
      "learning_rate": 6.844469456422606e-05,
      "loss": 0.5717,
      "step": 5046
    },
    {
      "epoch": 1.037516702641587,
      "grad_norm": 0.19602900743484497,
      "learning_rate": 6.843616852981831e-05,
      "loss": 0.563,
      "step": 5047
    },
    {
      "epoch": 1.0377222736149656,
      "grad_norm": 0.19288770854473114,
      "learning_rate": 6.842764134082004e-05,
      "loss": 0.5641,
      "step": 5048
    },
    {
      "epoch": 1.0379278445883442,
      "grad_norm": 0.18607531487941742,
      "learning_rate": 6.841911299765141e-05,
      "loss": 0.5437,
      "step": 5049
    },
    {
      "epoch": 1.0381334155617228,
      "grad_norm": 0.20571644604206085,
      "learning_rate": 6.84105835007325e-05,
      "loss": 0.5444,
      "step": 5050
    },
    {
      "epoch": 1.0383389865351011,
      "grad_norm": 0.2017316222190857,
      "learning_rate": 6.840205285048359e-05,
      "loss": 0.5615,
      "step": 5051
    },
    {
      "epoch": 1.0385445575084797,
      "grad_norm": 0.19289067387580872,
      "learning_rate": 6.839352104732492e-05,
      "loss": 0.5715,
      "step": 5052
    },
    {
      "epoch": 1.0387501284818583,
      "grad_norm": 0.19483251869678497,
      "learning_rate": 6.838498809167681e-05,
      "loss": 0.5936,
      "step": 5053
    },
    {
      "epoch": 1.038955699455237,
      "grad_norm": 0.17118024826049805,
      "learning_rate": 6.837645398395962e-05,
      "loss": 0.5091,
      "step": 5054
    },
    {
      "epoch": 1.0391612704286155,
      "grad_norm": 0.15377415716648102,
      "learning_rate": 6.836791872459382e-05,
      "loss": 0.5493,
      "step": 5055
    },
    {
      "epoch": 1.039366841401994,
      "grad_norm": 0.16883303225040436,
      "learning_rate": 6.835938231399989e-05,
      "loss": 0.5309,
      "step": 5056
    },
    {
      "epoch": 1.0395724123753727,
      "grad_norm": 0.18094538152217865,
      "learning_rate": 6.835084475259835e-05,
      "loss": 0.5579,
      "step": 5057
    },
    {
      "epoch": 1.0397779833487513,
      "grad_norm": 0.1928682029247284,
      "learning_rate": 6.834230604080986e-05,
      "loss": 0.5608,
      "step": 5058
    },
    {
      "epoch": 1.0399835543221296,
      "grad_norm": 0.16030603647232056,
      "learning_rate": 6.833376617905504e-05,
      "loss": 0.5228,
      "step": 5059
    },
    {
      "epoch": 1.0401891252955082,
      "grad_norm": 0.16263644397258759,
      "learning_rate": 6.832522516775462e-05,
      "loss": 0.5724,
      "step": 5060
    },
    {
      "epoch": 1.0403946962688868,
      "grad_norm": 0.2015358805656433,
      "learning_rate": 6.831668300732938e-05,
      "loss": 0.5652,
      "step": 5061
    },
    {
      "epoch": 1.0406002672422654,
      "grad_norm": 0.19693398475646973,
      "learning_rate": 6.830813969820015e-05,
      "loss": 0.5457,
      "step": 5062
    },
    {
      "epoch": 1.040805838215644,
      "grad_norm": 0.19118033349514008,
      "learning_rate": 6.829959524078782e-05,
      "loss": 0.5615,
      "step": 5063
    },
    {
      "epoch": 1.0410114091890226,
      "grad_norm": 0.2018975466489792,
      "learning_rate": 6.829104963551332e-05,
      "loss": 0.5883,
      "step": 5064
    },
    {
      "epoch": 1.0412169801624012,
      "grad_norm": 0.1714175045490265,
      "learning_rate": 6.828250288279768e-05,
      "loss": 0.514,
      "step": 5065
    },
    {
      "epoch": 1.0414225511357795,
      "grad_norm": 0.1588568538427353,
      "learning_rate": 6.827395498306195e-05,
      "loss": 0.5603,
      "step": 5066
    },
    {
      "epoch": 1.0416281221091581,
      "grad_norm": 0.18845230340957642,
      "learning_rate": 6.826540593672724e-05,
      "loss": 0.5685,
      "step": 5067
    },
    {
      "epoch": 1.0418336930825367,
      "grad_norm": 0.19369468092918396,
      "learning_rate": 6.825685574421471e-05,
      "loss": 0.5599,
      "step": 5068
    },
    {
      "epoch": 1.0420392640559153,
      "grad_norm": 0.19052891433238983,
      "learning_rate": 6.824830440594561e-05,
      "loss": 0.5593,
      "step": 5069
    },
    {
      "epoch": 1.0422448350292939,
      "grad_norm": 0.19876545667648315,
      "learning_rate": 6.823975192234123e-05,
      "loss": 0.5911,
      "step": 5070
    },
    {
      "epoch": 1.0424504060026725,
      "grad_norm": 0.19648174941539764,
      "learning_rate": 6.823119829382285e-05,
      "loss": 0.544,
      "step": 5071
    },
    {
      "epoch": 1.042655976976051,
      "grad_norm": 0.20026130974292755,
      "learning_rate": 6.822264352081194e-05,
      "loss": 0.5574,
      "step": 5072
    },
    {
      "epoch": 1.0428615479494296,
      "grad_norm": 0.18708030879497528,
      "learning_rate": 6.821408760372994e-05,
      "loss": 0.5367,
      "step": 5073
    },
    {
      "epoch": 1.043067118922808,
      "grad_norm": 0.18605582416057587,
      "learning_rate": 6.820553054299832e-05,
      "loss": 0.5383,
      "step": 5074
    },
    {
      "epoch": 1.0432726898961866,
      "grad_norm": 0.19726519286632538,
      "learning_rate": 6.81969723390387e-05,
      "loss": 0.5922,
      "step": 5075
    },
    {
      "epoch": 1.0434782608695652,
      "grad_norm": 0.1689324975013733,
      "learning_rate": 6.818841299227264e-05,
      "loss": 0.5238,
      "step": 5076
    },
    {
      "epoch": 1.0436838318429438,
      "grad_norm": 0.16380931437015533,
      "learning_rate": 6.817985250312187e-05,
      "loss": 0.5622,
      "step": 5077
    },
    {
      "epoch": 1.0438894028163224,
      "grad_norm": 0.18795007467269897,
      "learning_rate": 6.817129087200812e-05,
      "loss": 0.5529,
      "step": 5078
    },
    {
      "epoch": 1.044094973789701,
      "grad_norm": 0.21080972254276276,
      "learning_rate": 6.816272809935315e-05,
      "loss": 0.5553,
      "step": 5079
    },
    {
      "epoch": 1.0443005447630795,
      "grad_norm": 0.19788028299808502,
      "learning_rate": 6.815416418557885e-05,
      "loss": 0.5868,
      "step": 5080
    },
    {
      "epoch": 1.044506115736458,
      "grad_norm": 0.17419970035552979,
      "learning_rate": 6.81455991311071e-05,
      "loss": 0.5412,
      "step": 5081
    },
    {
      "epoch": 1.0447116867098365,
      "grad_norm": 0.1657952070236206,
      "learning_rate": 6.813703293635986e-05,
      "loss": 0.557,
      "step": 5082
    },
    {
      "epoch": 1.044917257683215,
      "grad_norm": 0.19872242212295532,
      "learning_rate": 6.812846560175916e-05,
      "loss": 0.5702,
      "step": 5083
    },
    {
      "epoch": 1.0451228286565937,
      "grad_norm": 0.18654018640518188,
      "learning_rate": 6.811989712772704e-05,
      "loss": 0.5414,
      "step": 5084
    },
    {
      "epoch": 1.0453283996299723,
      "grad_norm": 0.1918267160654068,
      "learning_rate": 6.811132751468566e-05,
      "loss": 0.5687,
      "step": 5085
    },
    {
      "epoch": 1.0455339706033508,
      "grad_norm": 0.1659933179616928,
      "learning_rate": 6.810275676305719e-05,
      "loss": 0.5324,
      "step": 5086
    },
    {
      "epoch": 1.0457395415767294,
      "grad_norm": 0.13235360383987427,
      "learning_rate": 6.809418487326388e-05,
      "loss": 0.5161,
      "step": 5087
    },
    {
      "epoch": 1.045945112550108,
      "grad_norm": 0.17006467282772064,
      "learning_rate": 6.808561184572802e-05,
      "loss": 0.5641,
      "step": 5088
    },
    {
      "epoch": 1.0461506835234864,
      "grad_norm": 0.20476646721363068,
      "learning_rate": 6.807703768087196e-05,
      "loss": 0.5604,
      "step": 5089
    },
    {
      "epoch": 1.046356254496865,
      "grad_norm": 0.2010469287633896,
      "learning_rate": 6.806846237911815e-05,
      "loss": 0.559,
      "step": 5090
    },
    {
      "epoch": 1.0465618254702436,
      "grad_norm": 0.1989106982946396,
      "learning_rate": 6.805988594088898e-05,
      "loss": 0.5642,
      "step": 5091
    },
    {
      "epoch": 1.0467673964436222,
      "grad_norm": 0.17077521979808807,
      "learning_rate": 6.805130836660703e-05,
      "loss": 0.537,
      "step": 5092
    },
    {
      "epoch": 1.0469729674170007,
      "grad_norm": 0.16533872485160828,
      "learning_rate": 6.804272965669486e-05,
      "loss": 0.5552,
      "step": 5093
    },
    {
      "epoch": 1.0471785383903793,
      "grad_norm": 0.19124990701675415,
      "learning_rate": 6.80341498115751e-05,
      "loss": 0.5574,
      "step": 5094
    },
    {
      "epoch": 1.047384109363758,
      "grad_norm": 0.19139717519283295,
      "learning_rate": 6.802556883167043e-05,
      "loss": 0.5446,
      "step": 5095
    },
    {
      "epoch": 1.0475896803371363,
      "grad_norm": 0.19126202166080475,
      "learning_rate": 6.801698671740362e-05,
      "loss": 0.5634,
      "step": 5096
    },
    {
      "epoch": 1.0477952513105149,
      "grad_norm": 0.19115038216114044,
      "learning_rate": 6.800840346919744e-05,
      "loss": 0.5393,
      "step": 5097
    },
    {
      "epoch": 1.0480008222838935,
      "grad_norm": 0.1927635818719864,
      "learning_rate": 6.799981908747476e-05,
      "loss": 0.5527,
      "step": 5098
    },
    {
      "epoch": 1.048206393257272,
      "grad_norm": 0.20182408392429352,
      "learning_rate": 6.799123357265852e-05,
      "loss": 0.5691,
      "step": 5099
    },
    {
      "epoch": 1.0484119642306506,
      "grad_norm": 0.1980399638414383,
      "learning_rate": 6.798264692517165e-05,
      "loss": 0.5593,
      "step": 5100
    },
    {
      "epoch": 1.0486175352040292,
      "grad_norm": 0.19788923859596252,
      "learning_rate": 6.797405914543717e-05,
      "loss": 0.571,
      "step": 5101
    },
    {
      "epoch": 1.0488231061774078,
      "grad_norm": 0.18928498029708862,
      "learning_rate": 6.79654702338782e-05,
      "loss": 0.5616,
      "step": 5102
    },
    {
      "epoch": 1.0490286771507864,
      "grad_norm": 0.18653394281864166,
      "learning_rate": 6.795688019091784e-05,
      "loss": 0.5553,
      "step": 5103
    },
    {
      "epoch": 1.0492342481241648,
      "grad_norm": 0.17353960871696472,
      "learning_rate": 6.79482890169793e-05,
      "loss": 0.5348,
      "step": 5104
    },
    {
      "epoch": 1.0494398190975434,
      "grad_norm": 0.1659521758556366,
      "learning_rate": 6.79396967124858e-05,
      "loss": 0.5799,
      "step": 5105
    },
    {
      "epoch": 1.049645390070922,
      "grad_norm": 0.16796258091926575,
      "learning_rate": 6.79311032778607e-05,
      "loss": 0.5178,
      "step": 5106
    },
    {
      "epoch": 1.0498509610443005,
      "grad_norm": 0.16122405230998993,
      "learning_rate": 6.79225087135273e-05,
      "loss": 0.555,
      "step": 5107
    },
    {
      "epoch": 1.0500565320176791,
      "grad_norm": 0.20025917887687683,
      "learning_rate": 6.791391301990902e-05,
      "loss": 0.5649,
      "step": 5108
    },
    {
      "epoch": 1.0502621029910577,
      "grad_norm": 0.19537703692913055,
      "learning_rate": 6.790531619742936e-05,
      "loss": 0.5517,
      "step": 5109
    },
    {
      "epoch": 1.0504676739644363,
      "grad_norm": 0.18383800983428955,
      "learning_rate": 6.789671824651183e-05,
      "loss": 0.5673,
      "step": 5110
    },
    {
      "epoch": 1.0506732449378149,
      "grad_norm": 0.19807079434394836,
      "learning_rate": 6.788811916758002e-05,
      "loss": 0.5811,
      "step": 5111
    },
    {
      "epoch": 1.0508788159111933,
      "grad_norm": 0.19630371034145355,
      "learning_rate": 6.787951896105754e-05,
      "loss": 0.5306,
      "step": 5112
    },
    {
      "epoch": 1.0510843868845718,
      "grad_norm": 0.18975067138671875,
      "learning_rate": 6.78709176273681e-05,
      "loss": 0.569,
      "step": 5113
    },
    {
      "epoch": 1.0512899578579504,
      "grad_norm": 0.20035415887832642,
      "learning_rate": 6.786231516693547e-05,
      "loss": 0.5387,
      "step": 5114
    },
    {
      "epoch": 1.051495528831329,
      "grad_norm": 0.1959179788827896,
      "learning_rate": 6.785371158018341e-05,
      "loss": 0.5653,
      "step": 5115
    },
    {
      "epoch": 1.0517010998047076,
      "grad_norm": 0.16765445470809937,
      "learning_rate": 6.78451068675358e-05,
      "loss": 0.5406,
      "step": 5116
    },
    {
      "epoch": 1.0519066707780862,
      "grad_norm": 0.15998391807079315,
      "learning_rate": 6.783650102941656e-05,
      "loss": 0.5506,
      "step": 5117
    },
    {
      "epoch": 1.0521122417514648,
      "grad_norm": 0.19628630578517914,
      "learning_rate": 6.782789406624964e-05,
      "loss": 0.5581,
      "step": 5118
    },
    {
      "epoch": 1.0523178127248431,
      "grad_norm": 0.20828314125537872,
      "learning_rate": 6.781928597845909e-05,
      "loss": 0.549,
      "step": 5119
    },
    {
      "epoch": 1.0525233836982217,
      "grad_norm": 0.1985846757888794,
      "learning_rate": 6.781067676646896e-05,
      "loss": 0.5625,
      "step": 5120
    },
    {
      "epoch": 1.0527289546716003,
      "grad_norm": 0.21041736006736755,
      "learning_rate": 6.780206643070343e-05,
      "loss": 0.5387,
      "step": 5121
    },
    {
      "epoch": 1.052934525644979,
      "grad_norm": 0.20188267529010773,
      "learning_rate": 6.779345497158664e-05,
      "loss": 0.5511,
      "step": 5122
    },
    {
      "epoch": 1.0531400966183575,
      "grad_norm": 0.19628292322158813,
      "learning_rate": 6.778484238954287e-05,
      "loss": 0.5509,
      "step": 5123
    },
    {
      "epoch": 1.053345667591736,
      "grad_norm": 0.19556038081645966,
      "learning_rate": 6.77762286849964e-05,
      "loss": 0.5563,
      "step": 5124
    },
    {
      "epoch": 1.0535512385651147,
      "grad_norm": 0.18803851306438446,
      "learning_rate": 6.776761385837161e-05,
      "loss": 0.5833,
      "step": 5125
    },
    {
      "epoch": 1.0537568095384933,
      "grad_norm": 0.20179903507232666,
      "learning_rate": 6.77589979100929e-05,
      "loss": 0.5745,
      "step": 5126
    },
    {
      "epoch": 1.0539623805118716,
      "grad_norm": 0.19985097646713257,
      "learning_rate": 6.775038084058473e-05,
      "loss": 0.5741,
      "step": 5127
    },
    {
      "epoch": 1.0541679514852502,
      "grad_norm": 0.19471241533756256,
      "learning_rate": 6.774176265027164e-05,
      "loss": 0.569,
      "step": 5128
    },
    {
      "epoch": 1.0543735224586288,
      "grad_norm": 0.18633931875228882,
      "learning_rate": 6.77331433395782e-05,
      "loss": 0.5274,
      "step": 5129
    },
    {
      "epoch": 1.0545790934320074,
      "grad_norm": 0.18471461534500122,
      "learning_rate": 6.772452290892902e-05,
      "loss": 0.5643,
      "step": 5130
    },
    {
      "epoch": 1.054784664405386,
      "grad_norm": 0.16938886046409607,
      "learning_rate": 6.771590135874883e-05,
      "loss": 0.5321,
      "step": 5131
    },
    {
      "epoch": 1.0549902353787646,
      "grad_norm": 0.16139011085033417,
      "learning_rate": 6.770727868946237e-05,
      "loss": 0.5531,
      "step": 5132
    },
    {
      "epoch": 1.0551958063521432,
      "grad_norm": 0.1963978409767151,
      "learning_rate": 6.769865490149439e-05,
      "loss": 0.5727,
      "step": 5133
    },
    {
      "epoch": 1.0554013773255218,
      "grad_norm": 0.16599130630493164,
      "learning_rate": 6.76900299952698e-05,
      "loss": 0.5472,
      "step": 5134
    },
    {
      "epoch": 1.0556069482989001,
      "grad_norm": 0.16204878687858582,
      "learning_rate": 6.768140397121347e-05,
      "loss": 0.5799,
      "step": 5135
    },
    {
      "epoch": 1.0558125192722787,
      "grad_norm": 0.19888906180858612,
      "learning_rate": 6.767277682975037e-05,
      "loss": 0.571,
      "step": 5136
    },
    {
      "epoch": 1.0560180902456573,
      "grad_norm": 0.19019466638565063,
      "learning_rate": 6.766414857130556e-05,
      "loss": 0.5547,
      "step": 5137
    },
    {
      "epoch": 1.0562236612190359,
      "grad_norm": 0.19364401698112488,
      "learning_rate": 6.765551919630407e-05,
      "loss": 0.5889,
      "step": 5138
    },
    {
      "epoch": 1.0564292321924145,
      "grad_norm": 0.19653116166591644,
      "learning_rate": 6.764688870517104e-05,
      "loss": 0.5778,
      "step": 5139
    },
    {
      "epoch": 1.056634803165793,
      "grad_norm": 0.20161549746990204,
      "learning_rate": 6.763825709833164e-05,
      "loss": 0.5708,
      "step": 5140
    },
    {
      "epoch": 1.0568403741391716,
      "grad_norm": 0.19760295748710632,
      "learning_rate": 6.762962437621112e-05,
      "loss": 0.5555,
      "step": 5141
    },
    {
      "epoch": 1.05704594511255,
      "grad_norm": 0.1907760202884674,
      "learning_rate": 6.76209905392348e-05,
      "loss": 0.5496,
      "step": 5142
    },
    {
      "epoch": 1.0572515160859286,
      "grad_norm": 0.19071267545223236,
      "learning_rate": 6.7612355587828e-05,
      "loss": 0.5601,
      "step": 5143
    },
    {
      "epoch": 1.0574570870593072,
      "grad_norm": 0.1920759677886963,
      "learning_rate": 6.760371952241613e-05,
      "loss": 0.5783,
      "step": 5144
    },
    {
      "epoch": 1.0576626580326858,
      "grad_norm": 0.17088396847248077,
      "learning_rate": 6.759508234342465e-05,
      "loss": 0.5436,
      "step": 5145
    },
    {
      "epoch": 1.0578682290060644,
      "grad_norm": 0.13676489889621735,
      "learning_rate": 6.758644405127908e-05,
      "loss": 0.5261,
      "step": 5146
    },
    {
      "epoch": 1.058073799979443,
      "grad_norm": 0.16138045489788055,
      "learning_rate": 6.757780464640496e-05,
      "loss": 0.5751,
      "step": 5147
    },
    {
      "epoch": 1.0582793709528215,
      "grad_norm": 0.191030353307724,
      "learning_rate": 6.756916412922794e-05,
      "loss": 0.5597,
      "step": 5148
    },
    {
      "epoch": 1.0584849419262001,
      "grad_norm": 0.19613182544708252,
      "learning_rate": 6.75605225001737e-05,
      "loss": 0.5644,
      "step": 5149
    },
    {
      "epoch": 1.0586905128995785,
      "grad_norm": 0.1948171854019165,
      "learning_rate": 6.755187975966795e-05,
      "loss": 0.5637,
      "step": 5150
    },
    {
      "epoch": 1.058896083872957,
      "grad_norm": 0.1817820966243744,
      "learning_rate": 6.754323590813649e-05,
      "loss": 0.5389,
      "step": 5151
    },
    {
      "epoch": 1.0591016548463357,
      "grad_norm": 0.1902126520872116,
      "learning_rate": 6.753459094600518e-05,
      "loss": 0.5745,
      "step": 5152
    },
    {
      "epoch": 1.0593072258197143,
      "grad_norm": 0.19361747801303864,
      "learning_rate": 6.752594487369989e-05,
      "loss": 0.5834,
      "step": 5153
    },
    {
      "epoch": 1.0595127967930928,
      "grad_norm": 0.176842600107193,
      "learning_rate": 6.751729769164659e-05,
      "loss": 0.5306,
      "step": 5154
    },
    {
      "epoch": 1.0597183677664714,
      "grad_norm": 0.16082750260829926,
      "learning_rate": 6.750864940027127e-05,
      "loss": 0.5461,
      "step": 5155
    },
    {
      "epoch": 1.05992393873985,
      "grad_norm": 0.17407187819480896,
      "learning_rate": 6.75e-05,
      "loss": 0.5479,
      "step": 5156
    },
    {
      "epoch": 1.0601295097132284,
      "grad_norm": 0.2048111855983734,
      "learning_rate": 6.74913494912589e-05,
      "loss": 0.5636,
      "step": 5157
    },
    {
      "epoch": 1.060335080686607,
      "grad_norm": 0.19643527269363403,
      "learning_rate": 6.748269787447414e-05,
      "loss": 0.577,
      "step": 5158
    },
    {
      "epoch": 1.0605406516599856,
      "grad_norm": 0.19927412271499634,
      "learning_rate": 6.747404515007194e-05,
      "loss": 0.5753,
      "step": 5159
    },
    {
      "epoch": 1.0607462226333642,
      "grad_norm": 0.20352114737033844,
      "learning_rate": 6.746539131847856e-05,
      "loss": 0.5699,
      "step": 5160
    },
    {
      "epoch": 1.0609517936067427,
      "grad_norm": 0.19683125615119934,
      "learning_rate": 6.745673638012037e-05,
      "loss": 0.5847,
      "step": 5161
    },
    {
      "epoch": 1.0611573645801213,
      "grad_norm": 0.19472289085388184,
      "learning_rate": 6.744808033542373e-05,
      "loss": 0.5613,
      "step": 5162
    },
    {
      "epoch": 1.0613629355535,
      "grad_norm": 0.19928975403308868,
      "learning_rate": 6.74394231848151e-05,
      "loss": 0.5638,
      "step": 5163
    },
    {
      "epoch": 1.0615685065268785,
      "grad_norm": 0.2239234298467636,
      "learning_rate": 6.743076492872096e-05,
      "loss": 0.5674,
      "step": 5164
    },
    {
      "epoch": 1.0617740775002569,
      "grad_norm": 0.18867623805999756,
      "learning_rate": 6.742210556756789e-05,
      "loss": 0.5242,
      "step": 5165
    },
    {
      "epoch": 1.0619796484736355,
      "grad_norm": 0.17035862803459167,
      "learning_rate": 6.741344510178247e-05,
      "loss": 0.5613,
      "step": 5166
    },
    {
      "epoch": 1.062185219447014,
      "grad_norm": 0.20985183119773865,
      "learning_rate": 6.740478353179138e-05,
      "loss": 0.5737,
      "step": 5167
    },
    {
      "epoch": 1.0623907904203926,
      "grad_norm": 0.21184340119361877,
      "learning_rate": 6.739612085802131e-05,
      "loss": 0.5656,
      "step": 5168
    },
    {
      "epoch": 1.0625963613937712,
      "grad_norm": 0.19316667318344116,
      "learning_rate": 6.738745708089905e-05,
      "loss": 0.5726,
      "step": 5169
    },
    {
      "epoch": 1.0628019323671498,
      "grad_norm": 0.19877000153064728,
      "learning_rate": 6.737879220085143e-05,
      "loss": 0.5813,
      "step": 5170
    },
    {
      "epoch": 1.0630075033405284,
      "grad_norm": 0.20379842817783356,
      "learning_rate": 6.73701262183053e-05,
      "loss": 0.5675,
      "step": 5171
    },
    {
      "epoch": 1.0632130743139068,
      "grad_norm": 0.2046133428812027,
      "learning_rate": 6.736145913368762e-05,
      "loss": 0.5525,
      "step": 5172
    },
    {
      "epoch": 1.0634186452872854,
      "grad_norm": 0.19095589220523834,
      "learning_rate": 6.735279094742535e-05,
      "loss": 0.549,
      "step": 5173
    },
    {
      "epoch": 1.063624216260664,
      "grad_norm": 0.20165902376174927,
      "learning_rate": 6.734412165994556e-05,
      "loss": 0.5807,
      "step": 5174
    },
    {
      "epoch": 1.0638297872340425,
      "grad_norm": 0.19797958433628082,
      "learning_rate": 6.733545127167533e-05,
      "loss": 0.5532,
      "step": 5175
    },
    {
      "epoch": 1.0640353582074211,
      "grad_norm": 0.1996331512928009,
      "learning_rate": 6.732677978304182e-05,
      "loss": 0.5686,
      "step": 5176
    },
    {
      "epoch": 1.0642409291807997,
      "grad_norm": 0.19206491112709045,
      "learning_rate": 6.731810719447222e-05,
      "loss": 0.5458,
      "step": 5177
    },
    {
      "epoch": 1.0644465001541783,
      "grad_norm": 0.2006731927394867,
      "learning_rate": 6.730943350639379e-05,
      "loss": 0.5576,
      "step": 5178
    },
    {
      "epoch": 1.0646520711275569,
      "grad_norm": 0.20075003802776337,
      "learning_rate": 6.730075871923384e-05,
      "loss": 0.615,
      "step": 5179
    },
    {
      "epoch": 1.0648576421009353,
      "grad_norm": 0.1874108761548996,
      "learning_rate": 6.729208283341975e-05,
      "loss": 0.5487,
      "step": 5180
    },
    {
      "epoch": 1.0650632130743138,
      "grad_norm": 0.19633813202381134,
      "learning_rate": 6.728340584937892e-05,
      "loss": 0.57,
      "step": 5181
    },
    {
      "epoch": 1.0652687840476924,
      "grad_norm": 0.18520689010620117,
      "learning_rate": 6.727472776753885e-05,
      "loss": 0.5608,
      "step": 5182
    },
    {
      "epoch": 1.065474355021071,
      "grad_norm": 0.19115997850894928,
      "learning_rate": 6.726604858832704e-05,
      "loss": 0.5627,
      "step": 5183
    },
    {
      "epoch": 1.0656799259944496,
      "grad_norm": 0.19133557379245758,
      "learning_rate": 6.725736831217111e-05,
      "loss": 0.5502,
      "step": 5184
    },
    {
      "epoch": 1.0658854969678282,
      "grad_norm": 0.19521182775497437,
      "learning_rate": 6.724868693949864e-05,
      "loss": 0.5613,
      "step": 5185
    },
    {
      "epoch": 1.0660910679412068,
      "grad_norm": 0.19791938364505768,
      "learning_rate": 6.724000447073739e-05,
      "loss": 0.5791,
      "step": 5186
    },
    {
      "epoch": 1.0662966389145851,
      "grad_norm": 0.20099619030952454,
      "learning_rate": 6.723132090631505e-05,
      "loss": 0.5536,
      "step": 5187
    },
    {
      "epoch": 1.0665022098879637,
      "grad_norm": 0.19620858132839203,
      "learning_rate": 6.722263624665944e-05,
      "loss": 0.564,
      "step": 5188
    },
    {
      "epoch": 1.0667077808613423,
      "grad_norm": 0.19696368277072906,
      "learning_rate": 6.721395049219841e-05,
      "loss": 0.5791,
      "step": 5189
    },
    {
      "epoch": 1.066913351834721,
      "grad_norm": 0.1899513453245163,
      "learning_rate": 6.720526364335987e-05,
      "loss": 0.5405,
      "step": 5190
    },
    {
      "epoch": 1.0671189228080995,
      "grad_norm": 0.1824042797088623,
      "learning_rate": 6.719657570057178e-05,
      "loss": 0.5631,
      "step": 5191
    },
    {
      "epoch": 1.067324493781478,
      "grad_norm": 0.18800349533557892,
      "learning_rate": 6.718788666426216e-05,
      "loss": 0.5563,
      "step": 5192
    },
    {
      "epoch": 1.0675300647548567,
      "grad_norm": 0.18425163626670837,
      "learning_rate": 6.717919653485905e-05,
      "loss": 0.5475,
      "step": 5193
    },
    {
      "epoch": 1.0677356357282353,
      "grad_norm": 0.1711423397064209,
      "learning_rate": 6.71705053127906e-05,
      "loss": 0.5814,
      "step": 5194
    },
    {
      "epoch": 1.0679412067016136,
      "grad_norm": 0.2124950885772705,
      "learning_rate": 6.716181299848497e-05,
      "loss": 0.5802,
      "step": 5195
    },
    {
      "epoch": 1.0681467776749922,
      "grad_norm": 0.16117288172245026,
      "learning_rate": 6.715311959237042e-05,
      "loss": 0.5269,
      "step": 5196
    },
    {
      "epoch": 1.0683523486483708,
      "grad_norm": 0.16499152779579163,
      "learning_rate": 6.714442509487519e-05,
      "loss": 0.5588,
      "step": 5197
    },
    {
      "epoch": 1.0685579196217494,
      "grad_norm": 0.20156873762607574,
      "learning_rate": 6.713572950642765e-05,
      "loss": 0.575,
      "step": 5198
    },
    {
      "epoch": 1.068763490595128,
      "grad_norm": 0.19442300498485565,
      "learning_rate": 6.712703282745618e-05,
      "loss": 0.5727,
      "step": 5199
    },
    {
      "epoch": 1.0689690615685066,
      "grad_norm": 0.19860216975212097,
      "learning_rate": 6.711833505838921e-05,
      "loss": 0.5325,
      "step": 5200
    },
    {
      "epoch": 1.0691746325418852,
      "grad_norm": 0.1945996880531311,
      "learning_rate": 6.710963619965526e-05,
      "loss": 0.569,
      "step": 5201
    },
    {
      "epoch": 1.0693802035152635,
      "grad_norm": 0.20251908898353577,
      "learning_rate": 6.710093625168289e-05,
      "loss": 0.5884,
      "step": 5202
    },
    {
      "epoch": 1.0695857744886421,
      "grad_norm": 0.20491336286067963,
      "learning_rate": 6.709223521490067e-05,
      "loss": 0.5788,
      "step": 5203
    },
    {
      "epoch": 1.0697913454620207,
      "grad_norm": 0.19236387312412262,
      "learning_rate": 6.708353308973728e-05,
      "loss": 0.5606,
      "step": 5204
    },
    {
      "epoch": 1.0699969164353993,
      "grad_norm": 0.16769090294837952,
      "learning_rate": 6.707482987662144e-05,
      "loss": 0.5143,
      "step": 5205
    },
    {
      "epoch": 1.0702024874087779,
      "grad_norm": 0.1578342467546463,
      "learning_rate": 6.70661255759819e-05,
      "loss": 0.556,
      "step": 5206
    },
    {
      "epoch": 1.0704080583821565,
      "grad_norm": 0.19746308028697968,
      "learning_rate": 6.705742018824751e-05,
      "loss": 0.5619,
      "step": 5207
    },
    {
      "epoch": 1.070613629355535,
      "grad_norm": 0.1942613422870636,
      "learning_rate": 6.704871371384711e-05,
      "loss": 0.5738,
      "step": 5208
    },
    {
      "epoch": 1.0708192003289136,
      "grad_norm": 0.1871325820684433,
      "learning_rate": 6.704000615320964e-05,
      "loss": 0.578,
      "step": 5209
    },
    {
      "epoch": 1.0710247713022922,
      "grad_norm": 0.19533534348011017,
      "learning_rate": 6.703129750676409e-05,
      "loss": 0.574,
      "step": 5210
    },
    {
      "epoch": 1.0712303422756706,
      "grad_norm": 0.19535306096076965,
      "learning_rate": 6.702258777493947e-05,
      "loss": 0.5756,
      "step": 5211
    },
    {
      "epoch": 1.0714359132490492,
      "grad_norm": 0.18510495126247406,
      "learning_rate": 6.70138769581649e-05,
      "loss": 0.5639,
      "step": 5212
    },
    {
      "epoch": 1.0716414842224278,
      "grad_norm": 0.18865178525447845,
      "learning_rate": 6.70051650568695e-05,
      "loss": 0.5588,
      "step": 5213
    },
    {
      "epoch": 1.0718470551958064,
      "grad_norm": 0.19118379056453705,
      "learning_rate": 6.699645207148247e-05,
      "loss": 0.5695,
      "step": 5214
    },
    {
      "epoch": 1.072052626169185,
      "grad_norm": 0.19162502884864807,
      "learning_rate": 6.698773800243305e-05,
      "loss": 0.5647,
      "step": 5215
    },
    {
      "epoch": 1.0722581971425635,
      "grad_norm": 0.18810777366161346,
      "learning_rate": 6.697902285015056e-05,
      "loss": 0.5608,
      "step": 5216
    },
    {
      "epoch": 1.0724637681159421,
      "grad_norm": 0.16454185545444489,
      "learning_rate": 6.697030661506433e-05,
      "loss": 0.5383,
      "step": 5217
    },
    {
      "epoch": 1.0726693390893205,
      "grad_norm": 0.15946544706821442,
      "learning_rate": 6.69615892976038e-05,
      "loss": 0.551,
      "step": 5218
    },
    {
      "epoch": 1.072874910062699,
      "grad_norm": 0.20395736396312714,
      "learning_rate": 6.695287089819838e-05,
      "loss": 0.5643,
      "step": 5219
    },
    {
      "epoch": 1.0730804810360777,
      "grad_norm": 0.19649049639701843,
      "learning_rate": 6.694415141727766e-05,
      "loss": 0.5594,
      "step": 5220
    },
    {
      "epoch": 1.0732860520094563,
      "grad_norm": 0.18935894966125488,
      "learning_rate": 6.693543085527115e-05,
      "loss": 0.5627,
      "step": 5221
    },
    {
      "epoch": 1.0734916229828348,
      "grad_norm": 0.21237939596176147,
      "learning_rate": 6.69267092126085e-05,
      "loss": 0.5703,
      "step": 5222
    },
    {
      "epoch": 1.0736971939562134,
      "grad_norm": 0.19148610532283783,
      "learning_rate": 6.691798648971935e-05,
      "loss": 0.5535,
      "step": 5223
    },
    {
      "epoch": 1.073902764929592,
      "grad_norm": 2.203571319580078,
      "learning_rate": 6.690926268703345e-05,
      "loss": 0.6328,
      "step": 5224
    },
    {
      "epoch": 1.0741083359029706,
      "grad_norm": 0.20368382334709167,
      "learning_rate": 6.69005378049806e-05,
      "loss": 0.559,
      "step": 5225
    },
    {
      "epoch": 1.074313906876349,
      "grad_norm": 0.22243089973926544,
      "learning_rate": 6.68918118439906e-05,
      "loss": 0.5621,
      "step": 5226
    },
    {
      "epoch": 1.0745194778497276,
      "grad_norm": 0.25355663895606995,
      "learning_rate": 6.688308480449335e-05,
      "loss": 0.5876,
      "step": 5227
    },
    {
      "epoch": 1.0747250488231062,
      "grad_norm": 0.2832355201244354,
      "learning_rate": 6.68743566869188e-05,
      "loss": 0.5542,
      "step": 5228
    },
    {
      "epoch": 1.0749306197964847,
      "grad_norm": 0.1938430219888687,
      "learning_rate": 6.686562749169694e-05,
      "loss": 0.5282,
      "step": 5229
    },
    {
      "epoch": 1.0751361907698633,
      "grad_norm": 0.263157457113266,
      "learning_rate": 6.685689721925782e-05,
      "loss": 0.5783,
      "step": 5230
    },
    {
      "epoch": 1.075341761743242,
      "grad_norm": 0.2489389330148697,
      "learning_rate": 6.684816587003152e-05,
      "loss": 0.5584,
      "step": 5231
    },
    {
      "epoch": 1.0755473327166205,
      "grad_norm": 0.18948674201965332,
      "learning_rate": 6.683943344444821e-05,
      "loss": 0.5549,
      "step": 5232
    },
    {
      "epoch": 1.0757529036899989,
      "grad_norm": 0.18527735769748688,
      "learning_rate": 6.683069994293808e-05,
      "loss": 0.5533,
      "step": 5233
    },
    {
      "epoch": 1.0759584746633775,
      "grad_norm": 0.23963753879070282,
      "learning_rate": 6.682196536593142e-05,
      "loss": 0.5675,
      "step": 5234
    },
    {
      "epoch": 1.076164045636756,
      "grad_norm": 0.23618869483470917,
      "learning_rate": 6.681322971385852e-05,
      "loss": 0.581,
      "step": 5235
    },
    {
      "epoch": 1.0763696166101346,
      "grad_norm": 0.18081532418727875,
      "learning_rate": 6.680449298714974e-05,
      "loss": 0.5488,
      "step": 5236
    },
    {
      "epoch": 1.0765751875835132,
      "grad_norm": 0.17601439356803894,
      "learning_rate": 6.679575518623549e-05,
      "loss": 0.5718,
      "step": 5237
    },
    {
      "epoch": 1.0767807585568918,
      "grad_norm": 0.22532643377780914,
      "learning_rate": 6.678701631154627e-05,
      "loss": 0.5777,
      "step": 5238
    },
    {
      "epoch": 1.0769863295302704,
      "grad_norm": 0.21322833001613617,
      "learning_rate": 6.677827636351259e-05,
      "loss": 0.5803,
      "step": 5239
    },
    {
      "epoch": 1.077191900503649,
      "grad_norm": 0.19407659769058228,
      "learning_rate": 6.676953534256501e-05,
      "loss": 0.525,
      "step": 5240
    },
    {
      "epoch": 1.0773974714770274,
      "grad_norm": 0.32453683018684387,
      "learning_rate": 6.676079324913419e-05,
      "loss": 0.5812,
      "step": 5241
    },
    {
      "epoch": 1.077603042450406,
      "grad_norm": 0.2009628862142563,
      "learning_rate": 6.675205008365081e-05,
      "loss": 0.557,
      "step": 5242
    },
    {
      "epoch": 1.0778086134237845,
      "grad_norm": 0.17055638134479523,
      "learning_rate": 6.674330584654557e-05,
      "loss": 0.5503,
      "step": 5243
    },
    {
      "epoch": 1.0780141843971631,
      "grad_norm": 0.21184168756008148,
      "learning_rate": 6.673456053824928e-05,
      "loss": 0.5644,
      "step": 5244
    },
    {
      "epoch": 1.0782197553705417,
      "grad_norm": 0.20383425056934357,
      "learning_rate": 6.672581415919279e-05,
      "loss": 0.586,
      "step": 5245
    },
    {
      "epoch": 1.0784253263439203,
      "grad_norm": 0.20638933777809143,
      "learning_rate": 6.671706670980697e-05,
      "loss": 0.5577,
      "step": 5246
    },
    {
      "epoch": 1.0786308973172989,
      "grad_norm": 0.2084139883518219,
      "learning_rate": 6.670831819052278e-05,
      "loss": 0.5818,
      "step": 5247
    },
    {
      "epoch": 1.0788364682906773,
      "grad_norm": 0.1990043967962265,
      "learning_rate": 6.669956860177122e-05,
      "loss": 0.5694,
      "step": 5248
    },
    {
      "epoch": 1.0790420392640558,
      "grad_norm": 0.22096286714076996,
      "learning_rate": 6.669081794398334e-05,
      "loss": 0.5815,
      "step": 5249
    },
    {
      "epoch": 1.0792476102374344,
      "grad_norm": 0.19977039098739624,
      "learning_rate": 6.668206621759023e-05,
      "loss": 0.552,
      "step": 5250
    },
    {
      "epoch": 1.079453181210813,
      "grad_norm": 0.16734324395656586,
      "learning_rate": 6.667331342302308e-05,
      "loss": 0.5378,
      "step": 5251
    },
    {
      "epoch": 1.0796587521841916,
      "grad_norm": 0.1833125352859497,
      "learning_rate": 6.666455956071307e-05,
      "loss": 0.5605,
      "step": 5252
    },
    {
      "epoch": 1.0798643231575702,
      "grad_norm": 0.2064771205186844,
      "learning_rate": 6.665580463109147e-05,
      "loss": 0.5548,
      "step": 5253
    },
    {
      "epoch": 1.0800698941309488,
      "grad_norm": 0.19935967028141022,
      "learning_rate": 6.664704863458959e-05,
      "loss": 0.5673,
      "step": 5254
    },
    {
      "epoch": 1.0802754651043274,
      "grad_norm": 0.20248223841190338,
      "learning_rate": 6.66382915716388e-05,
      "loss": 0.5799,
      "step": 5255
    },
    {
      "epoch": 1.0804810360777057,
      "grad_norm": 0.19460590183734894,
      "learning_rate": 6.662953344267054e-05,
      "loss": 0.5549,
      "step": 5256
    },
    {
      "epoch": 1.0806866070510843,
      "grad_norm": 0.19697195291519165,
      "learning_rate": 6.662077424811624e-05,
      "loss": 0.5809,
      "step": 5257
    },
    {
      "epoch": 1.080892178024463,
      "grad_norm": 0.19642481207847595,
      "learning_rate": 6.661201398840747e-05,
      "loss": 0.5708,
      "step": 5258
    },
    {
      "epoch": 1.0810977489978415,
      "grad_norm": 0.1706124097108841,
      "learning_rate": 6.660325266397576e-05,
      "loss": 0.5569,
      "step": 5259
    },
    {
      "epoch": 1.08130331997122,
      "grad_norm": 0.16144689917564392,
      "learning_rate": 6.659449027525279e-05,
      "loss": 0.5646,
      "step": 5260
    },
    {
      "epoch": 1.0815088909445987,
      "grad_norm": 0.17022046446800232,
      "learning_rate": 6.658572682267019e-05,
      "loss": 0.5469,
      "step": 5261
    },
    {
      "epoch": 1.0817144619179773,
      "grad_norm": 0.16290414333343506,
      "learning_rate": 6.657696230665974e-05,
      "loss": 0.5779,
      "step": 5262
    },
    {
      "epoch": 1.0819200328913556,
      "grad_norm": 0.20431680977344513,
      "learning_rate": 6.656819672765321e-05,
      "loss": 0.5886,
      "step": 5263
    },
    {
      "epoch": 1.0821256038647342,
      "grad_norm": 0.19603441655635834,
      "learning_rate": 6.655943008608243e-05,
      "loss": 0.5559,
      "step": 5264
    },
    {
      "epoch": 1.0823311748381128,
      "grad_norm": 0.19341200590133667,
      "learning_rate": 6.65506623823793e-05,
      "loss": 0.5685,
      "step": 5265
    },
    {
      "epoch": 1.0825367458114914,
      "grad_norm": 0.17218126356601715,
      "learning_rate": 6.654189361697576e-05,
      "loss": 0.5443,
      "step": 5266
    },
    {
      "epoch": 1.08274231678487,
      "grad_norm": 0.13615413010120392,
      "learning_rate": 6.653312379030381e-05,
      "loss": 0.5515,
      "step": 5267
    },
    {
      "epoch": 1.0829478877582486,
      "grad_norm": 0.1653267741203308,
      "learning_rate": 6.652435290279549e-05,
      "loss": 0.5599,
      "step": 5268
    },
    {
      "epoch": 1.0831534587316272,
      "grad_norm": 0.1650351732969284,
      "learning_rate": 6.651558095488292e-05,
      "loss": 0.5362,
      "step": 5269
    },
    {
      "epoch": 1.0833590297050057,
      "grad_norm": 0.16134947538375854,
      "learning_rate": 6.650680794699823e-05,
      "loss": 0.559,
      "step": 5270
    },
    {
      "epoch": 1.0835646006783841,
      "grad_norm": 0.1984068751335144,
      "learning_rate": 6.649803387957362e-05,
      "loss": 0.5631,
      "step": 5271
    },
    {
      "epoch": 1.0837701716517627,
      "grad_norm": 0.1926686316728592,
      "learning_rate": 6.648925875304139e-05,
      "loss": 0.5864,
      "step": 5272
    },
    {
      "epoch": 1.0839757426251413,
      "grad_norm": 0.1906096339225769,
      "learning_rate": 6.648048256783382e-05,
      "loss": 0.557,
      "step": 5273
    },
    {
      "epoch": 1.0841813135985199,
      "grad_norm": 0.1856287568807602,
      "learning_rate": 6.647170532438327e-05,
      "loss": 0.5717,
      "step": 5274
    },
    {
      "epoch": 1.0843868845718985,
      "grad_norm": 0.17093409597873688,
      "learning_rate": 6.646292702312214e-05,
      "loss": 0.5314,
      "step": 5275
    },
    {
      "epoch": 1.084592455545277,
      "grad_norm": 0.17193061113357544,
      "learning_rate": 6.645414766448293e-05,
      "loss": 0.5795,
      "step": 5276
    },
    {
      "epoch": 1.0847980265186556,
      "grad_norm": 0.1909974366426468,
      "learning_rate": 6.644536724889814e-05,
      "loss": 0.5638,
      "step": 5277
    },
    {
      "epoch": 1.085003597492034,
      "grad_norm": 0.16503417491912842,
      "learning_rate": 6.643658577680033e-05,
      "loss": 0.5364,
      "step": 5278
    },
    {
      "epoch": 1.0852091684654126,
      "grad_norm": 0.12726576626300812,
      "learning_rate": 6.642780324862215e-05,
      "loss": 0.5181,
      "step": 5279
    },
    {
      "epoch": 1.0854147394387912,
      "grad_norm": 0.15936200320720673,
      "learning_rate": 6.641901966479623e-05,
      "loss": 0.5601,
      "step": 5280
    },
    {
      "epoch": 1.0856203104121698,
      "grad_norm": 0.1608133465051651,
      "learning_rate": 6.641023502575535e-05,
      "loss": 0.5125,
      "step": 5281
    },
    {
      "epoch": 1.0858258813855484,
      "grad_norm": 0.15383280813694,
      "learning_rate": 6.640144933193223e-05,
      "loss": 0.5724,
      "step": 5282
    },
    {
      "epoch": 1.086031452358927,
      "grad_norm": 0.15517185628414154,
      "learning_rate": 6.639266258375977e-05,
      "loss": 0.5275,
      "step": 5283
    },
    {
      "epoch": 1.0862370233323055,
      "grad_norm": 0.16167797148227692,
      "learning_rate": 6.63838747816708e-05,
      "loss": 0.5644,
      "step": 5284
    },
    {
      "epoch": 1.0864425943056841,
      "grad_norm": 0.1940879076719284,
      "learning_rate": 6.637508592609827e-05,
      "loss": 0.5677,
      "step": 5285
    },
    {
      "epoch": 1.0866481652790625,
      "grad_norm": 0.18758495151996613,
      "learning_rate": 6.636629601747515e-05,
      "loss": 0.5874,
      "step": 5286
    },
    {
      "epoch": 1.086853736252441,
      "grad_norm": 0.1906895488500595,
      "learning_rate": 6.635750505623451e-05,
      "loss": 0.5747,
      "step": 5287
    },
    {
      "epoch": 1.0870593072258197,
      "grad_norm": 0.18820390105247498,
      "learning_rate": 6.63487130428094e-05,
      "loss": 0.5581,
      "step": 5288
    },
    {
      "epoch": 1.0872648781991983,
      "grad_norm": 0.19802720844745636,
      "learning_rate": 6.633991997763299e-05,
      "loss": 0.5604,
      "step": 5289
    },
    {
      "epoch": 1.0874704491725768,
      "grad_norm": 0.16742005944252014,
      "learning_rate": 6.633112586113847e-05,
      "loss": 0.5223,
      "step": 5290
    },
    {
      "epoch": 1.0876760201459554,
      "grad_norm": 0.14373008906841278,
      "learning_rate": 6.632233069375907e-05,
      "loss": 0.5368,
      "step": 5291
    },
    {
      "epoch": 1.087881591119334,
      "grad_norm": 0.1611548662185669,
      "learning_rate": 6.63135344759281e-05,
      "loss": 0.5546,
      "step": 5292
    },
    {
      "epoch": 1.0880871620927126,
      "grad_norm": 0.19752389192581177,
      "learning_rate": 6.630473720807892e-05,
      "loss": 0.5597,
      "step": 5293
    },
    {
      "epoch": 1.088292733066091,
      "grad_norm": 0.19738554954528809,
      "learning_rate": 6.62959388906449e-05,
      "loss": 0.5787,
      "step": 5294
    },
    {
      "epoch": 1.0884983040394696,
      "grad_norm": 0.1929868459701538,
      "learning_rate": 6.628713952405951e-05,
      "loss": 0.5614,
      "step": 5295
    },
    {
      "epoch": 1.0887038750128482,
      "grad_norm": 0.2048940360546112,
      "learning_rate": 6.627833910875626e-05,
      "loss": 0.5715,
      "step": 5296
    },
    {
      "epoch": 1.0889094459862267,
      "grad_norm": 0.19857628643512726,
      "learning_rate": 6.62695376451687e-05,
      "loss": 0.5694,
      "step": 5297
    },
    {
      "epoch": 1.0891150169596053,
      "grad_norm": 0.19346579909324646,
      "learning_rate": 6.626073513373043e-05,
      "loss": 0.5612,
      "step": 5298
    },
    {
      "epoch": 1.089320587932984,
      "grad_norm": 0.1919691264629364,
      "learning_rate": 6.62519315748751e-05,
      "loss": 0.5584,
      "step": 5299
    },
    {
      "epoch": 1.0895261589063625,
      "grad_norm": 0.1884642243385315,
      "learning_rate": 6.624312696903644e-05,
      "loss": 0.5576,
      "step": 5300
    },
    {
      "epoch": 1.089731729879741,
      "grad_norm": 0.1730055809020996,
      "learning_rate": 6.623432131664822e-05,
      "loss": 0.5565,
      "step": 5301
    },
    {
      "epoch": 1.0899373008531195,
      "grad_norm": 0.19262228906154633,
      "learning_rate": 6.62255146181442e-05,
      "loss": 0.5645,
      "step": 5302
    },
    {
      "epoch": 1.090142871826498,
      "grad_norm": 0.19675207138061523,
      "learning_rate": 6.62167068739583e-05,
      "loss": 0.5845,
      "step": 5303
    },
    {
      "epoch": 1.0903484427998766,
      "grad_norm": 0.1958772838115692,
      "learning_rate": 6.620789808452443e-05,
      "loss": 0.5653,
      "step": 5304
    },
    {
      "epoch": 1.0905540137732552,
      "grad_norm": 0.18935401737689972,
      "learning_rate": 6.619908825027655e-05,
      "loss": 0.5523,
      "step": 5305
    },
    {
      "epoch": 1.0907595847466338,
      "grad_norm": 0.19371245801448822,
      "learning_rate": 6.619027737164865e-05,
      "loss": 0.551,
      "step": 5306
    },
    {
      "epoch": 1.0909651557200124,
      "grad_norm": 0.19392549991607666,
      "learning_rate": 6.618146544907485e-05,
      "loss": 0.5731,
      "step": 5307
    },
    {
      "epoch": 1.091170726693391,
      "grad_norm": 0.19857439398765564,
      "learning_rate": 6.617265248298926e-05,
      "loss": 0.5364,
      "step": 5308
    },
    {
      "epoch": 1.0913762976667694,
      "grad_norm": 0.2129819244146347,
      "learning_rate": 6.616383847382601e-05,
      "loss": 0.5635,
      "step": 5309
    },
    {
      "epoch": 1.091581868640148,
      "grad_norm": 0.18669261038303375,
      "learning_rate": 6.615502342201938e-05,
      "loss": 0.5533,
      "step": 5310
    },
    {
      "epoch": 1.0917874396135265,
      "grad_norm": 0.20277494192123413,
      "learning_rate": 6.614620732800363e-05,
      "loss": 0.5874,
      "step": 5311
    },
    {
      "epoch": 1.0919930105869051,
      "grad_norm": 0.19775375723838806,
      "learning_rate": 6.613739019221306e-05,
      "loss": 0.5737,
      "step": 5312
    },
    {
      "epoch": 1.0921985815602837,
      "grad_norm": 0.19743028283119202,
      "learning_rate": 6.612857201508208e-05,
      "loss": 0.5853,
      "step": 5313
    },
    {
      "epoch": 1.0924041525336623,
      "grad_norm": 0.18763835728168488,
      "learning_rate": 6.611975279704511e-05,
      "loss": 0.5728,
      "step": 5314
    },
    {
      "epoch": 1.0926097235070409,
      "grad_norm": 0.19164253771305084,
      "learning_rate": 6.611093253853664e-05,
      "loss": 0.5734,
      "step": 5315
    },
    {
      "epoch": 1.0928152944804195,
      "grad_norm": 0.19013293087482452,
      "learning_rate": 6.610211123999119e-05,
      "loss": 0.5647,
      "step": 5316
    },
    {
      "epoch": 1.0930208654537978,
      "grad_norm": 0.19846196472644806,
      "learning_rate": 6.609328890184334e-05,
      "loss": 0.5613,
      "step": 5317
    },
    {
      "epoch": 1.0932264364271764,
      "grad_norm": 0.18824782967567444,
      "learning_rate": 6.608446552452777e-05,
      "loss": 0.5496,
      "step": 5318
    },
    {
      "epoch": 1.093432007400555,
      "grad_norm": 0.19030706584453583,
      "learning_rate": 6.60756411084791e-05,
      "loss": 0.5488,
      "step": 5319
    },
    {
      "epoch": 1.0936375783739336,
      "grad_norm": 0.16634370386600494,
      "learning_rate": 6.606681565413211e-05,
      "loss": 0.5618,
      "step": 5320
    },
    {
      "epoch": 1.0938431493473122,
      "grad_norm": 0.20281003415584564,
      "learning_rate": 6.605798916192157e-05,
      "loss": 0.5718,
      "step": 5321
    },
    {
      "epoch": 1.0940487203206908,
      "grad_norm": 0.16052670776844025,
      "learning_rate": 6.604916163228235e-05,
      "loss": 0.5356,
      "step": 5322
    },
    {
      "epoch": 1.0942542912940694,
      "grad_norm": 0.19108809530735016,
      "learning_rate": 6.60403330656493e-05,
      "loss": 0.5525,
      "step": 5323
    },
    {
      "epoch": 1.0944598622674477,
      "grad_norm": 0.20535770058631897,
      "learning_rate": 6.603150346245738e-05,
      "loss": 0.5542,
      "step": 5324
    },
    {
      "epoch": 1.0946654332408263,
      "grad_norm": 0.20727907121181488,
      "learning_rate": 6.60226728231416e-05,
      "loss": 0.583,
      "step": 5325
    },
    {
      "epoch": 1.094871004214205,
      "grad_norm": 0.19222858548164368,
      "learning_rate": 6.601384114813699e-05,
      "loss": 0.5626,
      "step": 5326
    },
    {
      "epoch": 1.0950765751875835,
      "grad_norm": 0.19487687945365906,
      "learning_rate": 6.600500843787864e-05,
      "loss": 0.5649,
      "step": 5327
    },
    {
      "epoch": 1.095282146160962,
      "grad_norm": 0.1915174126625061,
      "learning_rate": 6.599617469280171e-05,
      "loss": 0.5561,
      "step": 5328
    },
    {
      "epoch": 1.0954877171343407,
      "grad_norm": 0.17732886970043182,
      "learning_rate": 6.598733991334137e-05,
      "loss": 0.5102,
      "step": 5329
    },
    {
      "epoch": 1.0956932881077193,
      "grad_norm": 0.17406459152698517,
      "learning_rate": 6.59785040999329e-05,
      "loss": 0.5818,
      "step": 5330
    },
    {
      "epoch": 1.0958988590810979,
      "grad_norm": 0.19681067764759064,
      "learning_rate": 6.596966725301158e-05,
      "loss": 0.5573,
      "step": 5331
    },
    {
      "epoch": 1.0961044300544762,
      "grad_norm": 0.2060333490371704,
      "learning_rate": 6.596082937301277e-05,
      "loss": 0.5757,
      "step": 5332
    },
    {
      "epoch": 1.0963100010278548,
      "grad_norm": 0.1740088164806366,
      "learning_rate": 6.595199046037187e-05,
      "loss": 0.5204,
      "step": 5333
    },
    {
      "epoch": 1.0965155720012334,
      "grad_norm": 0.15425589680671692,
      "learning_rate": 6.594315051552434e-05,
      "loss": 0.549,
      "step": 5334
    },
    {
      "epoch": 1.096721142974612,
      "grad_norm": 0.20004071295261383,
      "learning_rate": 6.593430953890564e-05,
      "loss": 0.5745,
      "step": 5335
    },
    {
      "epoch": 1.0969267139479906,
      "grad_norm": 0.21765153110027313,
      "learning_rate": 6.592546753095138e-05,
      "loss": 0.5779,
      "step": 5336
    },
    {
      "epoch": 1.0971322849213692,
      "grad_norm": 0.2069845348596573,
      "learning_rate": 6.591662449209714e-05,
      "loss": 0.5681,
      "step": 5337
    },
    {
      "epoch": 1.0973378558947477,
      "grad_norm": 0.16083793342113495,
      "learning_rate": 6.590778042277856e-05,
      "loss": 0.522,
      "step": 5338
    },
    {
      "epoch": 1.0975434268681261,
      "grad_norm": 0.13454684615135193,
      "learning_rate": 6.589893532343137e-05,
      "loss": 0.5234,
      "step": 5339
    },
    {
      "epoch": 1.0977489978415047,
      "grad_norm": 0.16017797589302063,
      "learning_rate": 6.589008919449132e-05,
      "loss": 0.5456,
      "step": 5340
    },
    {
      "epoch": 1.0979545688148833,
      "grad_norm": 0.19706310331821442,
      "learning_rate": 6.588124203639421e-05,
      "loss": 0.5598,
      "step": 5341
    },
    {
      "epoch": 1.0981601397882619,
      "grad_norm": 0.19830232858657837,
      "learning_rate": 6.587239384957593e-05,
      "loss": 0.5587,
      "step": 5342
    },
    {
      "epoch": 1.0983657107616405,
      "grad_norm": 0.19337981939315796,
      "learning_rate": 6.586354463447233e-05,
      "loss": 0.5635,
      "step": 5343
    },
    {
      "epoch": 1.098571281735019,
      "grad_norm": 0.19587767124176025,
      "learning_rate": 6.585469439151942e-05,
      "loss": 0.5626,
      "step": 5344
    },
    {
      "epoch": 1.0987768527083976,
      "grad_norm": 0.20316268503665924,
      "learning_rate": 6.584584312115318e-05,
      "loss": 0.5743,
      "step": 5345
    },
    {
      "epoch": 1.0989824236817762,
      "grad_norm": 0.19595171511173248,
      "learning_rate": 6.583699082380969e-05,
      "loss": 0.5579,
      "step": 5346
    },
    {
      "epoch": 1.0991879946551546,
      "grad_norm": 0.18075229227542877,
      "learning_rate": 6.582813749992504e-05,
      "loss": 0.5336,
      "step": 5347
    },
    {
      "epoch": 1.0993935656285332,
      "grad_norm": 0.1714819073677063,
      "learning_rate": 6.581928314993542e-05,
      "loss": 0.5727,
      "step": 5348
    },
    {
      "epoch": 1.0995991366019118,
      "grad_norm": 0.2072882503271103,
      "learning_rate": 6.581042777427703e-05,
      "loss": 0.5859,
      "step": 5349
    },
    {
      "epoch": 1.0998047075752904,
      "grad_norm": 0.19539666175842285,
      "learning_rate": 6.580157137338613e-05,
      "loss": 0.5764,
      "step": 5350
    },
    {
      "epoch": 1.100010278548669,
      "grad_norm": 0.20228314399719238,
      "learning_rate": 6.579271394769901e-05,
      "loss": 0.5831,
      "step": 5351
    },
    {
      "epoch": 1.1002158495220475,
      "grad_norm": 0.16698522865772247,
      "learning_rate": 6.578385549765209e-05,
      "loss": 0.5292,
      "step": 5352
    },
    {
      "epoch": 1.1004214204954261,
      "grad_norm": 0.15886962413787842,
      "learning_rate": 6.577499602368176e-05,
      "loss": 0.5586,
      "step": 5353
    },
    {
      "epoch": 1.1006269914688045,
      "grad_norm": 0.16973358392715454,
      "learning_rate": 6.576613552622443e-05,
      "loss": 0.5373,
      "step": 5354
    },
    {
      "epoch": 1.100832562442183,
      "grad_norm": 0.16206781566143036,
      "learning_rate": 6.575727400571672e-05,
      "loss": 0.5407,
      "step": 5355
    },
    {
      "epoch": 1.1010381334155617,
      "grad_norm": 0.19393891096115112,
      "learning_rate": 6.57484114625951e-05,
      "loss": 0.5683,
      "step": 5356
    },
    {
      "epoch": 1.1012437043889403,
      "grad_norm": 0.19983628392219543,
      "learning_rate": 6.573954789729625e-05,
      "loss": 0.5771,
      "step": 5357
    },
    {
      "epoch": 1.1014492753623188,
      "grad_norm": 0.19126926362514496,
      "learning_rate": 6.573068331025679e-05,
      "loss": 0.559,
      "step": 5358
    },
    {
      "epoch": 1.1016548463356974,
      "grad_norm": 0.19822482764720917,
      "learning_rate": 6.572181770191347e-05,
      "loss": 0.5587,
      "step": 5359
    },
    {
      "epoch": 1.101860417309076,
      "grad_norm": 0.19558537006378174,
      "learning_rate": 6.571295107270304e-05,
      "loss": 0.562,
      "step": 5360
    },
    {
      "epoch": 1.1020659882824546,
      "grad_norm": 0.1923174262046814,
      "learning_rate": 6.570408342306233e-05,
      "loss": 0.545,
      "step": 5361
    },
    {
      "epoch": 1.102271559255833,
      "grad_norm": 0.19644415378570557,
      "learning_rate": 6.569521475342819e-05,
      "loss": 0.5765,
      "step": 5362
    },
    {
      "epoch": 1.1024771302292116,
      "grad_norm": 0.16399532556533813,
      "learning_rate": 6.568634506423757e-05,
      "loss": 0.5231,
      "step": 5363
    },
    {
      "epoch": 1.1026827012025902,
      "grad_norm": 0.13997915387153625,
      "learning_rate": 6.567747435592738e-05,
      "loss": 0.5166,
      "step": 5364
    },
    {
      "epoch": 1.1028882721759687,
      "grad_norm": 0.16803216934204102,
      "learning_rate": 6.56686026289347e-05,
      "loss": 0.5639,
      "step": 5365
    },
    {
      "epoch": 1.1030938431493473,
      "grad_norm": 0.16553597152233124,
      "learning_rate": 6.565972988369658e-05,
      "loss": 0.5339,
      "step": 5366
    },
    {
      "epoch": 1.103299414122726,
      "grad_norm": 0.16284188628196716,
      "learning_rate": 6.565085612065012e-05,
      "loss": 0.5559,
      "step": 5367
    },
    {
      "epoch": 1.1035049850961045,
      "grad_norm": 0.20028123259544373,
      "learning_rate": 6.56419813402325e-05,
      "loss": 0.5803,
      "step": 5368
    },
    {
      "epoch": 1.1037105560694829,
      "grad_norm": 0.1928570717573166,
      "learning_rate": 6.563310554288094e-05,
      "loss": 0.5508,
      "step": 5369
    },
    {
      "epoch": 1.1039161270428615,
      "grad_norm": 0.1684267520904541,
      "learning_rate": 6.562422872903271e-05,
      "loss": 0.5431,
      "step": 5370
    },
    {
      "epoch": 1.10412169801624,
      "grad_norm": 0.13885952532291412,
      "learning_rate": 6.561535089912512e-05,
      "loss": 0.5324,
      "step": 5371
    },
    {
      "epoch": 1.1043272689896186,
      "grad_norm": 0.4177161455154419,
      "learning_rate": 6.560647205359556e-05,
      "loss": 0.5474,
      "step": 5372
    },
    {
      "epoch": 1.1045328399629972,
      "grad_norm": 0.201374813914299,
      "learning_rate": 6.559759219288145e-05,
      "loss": 0.5477,
      "step": 5373
    },
    {
      "epoch": 1.1047384109363758,
      "grad_norm": 0.19698698818683624,
      "learning_rate": 6.558871131742022e-05,
      "loss": 0.5579,
      "step": 5374
    },
    {
      "epoch": 1.1049439819097544,
      "grad_norm": 0.19668418169021606,
      "learning_rate": 6.557982942764941e-05,
      "loss": 0.592,
      "step": 5375
    },
    {
      "epoch": 1.105149552883133,
      "grad_norm": 0.18167072534561157,
      "learning_rate": 6.557094652400662e-05,
      "loss": 0.5506,
      "step": 5376
    },
    {
      "epoch": 1.1053551238565114,
      "grad_norm": 0.15802860260009766,
      "learning_rate": 6.556206260692943e-05,
      "loss": 0.5303,
      "step": 5377
    },
    {
      "epoch": 1.10556069482989,
      "grad_norm": 0.1602732092142105,
      "learning_rate": 6.55531776768555e-05,
      "loss": 0.5617,
      "step": 5378
    },
    {
      "epoch": 1.1057662658032685,
      "grad_norm": 0.20059829950332642,
      "learning_rate": 6.55442917342226e-05,
      "loss": 0.5598,
      "step": 5379
    },
    {
      "epoch": 1.1059718367766471,
      "grad_norm": 0.19668720662593842,
      "learning_rate": 6.553540477946846e-05,
      "loss": 0.5747,
      "step": 5380
    },
    {
      "epoch": 1.1061774077500257,
      "grad_norm": 0.19892635941505432,
      "learning_rate": 6.552651681303091e-05,
      "loss": 0.5767,
      "step": 5381
    },
    {
      "epoch": 1.1063829787234043,
      "grad_norm": 0.2011987864971161,
      "learning_rate": 6.551762783534783e-05,
      "loss": 0.5782,
      "step": 5382
    },
    {
      "epoch": 1.1065885496967829,
      "grad_norm": 0.18638330698013306,
      "learning_rate": 6.550873784685711e-05,
      "loss": 0.5516,
      "step": 5383
    },
    {
      "epoch": 1.1067941206701615,
      "grad_norm": 0.1961633563041687,
      "learning_rate": 6.549984684799675e-05,
      "loss": 0.5462,
      "step": 5384
    },
    {
      "epoch": 1.1069996916435398,
      "grad_norm": 0.18275189399719238,
      "learning_rate": 6.549095483920473e-05,
      "loss": 0.5296,
      "step": 5385
    },
    {
      "epoch": 1.1072052626169184,
      "grad_norm": 0.16657038033008575,
      "learning_rate": 6.548206182091915e-05,
      "loss": 0.5184,
      "step": 5386
    },
    {
      "epoch": 1.107410833590297,
      "grad_norm": 0.16570012271404266,
      "learning_rate": 6.547316779357812e-05,
      "loss": 0.5533,
      "step": 5387
    },
    {
      "epoch": 1.1076164045636756,
      "grad_norm": 0.21582432091236115,
      "learning_rate": 6.546427275761979e-05,
      "loss": 0.5526,
      "step": 5388
    },
    {
      "epoch": 1.1078219755370542,
      "grad_norm": 0.19760467112064362,
      "learning_rate": 6.54553767134824e-05,
      "loss": 0.5558,
      "step": 5389
    },
    {
      "epoch": 1.1080275465104328,
      "grad_norm": 0.19710463285446167,
      "learning_rate": 6.544647966160421e-05,
      "loss": 0.5413,
      "step": 5390
    },
    {
      "epoch": 1.1082331174838114,
      "grad_norm": 0.195608451962471,
      "learning_rate": 6.543758160242353e-05,
      "loss": 0.581,
      "step": 5391
    },
    {
      "epoch": 1.10843868845719,
      "grad_norm": 0.1914118230342865,
      "learning_rate": 6.542868253637873e-05,
      "loss": 0.5282,
      "step": 5392
    },
    {
      "epoch": 1.1086442594305683,
      "grad_norm": 0.16971172392368317,
      "learning_rate": 6.541978246390823e-05,
      "loss": 0.5427,
      "step": 5393
    },
    {
      "epoch": 1.108849830403947,
      "grad_norm": 0.19938012957572937,
      "learning_rate": 6.541088138545049e-05,
      "loss": 0.5378,
      "step": 5394
    },
    {
      "epoch": 1.1090554013773255,
      "grad_norm": 0.2031160593032837,
      "learning_rate": 6.540197930144403e-05,
      "loss": 0.5679,
      "step": 5395
    },
    {
      "epoch": 1.109260972350704,
      "grad_norm": 0.19984202086925507,
      "learning_rate": 6.53930762123274e-05,
      "loss": 0.5565,
      "step": 5396
    },
    {
      "epoch": 1.1094665433240827,
      "grad_norm": 0.17272289097309113,
      "learning_rate": 6.538417211853923e-05,
      "loss": 0.5411,
      "step": 5397
    },
    {
      "epoch": 1.1096721142974613,
      "grad_norm": 0.17256368696689606,
      "learning_rate": 6.537526702051815e-05,
      "loss": 0.5649,
      "step": 5398
    },
    {
      "epoch": 1.1098776852708399,
      "grad_norm": 0.1994207799434662,
      "learning_rate": 6.536636091870292e-05,
      "loss": 0.5794,
      "step": 5399
    },
    {
      "epoch": 1.1100832562442182,
      "grad_norm": 0.18973985314369202,
      "learning_rate": 6.535745381353226e-05,
      "loss": 0.5726,
      "step": 5400
    },
    {
      "epoch": 1.1102888272175968,
      "grad_norm": 0.18999481201171875,
      "learning_rate": 6.534854570544502e-05,
      "loss": 0.582,
      "step": 5401
    },
    {
      "epoch": 1.1104943981909754,
      "grad_norm": 0.19933466613292694,
      "learning_rate": 6.533963659488005e-05,
      "loss": 0.5795,
      "step": 5402
    },
    {
      "epoch": 1.110699969164354,
      "grad_norm": 0.16730111837387085,
      "learning_rate": 6.533072648227623e-05,
      "loss": 0.5339,
      "step": 5403
    },
    {
      "epoch": 1.1109055401377326,
      "grad_norm": 0.13518232107162476,
      "learning_rate": 6.532181536807256e-05,
      "loss": 0.5358,
      "step": 5404
    },
    {
      "epoch": 1.1111111111111112,
      "grad_norm": 0.13433937728405,
      "learning_rate": 6.531290325270802e-05,
      "loss": 0.5316,
      "step": 5405
    },
    {
      "epoch": 1.1113166820844897,
      "grad_norm": 0.16764889657497406,
      "learning_rate": 6.530399013662168e-05,
      "loss": 0.5494,
      "step": 5406
    },
    {
      "epoch": 1.1115222530578683,
      "grad_norm": 0.20979219675064087,
      "learning_rate": 6.529507602025265e-05,
      "loss": 0.5727,
      "step": 5407
    },
    {
      "epoch": 1.1117278240312467,
      "grad_norm": 0.196084126830101,
      "learning_rate": 6.528616090404008e-05,
      "loss": 0.5542,
      "step": 5408
    },
    {
      "epoch": 1.1119333950046253,
      "grad_norm": 0.19469597935676575,
      "learning_rate": 6.527724478842318e-05,
      "loss": 0.5717,
      "step": 5409
    },
    {
      "epoch": 1.1121389659780039,
      "grad_norm": 0.19987237453460693,
      "learning_rate": 6.526832767384121e-05,
      "loss": 0.5782,
      "step": 5410
    },
    {
      "epoch": 1.1123445369513825,
      "grad_norm": 0.20070701837539673,
      "learning_rate": 6.525940956073347e-05,
      "loss": 0.5578,
      "step": 5411
    },
    {
      "epoch": 1.112550107924761,
      "grad_norm": 0.20500093698501587,
      "learning_rate": 6.52504904495393e-05,
      "loss": 0.566,
      "step": 5412
    },
    {
      "epoch": 1.1127556788981396,
      "grad_norm": 0.17960810661315918,
      "learning_rate": 6.524157034069813e-05,
      "loss": 0.5331,
      "step": 5413
    },
    {
      "epoch": 1.1129612498715182,
      "grad_norm": 0.17575471103191376,
      "learning_rate": 6.523264923464939e-05,
      "loss": 0.5575,
      "step": 5414
    },
    {
      "epoch": 1.1131668208448966,
      "grad_norm": 0.2222844958305359,
      "learning_rate": 6.522372713183259e-05,
      "loss": 0.5928,
      "step": 5415
    },
    {
      "epoch": 1.1133723918182752,
      "grad_norm": 0.19698132574558258,
      "learning_rate": 6.521480403268727e-05,
      "loss": 0.5484,
      "step": 5416
    },
    {
      "epoch": 1.1135779627916538,
      "grad_norm": 0.16682282090187073,
      "learning_rate": 6.520587993765305e-05,
      "loss": 0.5474,
      "step": 5417
    },
    {
      "epoch": 1.1137835337650324,
      "grad_norm": 0.5412604808807373,
      "learning_rate": 6.519695484716958e-05,
      "loss": 0.5692,
      "step": 5418
    },
    {
      "epoch": 1.113989104738411,
      "grad_norm": 0.1983635425567627,
      "learning_rate": 6.518802876167654e-05,
      "loss": 0.5231,
      "step": 5419
    },
    {
      "epoch": 1.1141946757117895,
      "grad_norm": 0.1765107810497284,
      "learning_rate": 6.517910168161367e-05,
      "loss": 0.5307,
      "step": 5420
    },
    {
      "epoch": 1.1144002466851681,
      "grad_norm": 0.17099499702453613,
      "learning_rate": 6.517017360742077e-05,
      "loss": 0.5787,
      "step": 5421
    },
    {
      "epoch": 1.1146058176585467,
      "grad_norm": 0.174418643116951,
      "learning_rate": 6.51612445395377e-05,
      "loss": 0.524,
      "step": 5422
    },
    {
      "epoch": 1.114811388631925,
      "grad_norm": 0.1620262712240219,
      "learning_rate": 6.515231447840435e-05,
      "loss": 0.5454,
      "step": 5423
    },
    {
      "epoch": 1.1150169596053037,
      "grad_norm": 0.20404332876205444,
      "learning_rate": 6.514338342446066e-05,
      "loss": 0.5735,
      "step": 5424
    },
    {
      "epoch": 1.1152225305786823,
      "grad_norm": 0.19146005809307098,
      "learning_rate": 6.513445137814661e-05,
      "loss": 0.5627,
      "step": 5425
    },
    {
      "epoch": 1.1154281015520608,
      "grad_norm": 0.1799180954694748,
      "learning_rate": 6.512551833990226e-05,
      "loss": 0.5394,
      "step": 5426
    },
    {
      "epoch": 1.1156336725254394,
      "grad_norm": 0.14841869473457336,
      "learning_rate": 6.511658431016768e-05,
      "loss": 0.5174,
      "step": 5427
    },
    {
      "epoch": 1.115839243498818,
      "grad_norm": 0.1631341278553009,
      "learning_rate": 6.510764928938301e-05,
      "loss": 0.5401,
      "step": 5428
    },
    {
      "epoch": 1.1160448144721966,
      "grad_norm": 0.202928826212883,
      "learning_rate": 6.509871327798846e-05,
      "loss": 0.5576,
      "step": 5429
    },
    {
      "epoch": 1.116250385445575,
      "grad_norm": 0.20302633941173553,
      "learning_rate": 6.508977627642423e-05,
      "loss": 0.5684,
      "step": 5430
    },
    {
      "epoch": 1.1164559564189536,
      "grad_norm": 0.2004452496767044,
      "learning_rate": 6.508083828513062e-05,
      "loss": 0.5695,
      "step": 5431
    },
    {
      "epoch": 1.1166615273923322,
      "grad_norm": 0.21618925034999847,
      "learning_rate": 6.507189930454797e-05,
      "loss": 0.5447,
      "step": 5432
    },
    {
      "epoch": 1.1168670983657107,
      "grad_norm": 0.41041454672813416,
      "learning_rate": 6.506295933511667e-05,
      "loss": 0.5416,
      "step": 5433
    },
    {
      "epoch": 1.1170726693390893,
      "grad_norm": 0.17429187893867493,
      "learning_rate": 6.505401837727712e-05,
      "loss": 0.5784,
      "step": 5434
    },
    {
      "epoch": 1.117278240312468,
      "grad_norm": 0.20878252387046814,
      "learning_rate": 6.504507643146983e-05,
      "loss": 0.5594,
      "step": 5435
    },
    {
      "epoch": 1.1174838112858465,
      "grad_norm": 0.21358852088451385,
      "learning_rate": 6.503613349813532e-05,
      "loss": 0.5902,
      "step": 5436
    },
    {
      "epoch": 1.117689382259225,
      "grad_norm": 0.1675240397453308,
      "learning_rate": 6.502718957771415e-05,
      "loss": 0.5253,
      "step": 5437
    },
    {
      "epoch": 1.1178949532326035,
      "grad_norm": 0.1364658623933792,
      "learning_rate": 6.501824467064695e-05,
      "loss": 0.5097,
      "step": 5438
    },
    {
      "epoch": 1.118100524205982,
      "grad_norm": 0.170567587018013,
      "learning_rate": 6.500929877737442e-05,
      "loss": 0.5704,
      "step": 5439
    },
    {
      "epoch": 1.1183060951793606,
      "grad_norm": 0.2064054310321808,
      "learning_rate": 6.500035189833725e-05,
      "loss": 0.5945,
      "step": 5440
    },
    {
      "epoch": 1.1185116661527392,
      "grad_norm": 0.19481217861175537,
      "learning_rate": 6.499140403397623e-05,
      "loss": 0.5454,
      "step": 5441
    },
    {
      "epoch": 1.1187172371261178,
      "grad_norm": 0.19907177984714508,
      "learning_rate": 6.498245518473216e-05,
      "loss": 0.5479,
      "step": 5442
    },
    {
      "epoch": 1.1189228080994964,
      "grad_norm": 0.20047436654567719,
      "learning_rate": 6.497350535104592e-05,
      "loss": 0.5321,
      "step": 5443
    },
    {
      "epoch": 1.119128379072875,
      "grad_norm": 0.19042466580867767,
      "learning_rate": 6.496455453335842e-05,
      "loss": 0.5252,
      "step": 5444
    },
    {
      "epoch": 1.1193339500462534,
      "grad_norm": 0.17058107256889343,
      "learning_rate": 6.495560273211066e-05,
      "loss": 0.5588,
      "step": 5445
    },
    {
      "epoch": 1.119539521019632,
      "grad_norm": 0.21633589267730713,
      "learning_rate": 6.494664994774363e-05,
      "loss": 0.5613,
      "step": 5446
    },
    {
      "epoch": 1.1197450919930105,
      "grad_norm": 0.18286935985088348,
      "learning_rate": 6.493769618069835e-05,
      "loss": 0.5415,
      "step": 5447
    },
    {
      "epoch": 1.1199506629663891,
      "grad_norm": 0.17194852232933044,
      "learning_rate": 6.492874143141599e-05,
      "loss": 0.5713,
      "step": 5448
    },
    {
      "epoch": 1.1201562339397677,
      "grad_norm": 0.1954166442155838,
      "learning_rate": 6.49197857003377e-05,
      "loss": 0.5635,
      "step": 5449
    },
    {
      "epoch": 1.1203618049131463,
      "grad_norm": 0.22501884400844574,
      "learning_rate": 6.491082898790465e-05,
      "loss": 0.5615,
      "step": 5450
    },
    {
      "epoch": 1.1205673758865249,
      "grad_norm": 0.19493956863880157,
      "learning_rate": 6.490187129455813e-05,
      "loss": 0.5409,
      "step": 5451
    },
    {
      "epoch": 1.1207729468599035,
      "grad_norm": 0.19634434580802917,
      "learning_rate": 6.489291262073942e-05,
      "loss": 0.5698,
      "step": 5452
    },
    {
      "epoch": 1.1209785178332818,
      "grad_norm": 0.22804930806159973,
      "learning_rate": 6.48839529668899e-05,
      "loss": 0.5685,
      "step": 5453
    },
    {
      "epoch": 1.1211840888066604,
      "grad_norm": 0.18841257691383362,
      "learning_rate": 6.487499233345094e-05,
      "loss": 0.5362,
      "step": 5454
    },
    {
      "epoch": 1.121389659780039,
      "grad_norm": 0.16956526041030884,
      "learning_rate": 6.4866030720864e-05,
      "loss": 0.5396,
      "step": 5455
    },
    {
      "epoch": 1.1215952307534176,
      "grad_norm": 0.1671314686536789,
      "learning_rate": 6.48570681295706e-05,
      "loss": 0.5741,
      "step": 5456
    },
    {
      "epoch": 1.1218008017267962,
      "grad_norm": 0.16687725484371185,
      "learning_rate": 6.484810456001226e-05,
      "loss": 0.5651,
      "step": 5457
    },
    {
      "epoch": 1.1220063727001748,
      "grad_norm": 0.16390031576156616,
      "learning_rate": 6.483914001263058e-05,
      "loss": 0.5638,
      "step": 5458
    },
    {
      "epoch": 1.1222119436735534,
      "grad_norm": 0.16974018514156342,
      "learning_rate": 6.483017448786719e-05,
      "loss": 0.5198,
      "step": 5459
    },
    {
      "epoch": 1.1224175146469317,
      "grad_norm": 0.17362362146377563,
      "learning_rate": 6.48212079861638e-05,
      "loss": 0.5437,
      "step": 5460
    },
    {
      "epoch": 1.1226230856203103,
      "grad_norm": 0.19400741159915924,
      "learning_rate": 6.481224050796213e-05,
      "loss": 0.5481,
      "step": 5461
    },
    {
      "epoch": 1.122828656593689,
      "grad_norm": 0.1908549964427948,
      "learning_rate": 6.480327205370397e-05,
      "loss": 0.5593,
      "step": 5462
    },
    {
      "epoch": 1.1230342275670675,
      "grad_norm": 0.1656675636768341,
      "learning_rate": 6.479430262383116e-05,
      "loss": 0.5369,
      "step": 5463
    },
    {
      "epoch": 1.123239798540446,
      "grad_norm": 0.16304363310337067,
      "learning_rate": 6.478533221878556e-05,
      "loss": 0.5697,
      "step": 5464
    },
    {
      "epoch": 1.1234453695138247,
      "grad_norm": 0.19559811055660248,
      "learning_rate": 6.477636083900914e-05,
      "loss": 0.5856,
      "step": 5465
    },
    {
      "epoch": 1.1236509404872033,
      "grad_norm": 0.17000918090343475,
      "learning_rate": 6.476738848494385e-05,
      "loss": 0.5545,
      "step": 5466
    },
    {
      "epoch": 1.1238565114605819,
      "grad_norm": 0.1553100198507309,
      "learning_rate": 6.475841515703172e-05,
      "loss": 0.5531,
      "step": 5467
    },
    {
      "epoch": 1.1240620824339604,
      "grad_norm": 0.1977023035287857,
      "learning_rate": 6.474944085571482e-05,
      "loss": 0.5735,
      "step": 5468
    },
    {
      "epoch": 1.1242676534073388,
      "grad_norm": 0.1898386925458908,
      "learning_rate": 6.47404655814353e-05,
      "loss": 0.5487,
      "step": 5469
    },
    {
      "epoch": 1.1244732243807174,
      "grad_norm": 0.18860745429992676,
      "learning_rate": 6.473148933463529e-05,
      "loss": 0.5634,
      "step": 5470
    },
    {
      "epoch": 1.124678795354096,
      "grad_norm": 0.1715293824672699,
      "learning_rate": 6.472251211575704e-05,
      "loss": 0.546,
      "step": 5471
    },
    {
      "epoch": 1.1248843663274746,
      "grad_norm": 0.13662804663181305,
      "learning_rate": 6.471353392524277e-05,
      "loss": 0.5186,
      "step": 5472
    },
    {
      "epoch": 1.1250899373008532,
      "grad_norm": 0.16437150537967682,
      "learning_rate": 6.470455476353486e-05,
      "loss": 0.5628,
      "step": 5473
    },
    {
      "epoch": 1.1252955082742317,
      "grad_norm": 0.20408563315868378,
      "learning_rate": 6.469557463107562e-05,
      "loss": 0.5723,
      "step": 5474
    },
    {
      "epoch": 1.1255010792476101,
      "grad_norm": 0.19299282133579254,
      "learning_rate": 6.468659352830746e-05,
      "loss": 0.5923,
      "step": 5475
    },
    {
      "epoch": 1.1257066502209887,
      "grad_norm": 0.18226416409015656,
      "learning_rate": 6.467761145567286e-05,
      "loss": 0.5624,
      "step": 5476
    },
    {
      "epoch": 1.1259122211943673,
      "grad_norm": 0.18840613961219788,
      "learning_rate": 6.466862841361432e-05,
      "loss": 0.5697,
      "step": 5477
    },
    {
      "epoch": 1.1261177921677459,
      "grad_norm": 0.1877157837152481,
      "learning_rate": 6.465964440257438e-05,
      "loss": 0.5625,
      "step": 5478
    },
    {
      "epoch": 1.1263233631411245,
      "grad_norm": 0.25759997963905334,
      "learning_rate": 6.465065942299567e-05,
      "loss": 0.5425,
      "step": 5479
    },
    {
      "epoch": 1.126528934114503,
      "grad_norm": 0.19235903024673462,
      "learning_rate": 6.46416734753208e-05,
      "loss": 0.5562,
      "step": 5480
    },
    {
      "epoch": 1.1267345050878816,
      "grad_norm": 0.20213893055915833,
      "learning_rate": 6.46326865599925e-05,
      "loss": 0.5467,
      "step": 5481
    },
    {
      "epoch": 1.1269400760612602,
      "grad_norm": 0.19602036476135254,
      "learning_rate": 6.462369867745348e-05,
      "loss": 0.5814,
      "step": 5482
    },
    {
      "epoch": 1.1271456470346388,
      "grad_norm": 0.19586962461471558,
      "learning_rate": 6.461470982814657e-05,
      "loss": 0.5604,
      "step": 5483
    },
    {
      "epoch": 1.1273512180080172,
      "grad_norm": 0.18865470588207245,
      "learning_rate": 6.460572001251456e-05,
      "loss": 0.5345,
      "step": 5484
    },
    {
      "epoch": 1.1275567889813958,
      "grad_norm": 0.19333775341510773,
      "learning_rate": 6.459672923100036e-05,
      "loss": 0.572,
      "step": 5485
    },
    {
      "epoch": 1.1277623599547744,
      "grad_norm": 0.1982879638671875,
      "learning_rate": 6.458773748404693e-05,
      "loss": 0.593,
      "step": 5486
    },
    {
      "epoch": 1.127967930928153,
      "grad_norm": 0.19559934735298157,
      "learning_rate": 6.457874477209722e-05,
      "loss": 0.5625,
      "step": 5487
    },
    {
      "epoch": 1.1281735019015315,
      "grad_norm": 0.190285325050354,
      "learning_rate": 6.456975109559425e-05,
      "loss": 0.5579,
      "step": 5488
    },
    {
      "epoch": 1.1283790728749101,
      "grad_norm": 0.1935376673936844,
      "learning_rate": 6.456075645498113e-05,
      "loss": 0.5611,
      "step": 5489
    },
    {
      "epoch": 1.1285846438482887,
      "grad_norm": 0.1908402442932129,
      "learning_rate": 6.455176085070095e-05,
      "loss": 0.5556,
      "step": 5490
    },
    {
      "epoch": 1.128790214821667,
      "grad_norm": 0.1894407868385315,
      "learning_rate": 6.45427642831969e-05,
      "loss": 0.5655,
      "step": 5491
    },
    {
      "epoch": 1.1289957857950457,
      "grad_norm": 0.16991651058197021,
      "learning_rate": 6.453376675291221e-05,
      "loss": 0.5269,
      "step": 5492
    },
    {
      "epoch": 1.1292013567684243,
      "grad_norm": 0.14893554151058197,
      "learning_rate": 6.452476826029012e-05,
      "loss": 0.5192,
      "step": 5493
    },
    {
      "epoch": 1.1294069277418028,
      "grad_norm": 0.15781262516975403,
      "learning_rate": 6.451576880577397e-05,
      "loss": 0.5827,
      "step": 5494
    },
    {
      "epoch": 1.1296124987151814,
      "grad_norm": 0.16692712903022766,
      "learning_rate": 6.45067683898071e-05,
      "loss": 0.5338,
      "step": 5495
    },
    {
      "epoch": 1.12981806968856,
      "grad_norm": 0.18039274215698242,
      "learning_rate": 6.449776701283292e-05,
      "loss": 0.5598,
      "step": 5496
    },
    {
      "epoch": 1.1300236406619386,
      "grad_norm": 0.20324920117855072,
      "learning_rate": 6.448876467529488e-05,
      "loss": 0.5711,
      "step": 5497
    },
    {
      "epoch": 1.1302292116353172,
      "grad_norm": 0.19356949627399445,
      "learning_rate": 6.447976137763652e-05,
      "loss": 0.5498,
      "step": 5498
    },
    {
      "epoch": 1.1304347826086956,
      "grad_norm": 0.19591811299324036,
      "learning_rate": 6.447075712030135e-05,
      "loss": 0.5585,
      "step": 5499
    },
    {
      "epoch": 1.1306403535820742,
      "grad_norm": 0.18893134593963623,
      "learning_rate": 6.4461751903733e-05,
      "loss": 0.5425,
      "step": 5500
    },
    {
      "epoch": 1.1308459245554527,
      "grad_norm": 0.1979568600654602,
      "learning_rate": 6.445274572837509e-05,
      "loss": 0.5395,
      "step": 5501
    },
    {
      "epoch": 1.1310514955288313,
      "grad_norm": 0.20097365975379944,
      "learning_rate": 6.444373859467131e-05,
      "loss": 0.5571,
      "step": 5502
    },
    {
      "epoch": 1.13125706650221,
      "grad_norm": 0.1974884420633316,
      "learning_rate": 6.443473050306541e-05,
      "loss": 0.5778,
      "step": 5503
    },
    {
      "epoch": 1.1314626374755885,
      "grad_norm": 0.19491606950759888,
      "learning_rate": 6.442572145400119e-05,
      "loss": 0.5408,
      "step": 5504
    },
    {
      "epoch": 1.131668208448967,
      "grad_norm": 0.2038283497095108,
      "learning_rate": 6.441671144792245e-05,
      "loss": 0.5597,
      "step": 5505
    },
    {
      "epoch": 1.1318737794223455,
      "grad_norm": 0.2011345475912094,
      "learning_rate": 6.440770048527311e-05,
      "loss": 0.5645,
      "step": 5506
    },
    {
      "epoch": 1.132079350395724,
      "grad_norm": 0.20046375691890717,
      "learning_rate": 6.439868856649706e-05,
      "loss": 0.565,
      "step": 5507
    },
    {
      "epoch": 1.1322849213691026,
      "grad_norm": 0.19624361395835876,
      "learning_rate": 6.438967569203831e-05,
      "loss": 0.5556,
      "step": 5508
    },
    {
      "epoch": 1.1324904923424812,
      "grad_norm": 0.19601401686668396,
      "learning_rate": 6.438066186234086e-05,
      "loss": 0.5608,
      "step": 5509
    },
    {
      "epoch": 1.1326960633158598,
      "grad_norm": 0.19871017336845398,
      "learning_rate": 6.437164707784877e-05,
      "loss": 0.5616,
      "step": 5510
    },
    {
      "epoch": 1.1329016342892384,
      "grad_norm": 0.19127802550792694,
      "learning_rate": 6.43626313390062e-05,
      "loss": 0.5778,
      "step": 5511
    },
    {
      "epoch": 1.133107205262617,
      "grad_norm": 0.19276481866836548,
      "learning_rate": 6.435361464625726e-05,
      "loss": 0.5488,
      "step": 5512
    },
    {
      "epoch": 1.1333127762359956,
      "grad_norm": 0.19331035017967224,
      "learning_rate": 6.434459700004619e-05,
      "loss": 0.5149,
      "step": 5513
    },
    {
      "epoch": 1.133518347209374,
      "grad_norm": 0.19106508791446686,
      "learning_rate": 6.433557840081726e-05,
      "loss": 0.5277,
      "step": 5514
    },
    {
      "epoch": 1.1337239181827525,
      "grad_norm": 0.1550726294517517,
      "learning_rate": 6.432655884901473e-05,
      "loss": 0.5596,
      "step": 5515
    },
    {
      "epoch": 1.1339294891561311,
      "grad_norm": 0.20075179636478424,
      "learning_rate": 6.431753834508299e-05,
      "loss": 0.5461,
      "step": 5516
    },
    {
      "epoch": 1.1341350601295097,
      "grad_norm": 0.20653320848941803,
      "learning_rate": 6.430851688946643e-05,
      "loss": 0.6038,
      "step": 5517
    },
    {
      "epoch": 1.1343406311028883,
      "grad_norm": 0.19482316076755524,
      "learning_rate": 6.42994944826095e-05,
      "loss": 0.5716,
      "step": 5518
    },
    {
      "epoch": 1.1345462020762669,
      "grad_norm": 0.18027710914611816,
      "learning_rate": 6.429047112495667e-05,
      "loss": 0.5531,
      "step": 5519
    },
    {
      "epoch": 1.1347517730496455,
      "grad_norm": 0.15849310159683228,
      "learning_rate": 6.428144681695247e-05,
      "loss": 0.5674,
      "step": 5520
    },
    {
      "epoch": 1.1349573440230238,
      "grad_norm": 0.19099898636341095,
      "learning_rate": 6.427242155904154e-05,
      "loss": 0.5405,
      "step": 5521
    },
    {
      "epoch": 1.1351629149964024,
      "grad_norm": 0.2118232399225235,
      "learning_rate": 6.426339535166847e-05,
      "loss": 0.5569,
      "step": 5522
    },
    {
      "epoch": 1.135368485969781,
      "grad_norm": 0.19552506506443024,
      "learning_rate": 6.425436819527792e-05,
      "loss": 0.5575,
      "step": 5523
    },
    {
      "epoch": 1.1355740569431596,
      "grad_norm": 0.19680903851985931,
      "learning_rate": 6.424534009031468e-05,
      "loss": 0.5644,
      "step": 5524
    },
    {
      "epoch": 1.1357796279165382,
      "grad_norm": 0.19150924682617188,
      "learning_rate": 6.423631103722348e-05,
      "loss": 0.5453,
      "step": 5525
    },
    {
      "epoch": 1.1359851988899168,
      "grad_norm": 0.19185394048690796,
      "learning_rate": 6.422728103644915e-05,
      "loss": 0.5408,
      "step": 5526
    },
    {
      "epoch": 1.1361907698632954,
      "grad_norm": 0.19700084626674652,
      "learning_rate": 6.421825008843652e-05,
      "loss": 0.5664,
      "step": 5527
    },
    {
      "epoch": 1.136396340836674,
      "grad_norm": 0.19622080028057098,
      "learning_rate": 6.420921819363057e-05,
      "loss": 0.5848,
      "step": 5528
    },
    {
      "epoch": 1.1366019118100525,
      "grad_norm": 0.19052082300186157,
      "learning_rate": 6.420018535247621e-05,
      "loss": 0.5607,
      "step": 5529
    },
    {
      "epoch": 1.136807482783431,
      "grad_norm": 0.18648898601531982,
      "learning_rate": 6.419115156541846e-05,
      "loss": 0.5627,
      "step": 5530
    },
    {
      "epoch": 1.1370130537568095,
      "grad_norm": 0.20063170790672302,
      "learning_rate": 6.418211683290235e-05,
      "loss": 0.5857,
      "step": 5531
    },
    {
      "epoch": 1.137218624730188,
      "grad_norm": 0.18962214887142181,
      "learning_rate": 6.417308115537303e-05,
      "loss": 0.5854,
      "step": 5532
    },
    {
      "epoch": 1.1374241957035667,
      "grad_norm": 0.20246468484401703,
      "learning_rate": 6.41640445332756e-05,
      "loss": 0.5883,
      "step": 5533
    },
    {
      "epoch": 1.1376297666769453,
      "grad_norm": 0.18931740522384644,
      "learning_rate": 6.415500696705528e-05,
      "loss": 0.5262,
      "step": 5534
    },
    {
      "epoch": 1.1378353376503239,
      "grad_norm": 0.19331716001033783,
      "learning_rate": 6.41459684571573e-05,
      "loss": 0.5534,
      "step": 5535
    },
    {
      "epoch": 1.1380409086237022,
      "grad_norm": 0.19788740575313568,
      "learning_rate": 6.413692900402693e-05,
      "loss": 0.5702,
      "step": 5536
    },
    {
      "epoch": 1.1382464795970808,
      "grad_norm": 0.19547824561595917,
      "learning_rate": 6.41278886081095e-05,
      "loss": 0.5647,
      "step": 5537
    },
    {
      "epoch": 1.1384520505704594,
      "grad_norm": 0.1888136863708496,
      "learning_rate": 6.411884726985043e-05,
      "loss": 0.5445,
      "step": 5538
    },
    {
      "epoch": 1.138657621543838,
      "grad_norm": 0.19497732818126678,
      "learning_rate": 6.410980498969512e-05,
      "loss": 0.5777,
      "step": 5539
    },
    {
      "epoch": 1.1388631925172166,
      "grad_norm": 0.18465173244476318,
      "learning_rate": 6.410076176808901e-05,
      "loss": 0.5299,
      "step": 5540
    },
    {
      "epoch": 1.1390687634905952,
      "grad_norm": 0.1675313413143158,
      "learning_rate": 6.409171760547765e-05,
      "loss": 0.5722,
      "step": 5541
    },
    {
      "epoch": 1.1392743344639737,
      "grad_norm": 0.2085336148738861,
      "learning_rate": 6.408267250230661e-05,
      "loss": 0.5745,
      "step": 5542
    },
    {
      "epoch": 1.1394799054373523,
      "grad_norm": 0.19899022579193115,
      "learning_rate": 6.407362645902148e-05,
      "loss": 0.5709,
      "step": 5543
    },
    {
      "epoch": 1.139685476410731,
      "grad_norm": 0.1954008936882019,
      "learning_rate": 6.406457947606792e-05,
      "loss": 0.5704,
      "step": 5544
    },
    {
      "epoch": 1.1398910473841093,
      "grad_norm": 0.17613859474658966,
      "learning_rate": 6.405553155389165e-05,
      "loss": 0.5395,
      "step": 5545
    },
    {
      "epoch": 1.1400966183574879,
      "grad_norm": 0.1824086457490921,
      "learning_rate": 6.40464826929384e-05,
      "loss": 0.5558,
      "step": 5546
    },
    {
      "epoch": 1.1403021893308665,
      "grad_norm": 0.20690536499023438,
      "learning_rate": 6.403743289365398e-05,
      "loss": 0.5626,
      "step": 5547
    },
    {
      "epoch": 1.140507760304245,
      "grad_norm": 0.20793819427490234,
      "learning_rate": 6.40283821564842e-05,
      "loss": 0.5819,
      "step": 5548
    },
    {
      "epoch": 1.1407133312776236,
      "grad_norm": 0.19500964879989624,
      "learning_rate": 6.401933048187499e-05,
      "loss": 0.5696,
      "step": 5549
    },
    {
      "epoch": 1.1409189022510022,
      "grad_norm": 0.19967152178287506,
      "learning_rate": 6.401027787027225e-05,
      "loss": 0.5567,
      "step": 5550
    },
    {
      "epoch": 1.1411244732243806,
      "grad_norm": 0.19124870002269745,
      "learning_rate": 6.400122432212198e-05,
      "loss": 0.5276,
      "step": 5551
    },
    {
      "epoch": 1.1413300441977592,
      "grad_norm": 0.1926882117986679,
      "learning_rate": 6.399216983787019e-05,
      "loss": 0.5785,
      "step": 5552
    },
    {
      "epoch": 1.1415356151711378,
      "grad_norm": 0.1873985081911087,
      "learning_rate": 6.398311441796297e-05,
      "loss": 0.5496,
      "step": 5553
    },
    {
      "epoch": 1.1417411861445164,
      "grad_norm": 0.164115771651268,
      "learning_rate": 6.397405806284642e-05,
      "loss": 0.5343,
      "step": 5554
    },
    {
      "epoch": 1.141946757117895,
      "grad_norm": 0.16665267944335938,
      "learning_rate": 6.396500077296673e-05,
      "loss": 0.5769,
      "step": 5555
    },
    {
      "epoch": 1.1421523280912735,
      "grad_norm": 0.1954329013824463,
      "learning_rate": 6.395594254877009e-05,
      "loss": 0.5652,
      "step": 5556
    },
    {
      "epoch": 1.1423578990646521,
      "grad_norm": 0.19422973692417145,
      "learning_rate": 6.394688339070277e-05,
      "loss": 0.5596,
      "step": 5557
    },
    {
      "epoch": 1.1425634700380307,
      "grad_norm": 0.19732142984867096,
      "learning_rate": 6.393782329921104e-05,
      "loss": 0.5887,
      "step": 5558
    },
    {
      "epoch": 1.1427690410114093,
      "grad_norm": 0.195445254445076,
      "learning_rate": 6.392876227474128e-05,
      "loss": 0.5737,
      "step": 5559
    },
    {
      "epoch": 1.1429746119847877,
      "grad_norm": 0.18976660072803497,
      "learning_rate": 6.391970031773988e-05,
      "loss": 0.5693,
      "step": 5560
    },
    {
      "epoch": 1.1431801829581663,
      "grad_norm": 0.18721553683280945,
      "learning_rate": 6.391063742865327e-05,
      "loss": 0.5393,
      "step": 5561
    },
    {
      "epoch": 1.1433857539315448,
      "grad_norm": 0.19081273674964905,
      "learning_rate": 6.390157360792794e-05,
      "loss": 0.5565,
      "step": 5562
    },
    {
      "epoch": 1.1435913249049234,
      "grad_norm": 0.19391131401062012,
      "learning_rate": 6.389250885601043e-05,
      "loss": 0.5571,
      "step": 5563
    },
    {
      "epoch": 1.143796895878302,
      "grad_norm": 0.18970650434494019,
      "learning_rate": 6.388344317334732e-05,
      "loss": 0.571,
      "step": 5564
    },
    {
      "epoch": 1.1440024668516806,
      "grad_norm": 0.1937200129032135,
      "learning_rate": 6.38743765603852e-05,
      "loss": 0.5619,
      "step": 5565
    },
    {
      "epoch": 1.1442080378250592,
      "grad_norm": 0.189813494682312,
      "learning_rate": 6.386530901757078e-05,
      "loss": 0.562,
      "step": 5566
    },
    {
      "epoch": 1.1444136087984376,
      "grad_norm": 0.19848157465457916,
      "learning_rate": 6.385624054535078e-05,
      "loss": 0.5776,
      "step": 5567
    },
    {
      "epoch": 1.1446191797718162,
      "grad_norm": 0.19412924349308014,
      "learning_rate": 6.384717114417191e-05,
      "loss": 0.5637,
      "step": 5568
    },
    {
      "epoch": 1.1448247507451947,
      "grad_norm": 0.20294548571109772,
      "learning_rate": 6.383810081448103e-05,
      "loss": 0.5626,
      "step": 5569
    },
    {
      "epoch": 1.1450303217185733,
      "grad_norm": 0.17422319948673248,
      "learning_rate": 6.382902955672496e-05,
      "loss": 0.5506,
      "step": 5570
    },
    {
      "epoch": 1.145235892691952,
      "grad_norm": 0.15921704471111298,
      "learning_rate": 6.381995737135062e-05,
      "loss": 0.5882,
      "step": 5571
    },
    {
      "epoch": 1.1454414636653305,
      "grad_norm": 0.20002704858779907,
      "learning_rate": 6.381088425880495e-05,
      "loss": 0.5677,
      "step": 5572
    },
    {
      "epoch": 1.145647034638709,
      "grad_norm": 0.1957893818616867,
      "learning_rate": 6.38018102195349e-05,
      "loss": 0.5629,
      "step": 5573
    },
    {
      "epoch": 1.1458526056120877,
      "grad_norm": 0.19180312752723694,
      "learning_rate": 6.379273525398758e-05,
      "loss": 0.5645,
      "step": 5574
    },
    {
      "epoch": 1.146058176585466,
      "grad_norm": 0.18908941745758057,
      "learning_rate": 6.378365936261e-05,
      "loss": 0.558,
      "step": 5575
    },
    {
      "epoch": 1.1462637475588446,
      "grad_norm": 0.19693338871002197,
      "learning_rate": 6.377458254584934e-05,
      "loss": 0.5741,
      "step": 5576
    },
    {
      "epoch": 1.1464693185322232,
      "grad_norm": 0.190039724111557,
      "learning_rate": 6.376550480415275e-05,
      "loss": 0.5431,
      "step": 5577
    },
    {
      "epoch": 1.1466748895056018,
      "grad_norm": 0.1961604356765747,
      "learning_rate": 6.375642613796745e-05,
      "loss": 0.563,
      "step": 5578
    },
    {
      "epoch": 1.1468804604789804,
      "grad_norm": 0.19689500331878662,
      "learning_rate": 6.374734654774068e-05,
      "loss": 0.5579,
      "step": 5579
    },
    {
      "epoch": 1.147086031452359,
      "grad_norm": 0.1831909865140915,
      "learning_rate": 6.373826603391979e-05,
      "loss": 0.5688,
      "step": 5580
    },
    {
      "epoch": 1.1472916024257376,
      "grad_norm": 0.18834874033927917,
      "learning_rate": 6.372918459695212e-05,
      "loss": 0.55,
      "step": 5581
    },
    {
      "epoch": 1.147497173399116,
      "grad_norm": 0.19605682790279388,
      "learning_rate": 6.372010223728504e-05,
      "loss": 0.5774,
      "step": 5582
    },
    {
      "epoch": 1.1477027443724945,
      "grad_norm": 0.19253866374492645,
      "learning_rate": 6.371101895536605e-05,
      "loss": 0.589,
      "step": 5583
    },
    {
      "epoch": 1.1479083153458731,
      "grad_norm": 0.18777357041835785,
      "learning_rate": 6.370193475164258e-05,
      "loss": 0.5665,
      "step": 5584
    },
    {
      "epoch": 1.1481138863192517,
      "grad_norm": 0.1673029363155365,
      "learning_rate": 6.36928496265622e-05,
      "loss": 0.5298,
      "step": 5585
    },
    {
      "epoch": 1.1483194572926303,
      "grad_norm": 0.15895609557628632,
      "learning_rate": 6.36837635805725e-05,
      "loss": 0.5323,
      "step": 5586
    },
    {
      "epoch": 1.1485250282660089,
      "grad_norm": 0.19794027507305145,
      "learning_rate": 6.367467661412111e-05,
      "loss": 0.5677,
      "step": 5587
    },
    {
      "epoch": 1.1487305992393875,
      "grad_norm": 0.20095770061016083,
      "learning_rate": 6.366558872765569e-05,
      "loss": 0.5562,
      "step": 5588
    },
    {
      "epoch": 1.148936170212766,
      "grad_norm": 0.16546010971069336,
      "learning_rate": 6.365649992162393e-05,
      "loss": 0.5212,
      "step": 5589
    },
    {
      "epoch": 1.1491417411861444,
      "grad_norm": 0.16688905656337738,
      "learning_rate": 6.364741019647363e-05,
      "loss": 0.5421,
      "step": 5590
    },
    {
      "epoch": 1.149347312159523,
      "grad_norm": 0.12764035165309906,
      "learning_rate": 6.36383195526526e-05,
      "loss": 0.5154,
      "step": 5591
    },
    {
      "epoch": 1.1495528831329016,
      "grad_norm": 0.16854409873485565,
      "learning_rate": 6.362922799060866e-05,
      "loss": 0.5689,
      "step": 5592
    },
    {
      "epoch": 1.1497584541062802,
      "grad_norm": 0.19557413458824158,
      "learning_rate": 6.362013551078974e-05,
      "loss": 0.5581,
      "step": 5593
    },
    {
      "epoch": 1.1499640250796588,
      "grad_norm": 0.19200196862220764,
      "learning_rate": 6.361104211364377e-05,
      "loss": 0.5744,
      "step": 5594
    },
    {
      "epoch": 1.1501695960530374,
      "grad_norm": 0.19255445897579193,
      "learning_rate": 6.360194779961875e-05,
      "loss": 0.5677,
      "step": 5595
    },
    {
      "epoch": 1.150375167026416,
      "grad_norm": 0.1860598772764206,
      "learning_rate": 6.359285256916269e-05,
      "loss": 0.5239,
      "step": 5596
    },
    {
      "epoch": 1.1505807379997943,
      "grad_norm": 0.18977835774421692,
      "learning_rate": 6.358375642272371e-05,
      "loss": 0.5502,
      "step": 5597
    },
    {
      "epoch": 1.150786308973173,
      "grad_norm": 0.18825951218605042,
      "learning_rate": 6.35746593607499e-05,
      "loss": 0.5701,
      "step": 5598
    },
    {
      "epoch": 1.1509918799465515,
      "grad_norm": 0.1946858912706375,
      "learning_rate": 6.356556138368945e-05,
      "loss": 0.5735,
      "step": 5599
    },
    {
      "epoch": 1.15119745091993,
      "grad_norm": 0.1934114545583725,
      "learning_rate": 6.355646249199055e-05,
      "loss": 0.57,
      "step": 5600
    },
    {
      "epoch": 1.1514030218933087,
      "grad_norm": 0.19345784187316895,
      "learning_rate": 6.354736268610148e-05,
      "loss": 0.568,
      "step": 5601
    },
    {
      "epoch": 1.1516085928666873,
      "grad_norm": 0.1907486766576767,
      "learning_rate": 6.353826196647056e-05,
      "loss": 0.5609,
      "step": 5602
    },
    {
      "epoch": 1.1518141638400659,
      "grad_norm": 0.19529633224010468,
      "learning_rate": 6.35291603335461e-05,
      "loss": 0.5531,
      "step": 5603
    },
    {
      "epoch": 1.1520197348134444,
      "grad_norm": 0.19347496330738068,
      "learning_rate": 6.352005778777652e-05,
      "loss": 0.5748,
      "step": 5604
    },
    {
      "epoch": 1.1522253057868228,
      "grad_norm": 0.1948879212141037,
      "learning_rate": 6.351095432961024e-05,
      "loss": 0.565,
      "step": 5605
    },
    {
      "epoch": 1.1524308767602014,
      "grad_norm": 0.19510291516780853,
      "learning_rate": 6.350184995949578e-05,
      "loss": 0.5492,
      "step": 5606
    },
    {
      "epoch": 1.15263644773358,
      "grad_norm": 0.198397696018219,
      "learning_rate": 6.349274467788165e-05,
      "loss": 0.5506,
      "step": 5607
    },
    {
      "epoch": 1.1528420187069586,
      "grad_norm": 0.1937544345855713,
      "learning_rate": 6.348363848521643e-05,
      "loss": 0.556,
      "step": 5608
    },
    {
      "epoch": 1.1530475896803372,
      "grad_norm": 0.1949324756860733,
      "learning_rate": 6.347453138194872e-05,
      "loss": 0.5608,
      "step": 5609
    },
    {
      "epoch": 1.1532531606537157,
      "grad_norm": 0.18160304427146912,
      "learning_rate": 6.34654233685272e-05,
      "loss": 0.5396,
      "step": 5610
    },
    {
      "epoch": 1.1534587316270943,
      "grad_norm": 0.1651293933391571,
      "learning_rate": 6.345631444540058e-05,
      "loss": 0.5618,
      "step": 5611
    },
    {
      "epoch": 1.1536643026004727,
      "grad_norm": 0.19430503249168396,
      "learning_rate": 6.344720461301761e-05,
      "loss": 0.5766,
      "step": 5612
    },
    {
      "epoch": 1.1538698735738513,
      "grad_norm": 0.19232423603534698,
      "learning_rate": 6.34380938718271e-05,
      "loss": 0.5543,
      "step": 5613
    },
    {
      "epoch": 1.1540754445472299,
      "grad_norm": 0.19700485467910767,
      "learning_rate": 6.342898222227788e-05,
      "loss": 0.6007,
      "step": 5614
    },
    {
      "epoch": 1.1542810155206085,
      "grad_norm": 0.18897385895252228,
      "learning_rate": 6.341986966481883e-05,
      "loss": 0.5658,
      "step": 5615
    },
    {
      "epoch": 1.154486586493987,
      "grad_norm": 0.18891417980194092,
      "learning_rate": 6.341075619989891e-05,
      "loss": 0.5725,
      "step": 5616
    },
    {
      "epoch": 1.1546921574673656,
      "grad_norm": 0.17012788355350494,
      "learning_rate": 6.340164182796707e-05,
      "loss": 0.5365,
      "step": 5617
    },
    {
      "epoch": 1.1548977284407442,
      "grad_norm": 0.16269567608833313,
      "learning_rate": 6.339252654947236e-05,
      "loss": 0.5708,
      "step": 5618
    },
    {
      "epoch": 1.1551032994141228,
      "grad_norm": 0.19354234635829926,
      "learning_rate": 6.338341036486385e-05,
      "loss": 0.5645,
      "step": 5619
    },
    {
      "epoch": 1.1553088703875014,
      "grad_norm": 0.19386227428913116,
      "learning_rate": 6.33742932745906e-05,
      "loss": 0.5772,
      "step": 5620
    },
    {
      "epoch": 1.1555144413608798,
      "grad_norm": 0.17871583998203278,
      "learning_rate": 6.336517527910182e-05,
      "loss": 0.5568,
      "step": 5621
    },
    {
      "epoch": 1.1557200123342584,
      "grad_norm": 0.18921589851379395,
      "learning_rate": 6.335605637884668e-05,
      "loss": 0.5555,
      "step": 5622
    },
    {
      "epoch": 1.155925583307637,
      "grad_norm": 0.19476552307605743,
      "learning_rate": 6.334693657427446e-05,
      "loss": 0.5581,
      "step": 5623
    },
    {
      "epoch": 1.1561311542810155,
      "grad_norm": 0.18380312621593475,
      "learning_rate": 6.333781586583441e-05,
      "loss": 0.5322,
      "step": 5624
    },
    {
      "epoch": 1.1563367252543941,
      "grad_norm": 0.18677309155464172,
      "learning_rate": 6.332869425397588e-05,
      "loss": 0.5712,
      "step": 5625
    },
    {
      "epoch": 1.1565422962277727,
      "grad_norm": 0.19158649444580078,
      "learning_rate": 6.331957173914826e-05,
      "loss": 0.5846,
      "step": 5626
    },
    {
      "epoch": 1.156747867201151,
      "grad_norm": 0.19586919248104095,
      "learning_rate": 6.331044832180098e-05,
      "loss": 0.5589,
      "step": 5627
    },
    {
      "epoch": 1.1569534381745297,
      "grad_norm": 0.15967777371406555,
      "learning_rate": 6.330132400238347e-05,
      "loss": 0.5268,
      "step": 5628
    },
    {
      "epoch": 1.1571590091479083,
      "grad_norm": 0.1551171988248825,
      "learning_rate": 6.329219878134528e-05,
      "loss": 0.5509,
      "step": 5629
    },
    {
      "epoch": 1.1573645801212868,
      "grad_norm": 0.18467473983764648,
      "learning_rate": 6.328307265913595e-05,
      "loss": 0.5574,
      "step": 5630
    },
    {
      "epoch": 1.1575701510946654,
      "grad_norm": 0.19859431684017181,
      "learning_rate": 6.327394563620509e-05,
      "loss": 0.5613,
      "step": 5631
    },
    {
      "epoch": 1.157775722068044,
      "grad_norm": 0.19411081075668335,
      "learning_rate": 6.326481771300234e-05,
      "loss": 0.5589,
      "step": 5632
    },
    {
      "epoch": 1.1579812930414226,
      "grad_norm": 0.18985684216022491,
      "learning_rate": 6.325568888997739e-05,
      "loss": 0.5673,
      "step": 5633
    },
    {
      "epoch": 1.1581868640148012,
      "grad_norm": 0.1923382729291916,
      "learning_rate": 6.324655916757997e-05,
      "loss": 0.558,
      "step": 5634
    },
    {
      "epoch": 1.1583924349881798,
      "grad_norm": 0.20484760403633118,
      "learning_rate": 6.323742854625986e-05,
      "loss": 0.5561,
      "step": 5635
    },
    {
      "epoch": 1.1585980059615582,
      "grad_norm": 0.15869790315628052,
      "learning_rate": 6.32282970264669e-05,
      "loss": 0.5412,
      "step": 5636
    },
    {
      "epoch": 1.1588035769349367,
      "grad_norm": 0.16667144000530243,
      "learning_rate": 6.321916460865092e-05,
      "loss": 0.5605,
      "step": 5637
    },
    {
      "epoch": 1.1590091479083153,
      "grad_norm": 0.1636246144771576,
      "learning_rate": 6.321003129326187e-05,
      "loss": 0.5297,
      "step": 5638
    },
    {
      "epoch": 1.159214718881694,
      "grad_norm": 0.1557888388633728,
      "learning_rate": 6.320089708074971e-05,
      "loss": 0.5433,
      "step": 5639
    },
    {
      "epoch": 1.1594202898550725,
      "grad_norm": 0.18941344320774078,
      "learning_rate": 6.31917619715644e-05,
      "loss": 0.552,
      "step": 5640
    },
    {
      "epoch": 1.159625860828451,
      "grad_norm": 0.18825402855873108,
      "learning_rate": 6.318262596615602e-05,
      "loss": 0.5447,
      "step": 5641
    },
    {
      "epoch": 1.1598314318018295,
      "grad_norm": 0.16153112053871155,
      "learning_rate": 6.317348906497463e-05,
      "loss": 0.5363,
      "step": 5642
    },
    {
      "epoch": 1.160037002775208,
      "grad_norm": 0.15847079455852509,
      "learning_rate": 6.31643512684704e-05,
      "loss": 0.5523,
      "step": 5643
    },
    {
      "epoch": 1.1602425737485866,
      "grad_norm": 0.19507279992103577,
      "learning_rate": 6.315521257709345e-05,
      "loss": 0.5575,
      "step": 5644
    },
    {
      "epoch": 1.1604481447219652,
      "grad_norm": 0.19227170944213867,
      "learning_rate": 6.314607299129406e-05,
      "loss": 0.5725,
      "step": 5645
    },
    {
      "epoch": 1.1606537156953438,
      "grad_norm": 0.18888604640960693,
      "learning_rate": 6.313693251152247e-05,
      "loss": 0.5532,
      "step": 5646
    },
    {
      "epoch": 1.1608592866687224,
      "grad_norm": 0.20485495030879974,
      "learning_rate": 6.312779113822896e-05,
      "loss": 0.5469,
      "step": 5647
    },
    {
      "epoch": 1.161064857642101,
      "grad_norm": 0.1900404691696167,
      "learning_rate": 6.311864887186393e-05,
      "loss": 0.5593,
      "step": 5648
    },
    {
      "epoch": 1.1612704286154796,
      "grad_norm": 0.1928151249885559,
      "learning_rate": 6.310950571287774e-05,
      "loss": 0.553,
      "step": 5649
    },
    {
      "epoch": 1.1614759995888582,
      "grad_norm": 0.2048550844192505,
      "learning_rate": 6.310036166172086e-05,
      "loss": 0.5602,
      "step": 5650
    },
    {
      "epoch": 1.1616815705622365,
      "grad_norm": 0.16492126882076263,
      "learning_rate": 6.309121671884375e-05,
      "loss": 0.5306,
      "step": 5651
    },
    {
      "epoch": 1.1618871415356151,
      "grad_norm": 0.1620352864265442,
      "learning_rate": 6.308207088469697e-05,
      "loss": 0.5384,
      "step": 5652
    },
    {
      "epoch": 1.1620927125089937,
      "grad_norm": 0.19016869366168976,
      "learning_rate": 6.307292415973108e-05,
      "loss": 0.5666,
      "step": 5653
    },
    {
      "epoch": 1.1622982834823723,
      "grad_norm": 0.18808448314666748,
      "learning_rate": 6.306377654439666e-05,
      "loss": 0.5522,
      "step": 5654
    },
    {
      "epoch": 1.1625038544557509,
      "grad_norm": 0.1816331297159195,
      "learning_rate": 6.305462803914441e-05,
      "loss": 0.543,
      "step": 5655
    },
    {
      "epoch": 1.1627094254291295,
      "grad_norm": 0.18618269264698029,
      "learning_rate": 6.304547864442503e-05,
      "loss": 0.5674,
      "step": 5656
    },
    {
      "epoch": 1.162914996402508,
      "grad_norm": 0.1989988088607788,
      "learning_rate": 6.303632836068925e-05,
      "loss": 0.5658,
      "step": 5657
    },
    {
      "epoch": 1.1631205673758864,
      "grad_norm": 0.1896226555109024,
      "learning_rate": 6.302717718838788e-05,
      "loss": 0.572,
      "step": 5658
    },
    {
      "epoch": 1.163326138349265,
      "grad_norm": 0.16433072090148926,
      "learning_rate": 6.301802512797176e-05,
      "loss": 0.542,
      "step": 5659
    },
    {
      "epoch": 1.1635317093226436,
      "grad_norm": 0.1611773520708084,
      "learning_rate": 6.300887217989174e-05,
      "loss": 0.5528,
      "step": 5660
    },
    {
      "epoch": 1.1637372802960222,
      "grad_norm": 0.1935834139585495,
      "learning_rate": 6.299971834459877e-05,
      "loss": 0.5699,
      "step": 5661
    },
    {
      "epoch": 1.1639428512694008,
      "grad_norm": 0.19088830053806305,
      "learning_rate": 6.29905636225438e-05,
      "loss": 0.5742,
      "step": 5662
    },
    {
      "epoch": 1.1641484222427794,
      "grad_norm": 0.1988966315984726,
      "learning_rate": 6.298140801417786e-05,
      "loss": 0.566,
      "step": 5663
    },
    {
      "epoch": 1.164353993216158,
      "grad_norm": 0.2001844048500061,
      "learning_rate": 6.297225151995198e-05,
      "loss": 0.5765,
      "step": 5664
    },
    {
      "epoch": 1.1645595641895365,
      "grad_norm": 0.16796830296516418,
      "learning_rate": 6.296309414031727e-05,
      "loss": 0.5534,
      "step": 5665
    },
    {
      "epoch": 1.164765135162915,
      "grad_norm": 0.15863637626171112,
      "learning_rate": 6.295393587572489e-05,
      "loss": 0.576,
      "step": 5666
    },
    {
      "epoch": 1.1649707061362935,
      "grad_norm": 0.19147972762584686,
      "learning_rate": 6.2944776726626e-05,
      "loss": 0.5694,
      "step": 5667
    },
    {
      "epoch": 1.165176277109672,
      "grad_norm": 0.18630050122737885,
      "learning_rate": 6.293561669347181e-05,
      "loss": 0.561,
      "step": 5668
    },
    {
      "epoch": 1.1653818480830507,
      "grad_norm": 0.1899455487728119,
      "learning_rate": 6.292645577671364e-05,
      "loss": 0.5807,
      "step": 5669
    },
    {
      "epoch": 1.1655874190564293,
      "grad_norm": 0.19663108885288239,
      "learning_rate": 6.291729397680277e-05,
      "loss": 0.5594,
      "step": 5670
    },
    {
      "epoch": 1.1657929900298079,
      "grad_norm": 0.18838699162006378,
      "learning_rate": 6.290813129419058e-05,
      "loss": 0.5572,
      "step": 5671
    },
    {
      "epoch": 1.1659985610031864,
      "grad_norm": 0.19074362516403198,
      "learning_rate": 6.289896772932845e-05,
      "loss": 0.5593,
      "step": 5672
    },
    {
      "epoch": 1.1662041319765648,
      "grad_norm": 0.1634715497493744,
      "learning_rate": 6.288980328266785e-05,
      "loss": 0.5333,
      "step": 5673
    },
    {
      "epoch": 1.1664097029499434,
      "grad_norm": 0.13483376801013947,
      "learning_rate": 6.288063795466027e-05,
      "loss": 0.5092,
      "step": 5674
    },
    {
      "epoch": 1.166615273923322,
      "grad_norm": 0.18257947266101837,
      "learning_rate": 6.28714717457572e-05,
      "loss": 0.5607,
      "step": 5675
    },
    {
      "epoch": 1.1668208448967006,
      "grad_norm": 0.19993911683559418,
      "learning_rate": 6.286230465641028e-05,
      "loss": 0.5628,
      "step": 5676
    },
    {
      "epoch": 1.1670264158700792,
      "grad_norm": 0.1948871612548828,
      "learning_rate": 6.28531366870711e-05,
      "loss": 0.5566,
      "step": 5677
    },
    {
      "epoch": 1.1672319868434577,
      "grad_norm": 0.1864452362060547,
      "learning_rate": 6.28439678381913e-05,
      "loss": 0.5496,
      "step": 5678
    },
    {
      "epoch": 1.1674375578168363,
      "grad_norm": 0.17033499479293823,
      "learning_rate": 6.28347981102226e-05,
      "loss": 0.5291,
      "step": 5679
    },
    {
      "epoch": 1.167643128790215,
      "grad_norm": 0.16329137980937958,
      "learning_rate": 6.282562750361679e-05,
      "loss": 0.5538,
      "step": 5680
    },
    {
      "epoch": 1.1678486997635933,
      "grad_norm": 0.20135296881198883,
      "learning_rate": 6.281645601882561e-05,
      "loss": 0.5409,
      "step": 5681
    },
    {
      "epoch": 1.1680542707369719,
      "grad_norm": 0.16525396704673767,
      "learning_rate": 6.28072836563009e-05,
      "loss": 0.5034,
      "step": 5682
    },
    {
      "epoch": 1.1682598417103505,
      "grad_norm": 0.16303305327892303,
      "learning_rate": 6.279811041649457e-05,
      "loss": 0.5464,
      "step": 5683
    },
    {
      "epoch": 1.168465412683729,
      "grad_norm": 0.20432288944721222,
      "learning_rate": 6.278893629985854e-05,
      "loss": 0.5617,
      "step": 5684
    },
    {
      "epoch": 1.1686709836571076,
      "grad_norm": 0.19627077877521515,
      "learning_rate": 6.277976130684476e-05,
      "loss": 0.5516,
      "step": 5685
    },
    {
      "epoch": 1.1688765546304862,
      "grad_norm": 0.19442994892597198,
      "learning_rate": 6.277058543790522e-05,
      "loss": 0.5859,
      "step": 5686
    },
    {
      "epoch": 1.1690821256038648,
      "grad_norm": 0.1668756902217865,
      "learning_rate": 6.276140869349202e-05,
      "loss": 0.5412,
      "step": 5687
    },
    {
      "epoch": 1.1692876965772432,
      "grad_norm": 0.16319718956947327,
      "learning_rate": 6.275223107405723e-05,
      "loss": 0.5365,
      "step": 5688
    },
    {
      "epoch": 1.1694932675506218,
      "grad_norm": 0.20029065012931824,
      "learning_rate": 6.274305258005296e-05,
      "loss": 0.5555,
      "step": 5689
    },
    {
      "epoch": 1.1696988385240004,
      "grad_norm": 0.16278813779354095,
      "learning_rate": 6.273387321193146e-05,
      "loss": 0.5314,
      "step": 5690
    },
    {
      "epoch": 1.169904409497379,
      "grad_norm": 0.16741250455379486,
      "learning_rate": 6.272469297014488e-05,
      "loss": 0.5435,
      "step": 5691
    },
    {
      "epoch": 1.1701099804707575,
      "grad_norm": 0.2003338634967804,
      "learning_rate": 6.271551185514553e-05,
      "loss": 0.5842,
      "step": 5692
    },
    {
      "epoch": 1.1703155514441361,
      "grad_norm": 0.17789803445339203,
      "learning_rate": 6.270632986738573e-05,
      "loss": 0.5276,
      "step": 5693
    },
    {
      "epoch": 1.1705211224175147,
      "grad_norm": 0.16743101179599762,
      "learning_rate": 6.269714700731782e-05,
      "loss": 0.5777,
      "step": 5694
    },
    {
      "epoch": 1.1707266933908933,
      "grad_norm": 0.19358138740062714,
      "learning_rate": 6.268796327539417e-05,
      "loss": 0.5585,
      "step": 5695
    },
    {
      "epoch": 1.1709322643642717,
      "grad_norm": 0.16014361381530762,
      "learning_rate": 6.267877867206724e-05,
      "loss": 0.506,
      "step": 5696
    },
    {
      "epoch": 1.1711378353376503,
      "grad_norm": 0.15720070898532867,
      "learning_rate": 6.266959319778953e-05,
      "loss": 0.5688,
      "step": 5697
    },
    {
      "epoch": 1.1713434063110288,
      "grad_norm": 0.1944281905889511,
      "learning_rate": 6.266040685301356e-05,
      "loss": 0.5611,
      "step": 5698
    },
    {
      "epoch": 1.1715489772844074,
      "grad_norm": 0.19197237491607666,
      "learning_rate": 6.265121963819189e-05,
      "loss": 0.5491,
      "step": 5699
    },
    {
      "epoch": 1.171754548257786,
      "grad_norm": 0.1880941390991211,
      "learning_rate": 6.26420315537771e-05,
      "loss": 0.5478,
      "step": 5700
    },
    {
      "epoch": 1.1719601192311646,
      "grad_norm": 0.18762564659118652,
      "learning_rate": 6.26328426002219e-05,
      "loss": 0.5592,
      "step": 5701
    },
    {
      "epoch": 1.1721656902045432,
      "grad_norm": 0.19078297913074493,
      "learning_rate": 6.262365277797894e-05,
      "loss": 0.5801,
      "step": 5702
    },
    {
      "epoch": 1.1723712611779216,
      "grad_norm": 0.15825822949409485,
      "learning_rate": 6.2614462087501e-05,
      "loss": 0.5238,
      "step": 5703
    },
    {
      "epoch": 1.1725768321513002,
      "grad_norm": 0.16313259303569794,
      "learning_rate": 6.260527052924083e-05,
      "loss": 0.5675,
      "step": 5704
    },
    {
      "epoch": 1.1727824031246787,
      "grad_norm": 0.20915348827838898,
      "learning_rate": 6.259607810365128e-05,
      "loss": 0.5871,
      "step": 5705
    },
    {
      "epoch": 1.1729879740980573,
      "grad_norm": 0.1840449571609497,
      "learning_rate": 6.258688481118519e-05,
      "loss": 0.5617,
      "step": 5706
    },
    {
      "epoch": 1.173193545071436,
      "grad_norm": 0.19125378131866455,
      "learning_rate": 6.257769065229551e-05,
      "loss": 0.5525,
      "step": 5707
    },
    {
      "epoch": 1.1733991160448145,
      "grad_norm": 0.16844969987869263,
      "learning_rate": 6.256849562743514e-05,
      "loss": 0.5422,
      "step": 5708
    },
    {
      "epoch": 1.173604687018193,
      "grad_norm": 0.17428073287010193,
      "learning_rate": 6.255929973705714e-05,
      "loss": 0.5564,
      "step": 5709
    },
    {
      "epoch": 1.1738102579915717,
      "grad_norm": 0.1962093710899353,
      "learning_rate": 6.255010298161448e-05,
      "loss": 0.5671,
      "step": 5710
    },
    {
      "epoch": 1.1740158289649503,
      "grad_norm": 0.19688303768634796,
      "learning_rate": 6.254090536156028e-05,
      "loss": 0.5736,
      "step": 5711
    },
    {
      "epoch": 1.1742213999383286,
      "grad_norm": 0.19924046099185944,
      "learning_rate": 6.253170687734769e-05,
      "loss": 0.5536,
      "step": 5712
    },
    {
      "epoch": 1.1744269709117072,
      "grad_norm": 0.21053309738636017,
      "learning_rate": 6.252250752942981e-05,
      "loss": 0.5725,
      "step": 5713
    },
    {
      "epoch": 1.1746325418850858,
      "grad_norm": 0.15548844635486603,
      "learning_rate": 6.251330731825989e-05,
      "loss": 0.5061,
      "step": 5714
    },
    {
      "epoch": 1.1748381128584644,
      "grad_norm": 0.16448529064655304,
      "learning_rate": 6.250410624429118e-05,
      "loss": 0.5618,
      "step": 5715
    },
    {
      "epoch": 1.175043683831843,
      "grad_norm": 0.19345583021640778,
      "learning_rate": 6.249490430797699e-05,
      "loss": 0.548,
      "step": 5716
    },
    {
      "epoch": 1.1752492548052216,
      "grad_norm": 0.19691455364227295,
      "learning_rate": 6.248570150977061e-05,
      "loss": 0.5466,
      "step": 5717
    },
    {
      "epoch": 1.1754548257786,
      "grad_norm": 0.19735218584537506,
      "learning_rate": 6.247649785012545e-05,
      "loss": 0.5595,
      "step": 5718
    },
    {
      "epoch": 1.1756603967519785,
      "grad_norm": 0.19617964327335358,
      "learning_rate": 6.246729332949493e-05,
      "loss": 0.5774,
      "step": 5719
    },
    {
      "epoch": 1.1758659677253571,
      "grad_norm": 0.19635650515556335,
      "learning_rate": 6.24580879483325e-05,
      "loss": 0.5542,
      "step": 5720
    },
    {
      "epoch": 1.1760715386987357,
      "grad_norm": 0.19671329855918884,
      "learning_rate": 6.244888170709169e-05,
      "loss": 0.5775,
      "step": 5721
    },
    {
      "epoch": 1.1762771096721143,
      "grad_norm": 0.20057837665081024,
      "learning_rate": 6.243967460622603e-05,
      "loss": 0.5706,
      "step": 5722
    },
    {
      "epoch": 1.1764826806454929,
      "grad_norm": 0.1965552419424057,
      "learning_rate": 6.243046664618911e-05,
      "loss": 0.5698,
      "step": 5723
    },
    {
      "epoch": 1.1766882516188715,
      "grad_norm": 0.19308249652385712,
      "learning_rate": 6.242125782743456e-05,
      "loss": 0.5642,
      "step": 5724
    },
    {
      "epoch": 1.17689382259225,
      "grad_norm": 0.19306235015392303,
      "learning_rate": 6.241204815041608e-05,
      "loss": 0.576,
      "step": 5725
    },
    {
      "epoch": 1.1770993935656286,
      "grad_norm": 0.18735530972480774,
      "learning_rate": 6.240283761558737e-05,
      "loss": 0.5678,
      "step": 5726
    },
    {
      "epoch": 1.177304964539007,
      "grad_norm": 0.1929217427968979,
      "learning_rate": 6.239362622340218e-05,
      "loss": 0.5542,
      "step": 5727
    },
    {
      "epoch": 1.1775105355123856,
      "grad_norm": 0.19190043210983276,
      "learning_rate": 6.238441397431433e-05,
      "loss": 0.5836,
      "step": 5728
    },
    {
      "epoch": 1.1777161064857642,
      "grad_norm": 0.1934564858675003,
      "learning_rate": 6.237520086877766e-05,
      "loss": 0.5532,
      "step": 5729
    },
    {
      "epoch": 1.1779216774591428,
      "grad_norm": 0.16846685111522675,
      "learning_rate": 6.236598690724606e-05,
      "loss": 0.5279,
      "step": 5730
    },
    {
      "epoch": 1.1781272484325214,
      "grad_norm": 0.1717388778924942,
      "learning_rate": 6.235677209017345e-05,
      "loss": 0.5595,
      "step": 5731
    },
    {
      "epoch": 1.1783328194059,
      "grad_norm": 0.18958315253257751,
      "learning_rate": 6.234755641801379e-05,
      "loss": 0.5657,
      "step": 5732
    },
    {
      "epoch": 1.1785383903792783,
      "grad_norm": 0.19686202704906464,
      "learning_rate": 6.233833989122112e-05,
      "loss": 0.5983,
      "step": 5733
    },
    {
      "epoch": 1.178743961352657,
      "grad_norm": 0.1927022784948349,
      "learning_rate": 6.232912251024948e-05,
      "loss": 0.5968,
      "step": 5734
    },
    {
      "epoch": 1.1789495323260355,
      "grad_norm": 0.19848833978176117,
      "learning_rate": 6.231990427555297e-05,
      "loss": 0.5491,
      "step": 5735
    },
    {
      "epoch": 1.179155103299414,
      "grad_norm": 0.189555823802948,
      "learning_rate": 6.231068518758572e-05,
      "loss": 0.5525,
      "step": 5736
    },
    {
      "epoch": 1.1793606742727927,
      "grad_norm": 0.19321559369564056,
      "learning_rate": 6.230146524680194e-05,
      "loss": 0.5792,
      "step": 5737
    },
    {
      "epoch": 1.1795662452461713,
      "grad_norm": 0.19412335753440857,
      "learning_rate": 6.229224445365582e-05,
      "loss": 0.5731,
      "step": 5738
    },
    {
      "epoch": 1.1797718162195499,
      "grad_norm": 0.20160719752311707,
      "learning_rate": 6.228302280860166e-05,
      "loss": 0.5931,
      "step": 5739
    },
    {
      "epoch": 1.1799773871929284,
      "grad_norm": 0.19900692999362946,
      "learning_rate": 6.227380031209373e-05,
      "loss": 0.5437,
      "step": 5740
    },
    {
      "epoch": 1.180182958166307,
      "grad_norm": 0.19047874212265015,
      "learning_rate": 6.226457696458639e-05,
      "loss": 0.5529,
      "step": 5741
    },
    {
      "epoch": 1.1803885291396854,
      "grad_norm": 0.19529984891414642,
      "learning_rate": 6.225535276653405e-05,
      "loss": 0.5672,
      "step": 5742
    },
    {
      "epoch": 1.180594100113064,
      "grad_norm": 0.19696053862571716,
      "learning_rate": 6.224612771839113e-05,
      "loss": 0.572,
      "step": 5743
    },
    {
      "epoch": 1.1807996710864426,
      "grad_norm": 0.19073131680488586,
      "learning_rate": 6.22369018206121e-05,
      "loss": 0.5524,
      "step": 5744
    },
    {
      "epoch": 1.1810052420598212,
      "grad_norm": 0.18917502462863922,
      "learning_rate": 6.222767507365148e-05,
      "loss": 0.5542,
      "step": 5745
    },
    {
      "epoch": 1.1812108130331997,
      "grad_norm": 0.19207759201526642,
      "learning_rate": 6.221844747796384e-05,
      "loss": 0.5594,
      "step": 5746
    },
    {
      "epoch": 1.1814163840065783,
      "grad_norm": 0.1916734278202057,
      "learning_rate": 6.220921903400376e-05,
      "loss": 0.554,
      "step": 5747
    },
    {
      "epoch": 1.181621954979957,
      "grad_norm": 0.1720525622367859,
      "learning_rate": 6.21999897422259e-05,
      "loss": 0.517,
      "step": 5748
    },
    {
      "epoch": 1.1818275259533353,
      "grad_norm": 0.1582804173231125,
      "learning_rate": 6.219075960308494e-05,
      "loss": 0.5714,
      "step": 5749
    },
    {
      "epoch": 1.1820330969267139,
      "grad_norm": 0.20018833875656128,
      "learning_rate": 6.218152861703561e-05,
      "loss": 0.5783,
      "step": 5750
    },
    {
      "epoch": 1.1822386679000925,
      "grad_norm": 0.16681919991970062,
      "learning_rate": 6.217229678453265e-05,
      "loss": 0.5182,
      "step": 5751
    },
    {
      "epoch": 1.182444238873471,
      "grad_norm": 0.1674472838640213,
      "learning_rate": 6.21630641060309e-05,
      "loss": 0.5756,
      "step": 5752
    },
    {
      "epoch": 1.1826498098468496,
      "grad_norm": 0.19080859422683716,
      "learning_rate": 6.215383058198521e-05,
      "loss": 0.5616,
      "step": 5753
    },
    {
      "epoch": 1.1828553808202282,
      "grad_norm": 0.18792377412319183,
      "learning_rate": 6.214459621285047e-05,
      "loss": 0.5482,
      "step": 5754
    },
    {
      "epoch": 1.1830609517936068,
      "grad_norm": 0.1907912641763687,
      "learning_rate": 6.21353609990816e-05,
      "loss": 0.5613,
      "step": 5755
    },
    {
      "epoch": 1.1832665227669854,
      "grad_norm": 0.1828346997499466,
      "learning_rate": 6.212612494113358e-05,
      "loss": 0.5496,
      "step": 5756
    },
    {
      "epoch": 1.1834720937403638,
      "grad_norm": 0.19093002378940582,
      "learning_rate": 6.211688803946142e-05,
      "loss": 0.5769,
      "step": 5757
    },
    {
      "epoch": 1.1836776647137424,
      "grad_norm": 0.1904676854610443,
      "learning_rate": 6.21076502945202e-05,
      "loss": 0.5385,
      "step": 5758
    },
    {
      "epoch": 1.183883235687121,
      "grad_norm": 0.1881975680589676,
      "learning_rate": 6.209841170676502e-05,
      "loss": 0.5633,
      "step": 5759
    },
    {
      "epoch": 1.1840888066604995,
      "grad_norm": 0.20327463746070862,
      "learning_rate": 6.208917227665102e-05,
      "loss": 0.5714,
      "step": 5760
    },
    {
      "epoch": 1.1842943776338781,
      "grad_norm": 0.18997357785701752,
      "learning_rate": 6.207993200463335e-05,
      "loss": 0.551,
      "step": 5761
    },
    {
      "epoch": 1.1844999486072567,
      "grad_norm": 0.1653435230255127,
      "learning_rate": 6.207069089116728e-05,
      "loss": 0.5465,
      "step": 5762
    },
    {
      "epoch": 1.1847055195806353,
      "grad_norm": 0.1645163893699646,
      "learning_rate": 6.206144893670805e-05,
      "loss": 0.5411,
      "step": 5763
    },
    {
      "epoch": 1.1849110905540137,
      "grad_norm": 0.18971189856529236,
      "learning_rate": 6.205220614171098e-05,
      "loss": 0.5724,
      "step": 5764
    },
    {
      "epoch": 1.1851166615273923,
      "grad_norm": 0.19266551733016968,
      "learning_rate": 6.204296250663142e-05,
      "loss": 0.544,
      "step": 5765
    },
    {
      "epoch": 1.1853222325007708,
      "grad_norm": 0.1676861196756363,
      "learning_rate": 6.203371803192475e-05,
      "loss": 0.5232,
      "step": 5766
    },
    {
      "epoch": 1.1855278034741494,
      "grad_norm": 0.16158527135849,
      "learning_rate": 6.20244727180464e-05,
      "loss": 0.5324,
      "step": 5767
    },
    {
      "epoch": 1.185733374447528,
      "grad_norm": 0.16184964776039124,
      "learning_rate": 6.201522656545186e-05,
      "loss": 0.5454,
      "step": 5768
    },
    {
      "epoch": 1.1859389454209066,
      "grad_norm": 0.16072934865951538,
      "learning_rate": 6.200597957459664e-05,
      "loss": 0.5676,
      "step": 5769
    },
    {
      "epoch": 1.1861445163942852,
      "grad_norm": 0.19808636605739594,
      "learning_rate": 6.199673174593629e-05,
      "loss": 0.5426,
      "step": 5770
    },
    {
      "epoch": 1.1863500873676638,
      "grad_norm": 0.19355566799640656,
      "learning_rate": 6.19874830799264e-05,
      "loss": 0.5601,
      "step": 5771
    },
    {
      "epoch": 1.1865556583410422,
      "grad_norm": 0.1977650374174118,
      "learning_rate": 6.197823357702263e-05,
      "loss": 0.5749,
      "step": 5772
    },
    {
      "epoch": 1.1867612293144207,
      "grad_norm": 0.17442461848258972,
      "learning_rate": 6.196898323768065e-05,
      "loss": 0.5253,
      "step": 5773
    },
    {
      "epoch": 1.1869668002877993,
      "grad_norm": 0.15890754759311676,
      "learning_rate": 6.195973206235616e-05,
      "loss": 0.5509,
      "step": 5774
    },
    {
      "epoch": 1.187172371261178,
      "grad_norm": 0.18826748430728912,
      "learning_rate": 6.195048005150496e-05,
      "loss": 0.54,
      "step": 5775
    },
    {
      "epoch": 1.1873779422345565,
      "grad_norm": 0.18961307406425476,
      "learning_rate": 6.194122720558282e-05,
      "loss": 0.5505,
      "step": 5776
    },
    {
      "epoch": 1.187583513207935,
      "grad_norm": 0.19002290070056915,
      "learning_rate": 6.193197352504561e-05,
      "loss": 0.5637,
      "step": 5777
    },
    {
      "epoch": 1.1877890841813137,
      "grad_norm": 0.1975557655096054,
      "learning_rate": 6.19227190103492e-05,
      "loss": 0.5667,
      "step": 5778
    },
    {
      "epoch": 1.187994655154692,
      "grad_norm": 0.20086504518985748,
      "learning_rate": 6.191346366194952e-05,
      "loss": 0.5792,
      "step": 5779
    },
    {
      "epoch": 1.1882002261280706,
      "grad_norm": 0.19469043612480164,
      "learning_rate": 6.190420748030253e-05,
      "loss": 0.562,
      "step": 5780
    },
    {
      "epoch": 1.1884057971014492,
      "grad_norm": 0.19469872117042542,
      "learning_rate": 6.189495046586427e-05,
      "loss": 0.5725,
      "step": 5781
    },
    {
      "epoch": 1.1886113680748278,
      "grad_norm": 0.1903071254491806,
      "learning_rate": 6.188569261909076e-05,
      "loss": 0.5604,
      "step": 5782
    },
    {
      "epoch": 1.1888169390482064,
      "grad_norm": 0.18922393023967743,
      "learning_rate": 6.187643394043808e-05,
      "loss": 0.5336,
      "step": 5783
    },
    {
      "epoch": 1.189022510021585,
      "grad_norm": 0.19879461824893951,
      "learning_rate": 6.186717443036239e-05,
      "loss": 0.5699,
      "step": 5784
    },
    {
      "epoch": 1.1892280809949636,
      "grad_norm": 0.19611231982707977,
      "learning_rate": 6.185791408931986e-05,
      "loss": 0.533,
      "step": 5785
    },
    {
      "epoch": 1.1894336519683422,
      "grad_norm": 0.17245331406593323,
      "learning_rate": 6.18486529177667e-05,
      "loss": 0.5268,
      "step": 5786
    },
    {
      "epoch": 1.1896392229417208,
      "grad_norm": 0.15049666166305542,
      "learning_rate": 6.183939091615915e-05,
      "loss": 0.5324,
      "step": 5787
    },
    {
      "epoch": 1.1898447939150991,
      "grad_norm": 0.1296570748090744,
      "learning_rate": 6.183012808495353e-05,
      "loss": 0.5245,
      "step": 5788
    },
    {
      "epoch": 1.1900503648884777,
      "grad_norm": 0.1654006838798523,
      "learning_rate": 6.182086442460614e-05,
      "loss": 0.5405,
      "step": 5789
    },
    {
      "epoch": 1.1902559358618563,
      "grad_norm": 0.20028263330459595,
      "learning_rate": 6.181159993557338e-05,
      "loss": 0.5792,
      "step": 5790
    },
    {
      "epoch": 1.1904615068352349,
      "grad_norm": 0.19533969461917877,
      "learning_rate": 6.18023346183117e-05,
      "loss": 0.5698,
      "step": 5791
    },
    {
      "epoch": 1.1906670778086135,
      "grad_norm": 0.16536763310432434,
      "learning_rate": 6.17930684732775e-05,
      "loss": 0.5253,
      "step": 5792
    },
    {
      "epoch": 1.190872648781992,
      "grad_norm": 0.16189715266227722,
      "learning_rate": 6.178380150092732e-05,
      "loss": 0.5759,
      "step": 5793
    },
    {
      "epoch": 1.1910782197553704,
      "grad_norm": 0.1967983990907669,
      "learning_rate": 6.177453370171768e-05,
      "loss": 0.5721,
      "step": 5794
    },
    {
      "epoch": 1.191283790728749,
      "grad_norm": 0.1946103274822235,
      "learning_rate": 6.176526507610518e-05,
      "loss": 0.5587,
      "step": 5795
    },
    {
      "epoch": 1.1914893617021276,
      "grad_norm": 0.20200130343437195,
      "learning_rate": 6.175599562454641e-05,
      "loss": 0.571,
      "step": 5796
    },
    {
      "epoch": 1.1916949326755062,
      "grad_norm": 0.19911526143550873,
      "learning_rate": 6.174672534749808e-05,
      "loss": 0.5615,
      "step": 5797
    },
    {
      "epoch": 1.1919005036488848,
      "grad_norm": 0.19905459880828857,
      "learning_rate": 6.173745424541684e-05,
      "loss": 0.5793,
      "step": 5798
    },
    {
      "epoch": 1.1921060746222634,
      "grad_norm": 0.1912047415971756,
      "learning_rate": 6.172818231875947e-05,
      "loss": 0.5543,
      "step": 5799
    },
    {
      "epoch": 1.192311645595642,
      "grad_norm": 0.16958840191364288,
      "learning_rate": 6.171890956798275e-05,
      "loss": 0.5339,
      "step": 5800
    },
    {
      "epoch": 1.1925172165690205,
      "grad_norm": 0.1356760561466217,
      "learning_rate": 6.170963599354349e-05,
      "loss": 0.5175,
      "step": 5801
    },
    {
      "epoch": 1.1927227875423991,
      "grad_norm": 0.1700810045003891,
      "learning_rate": 6.170036159589856e-05,
      "loss": 0.554,
      "step": 5802
    },
    {
      "epoch": 1.1929283585157775,
      "grad_norm": 0.17295996844768524,
      "learning_rate": 6.169108637550488e-05,
      "loss": 0.5169,
      "step": 5803
    },
    {
      "epoch": 1.193133929489156,
      "grad_norm": 0.1662554293870926,
      "learning_rate": 6.16818103328194e-05,
      "loss": 0.5882,
      "step": 5804
    },
    {
      "epoch": 1.1933395004625347,
      "grad_norm": 0.1974506676197052,
      "learning_rate": 6.167253346829909e-05,
      "loss": 0.5556,
      "step": 5805
    },
    {
      "epoch": 1.1935450714359133,
      "grad_norm": 0.19866618514060974,
      "learning_rate": 6.166325578240098e-05,
      "loss": 0.5748,
      "step": 5806
    },
    {
      "epoch": 1.1937506424092919,
      "grad_norm": 0.19283287227153778,
      "learning_rate": 6.165397727558214e-05,
      "loss": 0.5611,
      "step": 5807
    },
    {
      "epoch": 1.1939562133826704,
      "grad_norm": 0.19626696407794952,
      "learning_rate": 6.164469794829967e-05,
      "loss": 0.5579,
      "step": 5808
    },
    {
      "epoch": 1.1941617843560488,
      "grad_norm": 0.19367843866348267,
      "learning_rate": 6.163541780101075e-05,
      "loss": 0.5642,
      "step": 5809
    },
    {
      "epoch": 1.1943673553294274,
      "grad_norm": 0.19207385182380676,
      "learning_rate": 6.162613683417253e-05,
      "loss": 0.5586,
      "step": 5810
    },
    {
      "epoch": 1.194572926302806,
      "grad_norm": 0.19212685525417328,
      "learning_rate": 6.161685504824227e-05,
      "loss": 0.5427,
      "step": 5811
    },
    {
      "epoch": 1.1947784972761846,
      "grad_norm": 0.1972237080335617,
      "learning_rate": 6.160757244367723e-05,
      "loss": 0.5595,
      "step": 5812
    },
    {
      "epoch": 1.1949840682495632,
      "grad_norm": 0.2040352076292038,
      "learning_rate": 6.159828902093471e-05,
      "loss": 0.5384,
      "step": 5813
    },
    {
      "epoch": 1.1951896392229417,
      "grad_norm": 0.1992282271385193,
      "learning_rate": 6.158900478047206e-05,
      "loss": 0.5757,
      "step": 5814
    },
    {
      "epoch": 1.1953952101963203,
      "grad_norm": 0.18852105736732483,
      "learning_rate": 6.15797197227467e-05,
      "loss": 0.5714,
      "step": 5815
    },
    {
      "epoch": 1.195600781169699,
      "grad_norm": 0.18910926580429077,
      "learning_rate": 6.157043384821604e-05,
      "loss": 0.5506,
      "step": 5816
    },
    {
      "epoch": 1.1958063521430775,
      "grad_norm": 0.19245147705078125,
      "learning_rate": 6.156114715733756e-05,
      "loss": 0.5513,
      "step": 5817
    },
    {
      "epoch": 1.1960119231164559,
      "grad_norm": 0.19064119458198547,
      "learning_rate": 6.155185965056875e-05,
      "loss": 0.5643,
      "step": 5818
    },
    {
      "epoch": 1.1962174940898345,
      "grad_norm": 0.2007809430360794,
      "learning_rate": 6.15425713283672e-05,
      "loss": 0.5773,
      "step": 5819
    },
    {
      "epoch": 1.196423065063213,
      "grad_norm": 0.1933142989873886,
      "learning_rate": 6.153328219119048e-05,
      "loss": 0.5504,
      "step": 5820
    },
    {
      "epoch": 1.1966286360365916,
      "grad_norm": 0.16889862716197968,
      "learning_rate": 6.152399223949619e-05,
      "loss": 0.5338,
      "step": 5821
    },
    {
      "epoch": 1.1968342070099702,
      "grad_norm": 0.16849687695503235,
      "learning_rate": 6.151470147374206e-05,
      "loss": 0.5679,
      "step": 5822
    },
    {
      "epoch": 1.1970397779833488,
      "grad_norm": 0.19202522933483124,
      "learning_rate": 6.150540989438577e-05,
      "loss": 0.5656,
      "step": 5823
    },
    {
      "epoch": 1.1972453489567274,
      "grad_norm": 0.19393931329250336,
      "learning_rate": 6.149611750188508e-05,
      "loss": 0.5745,
      "step": 5824
    },
    {
      "epoch": 1.1974509199301058,
      "grad_norm": 0.15858381986618042,
      "learning_rate": 6.14868242966978e-05,
      "loss": 0.5202,
      "step": 5825
    },
    {
      "epoch": 1.1976564909034844,
      "grad_norm": 0.15841448307037354,
      "learning_rate": 6.147753027928173e-05,
      "loss": 0.5518,
      "step": 5826
    },
    {
      "epoch": 1.197862061876863,
      "grad_norm": 0.18990083038806915,
      "learning_rate": 6.146823545009475e-05,
      "loss": 0.5576,
      "step": 5827
    },
    {
      "epoch": 1.1980676328502415,
      "grad_norm": 0.1819765716791153,
      "learning_rate": 6.14589398095948e-05,
      "loss": 0.5608,
      "step": 5828
    },
    {
      "epoch": 1.1982732038236201,
      "grad_norm": 0.1861831545829773,
      "learning_rate": 6.144964335823981e-05,
      "loss": 0.5659,
      "step": 5829
    },
    {
      "epoch": 1.1984787747969987,
      "grad_norm": 0.18785440921783447,
      "learning_rate": 6.14403460964878e-05,
      "loss": 0.5752,
      "step": 5830
    },
    {
      "epoch": 1.1986843457703773,
      "grad_norm": 0.1981627196073532,
      "learning_rate": 6.143104802479673e-05,
      "loss": 0.578,
      "step": 5831
    },
    {
      "epoch": 1.198889916743756,
      "grad_norm": 0.19505171477794647,
      "learning_rate": 6.142174914362476e-05,
      "loss": 0.542,
      "step": 5832
    },
    {
      "epoch": 1.1990954877171343,
      "grad_norm": 0.1755106896162033,
      "learning_rate": 6.141244945342995e-05,
      "loss": 0.53,
      "step": 5833
    },
    {
      "epoch": 1.1993010586905128,
      "grad_norm": 0.1715668886899948,
      "learning_rate": 6.140314895467045e-05,
      "loss": 0.5479,
      "step": 5834
    },
    {
      "epoch": 1.1995066296638914,
      "grad_norm": 0.19255517423152924,
      "learning_rate": 6.13938476478045e-05,
      "loss": 0.5572,
      "step": 5835
    },
    {
      "epoch": 1.19971220063727,
      "grad_norm": 0.1867235153913498,
      "learning_rate": 6.13845455332903e-05,
      "loss": 0.5865,
      "step": 5836
    },
    {
      "epoch": 1.1999177716106486,
      "grad_norm": 0.18764084577560425,
      "learning_rate": 6.137524261158612e-05,
      "loss": 0.5437,
      "step": 5837
    },
    {
      "epoch": 1.2001233425840272,
      "grad_norm": 0.20819789171218872,
      "learning_rate": 6.136593888315025e-05,
      "loss": 0.5891,
      "step": 5838
    },
    {
      "epoch": 1.2003289135574058,
      "grad_norm": 0.1949729472398758,
      "learning_rate": 6.13566343484411e-05,
      "loss": 0.5662,
      "step": 5839
    },
    {
      "epoch": 1.2005344845307842,
      "grad_norm": 0.18804004788398743,
      "learning_rate": 6.1347329007917e-05,
      "loss": 0.5601,
      "step": 5840
    },
    {
      "epoch": 1.2007400555041627,
      "grad_norm": 0.18714557588100433,
      "learning_rate": 6.133802286203642e-05,
      "loss": 0.5637,
      "step": 5841
    },
    {
      "epoch": 1.2009456264775413,
      "grad_norm": 0.19639329612255096,
      "learning_rate": 6.132871591125781e-05,
      "loss": 0.5698,
      "step": 5842
    },
    {
      "epoch": 1.20115119745092,
      "grad_norm": 0.20430424809455872,
      "learning_rate": 6.131940815603969e-05,
      "loss": 0.5739,
      "step": 5843
    },
    {
      "epoch": 1.2013567684242985,
      "grad_norm": 0.19093136489391327,
      "learning_rate": 6.13100995968406e-05,
      "loss": 0.5455,
      "step": 5844
    },
    {
      "epoch": 1.201562339397677,
      "grad_norm": 0.1929858773946762,
      "learning_rate": 6.130079023411915e-05,
      "loss": 0.5741,
      "step": 5845
    },
    {
      "epoch": 1.2017679103710557,
      "grad_norm": 0.19032742083072662,
      "learning_rate": 6.129148006833394e-05,
      "loss": 0.5586,
      "step": 5846
    },
    {
      "epoch": 1.2019734813444343,
      "grad_norm": 0.19212977588176727,
      "learning_rate": 6.128216909994367e-05,
      "loss": 0.5655,
      "step": 5847
    },
    {
      "epoch": 1.2021790523178126,
      "grad_norm": 0.19061528146266937,
      "learning_rate": 6.127285732940702e-05,
      "loss": 0.5499,
      "step": 5848
    },
    {
      "epoch": 1.2023846232911912,
      "grad_norm": 0.19122721254825592,
      "learning_rate": 6.126354475718275e-05,
      "loss": 0.5456,
      "step": 5849
    },
    {
      "epoch": 1.2025901942645698,
      "grad_norm": 0.17146308720111847,
      "learning_rate": 6.125423138372965e-05,
      "loss": 0.5346,
      "step": 5850
    },
    {
      "epoch": 1.2027957652379484,
      "grad_norm": 0.1573454737663269,
      "learning_rate": 6.124491720950655e-05,
      "loss": 0.5312,
      "step": 5851
    },
    {
      "epoch": 1.203001336211327,
      "grad_norm": 0.16374094784259796,
      "learning_rate": 6.123560223497228e-05,
      "loss": 0.5587,
      "step": 5852
    },
    {
      "epoch": 1.2032069071847056,
      "grad_norm": 0.18009409308433533,
      "learning_rate": 6.12262864605858e-05,
      "loss": 0.5452,
      "step": 5853
    },
    {
      "epoch": 1.2034124781580842,
      "grad_norm": 0.17497576773166656,
      "learning_rate": 6.1216969886806e-05,
      "loss": 0.5535,
      "step": 5854
    },
    {
      "epoch": 1.2036180491314625,
      "grad_norm": 0.2043164074420929,
      "learning_rate": 6.120765251409191e-05,
      "loss": 0.591,
      "step": 5855
    },
    {
      "epoch": 1.2038236201048411,
      "grad_norm": 0.1914680004119873,
      "learning_rate": 6.119833434290255e-05,
      "loss": 0.5526,
      "step": 5856
    },
    {
      "epoch": 1.2040291910782197,
      "grad_norm": 0.1849730759859085,
      "learning_rate": 6.118901537369694e-05,
      "loss": 0.5739,
      "step": 5857
    },
    {
      "epoch": 1.2042347620515983,
      "grad_norm": 0.1906820684671402,
      "learning_rate": 6.117969560693423e-05,
      "loss": 0.5544,
      "step": 5858
    },
    {
      "epoch": 1.2044403330249769,
      "grad_norm": 0.19102442264556885,
      "learning_rate": 6.117037504307351e-05,
      "loss": 0.5478,
      "step": 5859
    },
    {
      "epoch": 1.2046459039983555,
      "grad_norm": 0.1686401218175888,
      "learning_rate": 6.116105368257403e-05,
      "loss": 0.5448,
      "step": 5860
    },
    {
      "epoch": 1.204851474971734,
      "grad_norm": 0.13795730471611023,
      "learning_rate": 6.115173152589495e-05,
      "loss": 0.5262,
      "step": 5861
    },
    {
      "epoch": 1.2050570459451126,
      "grad_norm": 0.164164200425148,
      "learning_rate": 6.114240857349556e-05,
      "loss": 0.5684,
      "step": 5862
    },
    {
      "epoch": 1.205262616918491,
      "grad_norm": 0.19996531307697296,
      "learning_rate": 6.113308482583514e-05,
      "loss": 0.5608,
      "step": 5863
    },
    {
      "epoch": 1.2054681878918696,
      "grad_norm": 0.19715693593025208,
      "learning_rate": 6.112376028337305e-05,
      "loss": 0.566,
      "step": 5864
    },
    {
      "epoch": 1.2056737588652482,
      "grad_norm": 0.1752108633518219,
      "learning_rate": 6.111443494656864e-05,
      "loss": 0.5366,
      "step": 5865
    },
    {
      "epoch": 1.2058793298386268,
      "grad_norm": 0.16722378134727478,
      "learning_rate": 6.110510881588135e-05,
      "loss": 0.5602,
      "step": 5866
    },
    {
      "epoch": 1.2060849008120054,
      "grad_norm": 0.18732362985610962,
      "learning_rate": 6.10957818917706e-05,
      "loss": 0.5498,
      "step": 5867
    },
    {
      "epoch": 1.206290471785384,
      "grad_norm": 0.1660609394311905,
      "learning_rate": 6.108645417469593e-05,
      "loss": 0.5257,
      "step": 5868
    },
    {
      "epoch": 1.2064960427587625,
      "grad_norm": 0.1357351690530777,
      "learning_rate": 6.107712566511685e-05,
      "loss": 0.5126,
      "step": 5869
    },
    {
      "epoch": 1.206701613732141,
      "grad_norm": 0.1652655005455017,
      "learning_rate": 6.106779636349292e-05,
      "loss": 0.5602,
      "step": 5870
    },
    {
      "epoch": 1.2069071847055195,
      "grad_norm": 0.20981089770793915,
      "learning_rate": 6.105846627028379e-05,
      "loss": 0.5616,
      "step": 5871
    },
    {
      "epoch": 1.207112755678898,
      "grad_norm": 0.19564464688301086,
      "learning_rate": 6.104913538594905e-05,
      "loss": 0.5609,
      "step": 5872
    },
    {
      "epoch": 1.2073183266522767,
      "grad_norm": 0.19752687215805054,
      "learning_rate": 6.103980371094844e-05,
      "loss": 0.5766,
      "step": 5873
    },
    {
      "epoch": 1.2075238976256553,
      "grad_norm": 0.20465241372585297,
      "learning_rate": 6.103047124574167e-05,
      "loss": 0.5877,
      "step": 5874
    },
    {
      "epoch": 1.2077294685990339,
      "grad_norm": 0.19926784932613373,
      "learning_rate": 6.102113799078851e-05,
      "loss": 0.5558,
      "step": 5875
    },
    {
      "epoch": 1.2079350395724124,
      "grad_norm": 0.1923745572566986,
      "learning_rate": 6.1011803946548774e-05,
      "loss": 0.5595,
      "step": 5876
    },
    {
      "epoch": 1.208140610545791,
      "grad_norm": 0.16840709745883942,
      "learning_rate": 6.100246911348227e-05,
      "loss": 0.5261,
      "step": 5877
    },
    {
      "epoch": 1.2083461815191696,
      "grad_norm": 0.16660816967487335,
      "learning_rate": 6.099313349204893e-05,
      "loss": 0.5633,
      "step": 5878
    },
    {
      "epoch": 1.208551752492548,
      "grad_norm": 0.1967456340789795,
      "learning_rate": 6.098379708270863e-05,
      "loss": 0.5616,
      "step": 5879
    },
    {
      "epoch": 1.2087573234659266,
      "grad_norm": 0.19242748618125916,
      "learning_rate": 6.097445988592138e-05,
      "loss": 0.5474,
      "step": 5880
    },
    {
      "epoch": 1.2089628944393052,
      "grad_norm": 0.2012694627046585,
      "learning_rate": 6.096512190214715e-05,
      "loss": 0.5508,
      "step": 5881
    },
    {
      "epoch": 1.2091684654126837,
      "grad_norm": 0.1632763296365738,
      "learning_rate": 6.0955783131845994e-05,
      "loss": 0.5535,
      "step": 5882
    },
    {
      "epoch": 1.2093740363860623,
      "grad_norm": 0.16215071082115173,
      "learning_rate": 6.094644357547796e-05,
      "loss": 0.5579,
      "step": 5883
    },
    {
      "epoch": 1.209579607359441,
      "grad_norm": 0.19483166933059692,
      "learning_rate": 6.09371032335032e-05,
      "loss": 0.5576,
      "step": 5884
    },
    {
      "epoch": 1.2097851783328193,
      "grad_norm": 0.18877603113651276,
      "learning_rate": 6.092776210638185e-05,
      "loss": 0.5426,
      "step": 5885
    },
    {
      "epoch": 1.2099907493061979,
      "grad_norm": 0.1930856853723526,
      "learning_rate": 6.0918420194574104e-05,
      "loss": 0.5597,
      "step": 5886
    },
    {
      "epoch": 1.2101963202795765,
      "grad_norm": 0.1913139820098877,
      "learning_rate": 6.0909077498540194e-05,
      "loss": 0.5747,
      "step": 5887
    },
    {
      "epoch": 1.210401891252955,
      "grad_norm": 0.16376695036888123,
      "learning_rate": 6.0899734018740396e-05,
      "loss": 0.502,
      "step": 5888
    },
    {
      "epoch": 1.2106074622263336,
      "grad_norm": 0.15658964216709137,
      "learning_rate": 6.0890389755635035e-05,
      "loss": 0.5453,
      "step": 5889
    },
    {
      "epoch": 1.2108130331997122,
      "grad_norm": 0.1946595311164856,
      "learning_rate": 6.088104470968441e-05,
      "loss": 0.5533,
      "step": 5890
    },
    {
      "epoch": 1.2110186041730908,
      "grad_norm": 0.19284933805465698,
      "learning_rate": 6.0871698881348966e-05,
      "loss": 0.5385,
      "step": 5891
    },
    {
      "epoch": 1.2112241751464694,
      "grad_norm": 0.19203589856624603,
      "learning_rate": 6.0862352271089104e-05,
      "loss": 0.5533,
      "step": 5892
    },
    {
      "epoch": 1.211429746119848,
      "grad_norm": 0.19579070806503296,
      "learning_rate": 6.0853004879365265e-05,
      "loss": 0.5648,
      "step": 5893
    },
    {
      "epoch": 1.2116353170932264,
      "grad_norm": 0.19746367633342743,
      "learning_rate": 6.084365670663799e-05,
      "loss": 0.5473,
      "step": 5894
    },
    {
      "epoch": 1.211840888066605,
      "grad_norm": 0.199397012591362,
      "learning_rate": 6.08343077533678e-05,
      "loss": 0.5522,
      "step": 5895
    },
    {
      "epoch": 1.2120464590399835,
      "grad_norm": 0.16631294786930084,
      "learning_rate": 6.082495802001527e-05,
      "loss": 0.5414,
      "step": 5896
    },
    {
      "epoch": 1.2122520300133621,
      "grad_norm": 0.15855452418327332,
      "learning_rate": 6.0815607507041024e-05,
      "loss": 0.5403,
      "step": 5897
    },
    {
      "epoch": 1.2124576009867407,
      "grad_norm": 0.196935772895813,
      "learning_rate": 6.08062562149057e-05,
      "loss": 0.5665,
      "step": 5898
    },
    {
      "epoch": 1.2126631719601193,
      "grad_norm": 0.19539684057235718,
      "learning_rate": 6.079690414407004e-05,
      "loss": 0.5524,
      "step": 5899
    },
    {
      "epoch": 1.2128687429334977,
      "grad_norm": 0.19079557061195374,
      "learning_rate": 6.078755129499475e-05,
      "loss": 0.5628,
      "step": 5900
    },
    {
      "epoch": 1.2130743139068763,
      "grad_norm": 0.19366958737373352,
      "learning_rate": 6.077819766814058e-05,
      "loss": 0.5889,
      "step": 5901
    },
    {
      "epoch": 1.2132798848802548,
      "grad_norm": 0.19458188116550446,
      "learning_rate": 6.076884326396837e-05,
      "loss": 0.571,
      "step": 5902
    },
    {
      "epoch": 1.2134854558536334,
      "grad_norm": 0.16850589215755463,
      "learning_rate": 6.075948808293894e-05,
      "loss": 0.5335,
      "step": 5903
    },
    {
      "epoch": 1.213691026827012,
      "grad_norm": 0.16787506639957428,
      "learning_rate": 6.075013212551321e-05,
      "loss": 0.5353,
      "step": 5904
    },
    {
      "epoch": 1.2138965978003906,
      "grad_norm": 0.1945338398218155,
      "learning_rate": 6.074077539215208e-05,
      "loss": 0.5491,
      "step": 5905
    },
    {
      "epoch": 1.2141021687737692,
      "grad_norm": 0.19000251591205597,
      "learning_rate": 6.0731417883316524e-05,
      "loss": 0.5523,
      "step": 5906
    },
    {
      "epoch": 1.2143077397471478,
      "grad_norm": 0.18971100449562073,
      "learning_rate": 6.0722059599467525e-05,
      "loss": 0.5531,
      "step": 5907
    },
    {
      "epoch": 1.2145133107205264,
      "grad_norm": 0.16435407102108002,
      "learning_rate": 6.071270054106613e-05,
      "loss": 0.5286,
      "step": 5908
    },
    {
      "epoch": 1.2147188816939047,
      "grad_norm": 0.17342285811901093,
      "learning_rate": 6.070334070857343e-05,
      "loss": 0.5616,
      "step": 5909
    },
    {
      "epoch": 1.2149244526672833,
      "grad_norm": 0.19488383829593658,
      "learning_rate": 6.069398010245053e-05,
      "loss": 0.5584,
      "step": 5910
    },
    {
      "epoch": 1.215130023640662,
      "grad_norm": 0.1964189112186432,
      "learning_rate": 6.068461872315858e-05,
      "loss": 0.5744,
      "step": 5911
    },
    {
      "epoch": 1.2153355946140405,
      "grad_norm": 0.19528479874134064,
      "learning_rate": 6.067525657115879e-05,
      "loss": 0.557,
      "step": 5912
    },
    {
      "epoch": 1.215541165587419,
      "grad_norm": 0.19183097779750824,
      "learning_rate": 6.066589364691237e-05,
      "loss": 0.5591,
      "step": 5913
    },
    {
      "epoch": 1.2157467365607977,
      "grad_norm": 0.19744020700454712,
      "learning_rate": 6.065652995088058e-05,
      "loss": 0.5627,
      "step": 5914
    },
    {
      "epoch": 1.2159523075341763,
      "grad_norm": 0.18547560274600983,
      "learning_rate": 6.064716548352475e-05,
      "loss": 0.5539,
      "step": 5915
    },
    {
      "epoch": 1.2161578785075546,
      "grad_norm": 0.19087590277194977,
      "learning_rate": 6.063780024530621e-05,
      "loss": 0.5627,
      "step": 5916
    },
    {
      "epoch": 1.2163634494809332,
      "grad_norm": 0.19286733865737915,
      "learning_rate": 6.0628434236686325e-05,
      "loss": 0.5523,
      "step": 5917
    },
    {
      "epoch": 1.2165690204543118,
      "grad_norm": 0.1942092925310135,
      "learning_rate": 6.061906745812655e-05,
      "loss": 0.574,
      "step": 5918
    },
    {
      "epoch": 1.2167745914276904,
      "grad_norm": 0.19682841002941132,
      "learning_rate": 6.060969991008832e-05,
      "loss": 0.5768,
      "step": 5919
    },
    {
      "epoch": 1.216980162401069,
      "grad_norm": 0.194288969039917,
      "learning_rate": 6.060033159303314e-05,
      "loss": 0.5704,
      "step": 5920
    },
    {
      "epoch": 1.2171857333744476,
      "grad_norm": 0.20371194183826447,
      "learning_rate": 6.059096250742252e-05,
      "loss": 0.5677,
      "step": 5921
    },
    {
      "epoch": 1.2173913043478262,
      "grad_norm": 0.20336924493312836,
      "learning_rate": 6.058159265371807e-05,
      "loss": 0.5228,
      "step": 5922
    },
    {
      "epoch": 1.2175968753212048,
      "grad_norm": 0.1702810525894165,
      "learning_rate": 6.0572222032381374e-05,
      "loss": 0.5534,
      "step": 5923
    },
    {
      "epoch": 1.2178024462945831,
      "grad_norm": 0.13445743918418884,
      "learning_rate": 6.056285064387407e-05,
      "loss": 0.5294,
      "step": 5924
    },
    {
      "epoch": 1.2180080172679617,
      "grad_norm": 0.12932245433330536,
      "learning_rate": 6.055347848865787e-05,
      "loss": 0.5243,
      "step": 5925
    },
    {
      "epoch": 1.2182135882413403,
      "grad_norm": 0.16721323132514954,
      "learning_rate": 6.054410556719448e-05,
      "loss": 0.5473,
      "step": 5926
    },
    {
      "epoch": 1.2184191592147189,
      "grad_norm": 0.2189573496580124,
      "learning_rate": 6.053473187994566e-05,
      "loss": 0.566,
      "step": 5927
    },
    {
      "epoch": 1.2186247301880975,
      "grad_norm": 0.19731007516384125,
      "learning_rate": 6.052535742737321e-05,
      "loss": 0.533,
      "step": 5928
    },
    {
      "epoch": 1.218830301161476,
      "grad_norm": 0.19551746547222137,
      "learning_rate": 6.051598220993896e-05,
      "loss": 0.5785,
      "step": 5929
    },
    {
      "epoch": 1.2190358721348546,
      "grad_norm": 0.2288779616355896,
      "learning_rate": 6.0506606228104784e-05,
      "loss": 0.5354,
      "step": 5930
    },
    {
      "epoch": 1.219241443108233,
      "grad_norm": 0.17528457939624786,
      "learning_rate": 6.0497229482332605e-05,
      "loss": 0.5383,
      "step": 5931
    },
    {
      "epoch": 1.2194470140816116,
      "grad_norm": 0.17240411043167114,
      "learning_rate": 6.0487851973084365e-05,
      "loss": 0.5693,
      "step": 5932
    },
    {
      "epoch": 1.2196525850549902,
      "grad_norm": 0.199370875954628,
      "learning_rate": 6.047847370082204e-05,
      "loss": 0.548,
      "step": 5933
    },
    {
      "epoch": 1.2198581560283688,
      "grad_norm": 0.20105613768100739,
      "learning_rate": 6.046909466600768e-05,
      "loss": 0.5604,
      "step": 5934
    },
    {
      "epoch": 1.2200637270017474,
      "grad_norm": 0.16920122504234314,
      "learning_rate": 6.0459714869103304e-05,
      "loss": 0.5377,
      "step": 5935
    },
    {
      "epoch": 1.220269297975126,
      "grad_norm": 0.17022979259490967,
      "learning_rate": 6.0450334310571046e-05,
      "loss": 0.556,
      "step": 5936
    },
    {
      "epoch": 1.2204748689485045,
      "grad_norm": 0.22041717171669006,
      "learning_rate": 6.044095299087304e-05,
      "loss": 0.5874,
      "step": 5937
    },
    {
      "epoch": 1.2206804399218831,
      "grad_norm": 0.20872265100479126,
      "learning_rate": 6.0431570910471436e-05,
      "loss": 0.5687,
      "step": 5938
    },
    {
      "epoch": 1.2208860108952615,
      "grad_norm": 0.18911628425121307,
      "learning_rate": 6.042218806982847e-05,
      "loss": 0.5712,
      "step": 5939
    },
    {
      "epoch": 1.22109158186864,
      "grad_norm": 0.19167855381965637,
      "learning_rate": 6.0412804469406384e-05,
      "loss": 0.5601,
      "step": 5940
    },
    {
      "epoch": 1.2212971528420187,
      "grad_norm": 0.19254928827285767,
      "learning_rate": 6.040342010966745e-05,
      "loss": 0.5746,
      "step": 5941
    },
    {
      "epoch": 1.2215027238153973,
      "grad_norm": 0.19120313227176666,
      "learning_rate": 6.0394034991073994e-05,
      "loss": 0.5502,
      "step": 5942
    },
    {
      "epoch": 1.2217082947887759,
      "grad_norm": 0.1880388706922531,
      "learning_rate": 6.038464911408841e-05,
      "loss": 0.5629,
      "step": 5943
    },
    {
      "epoch": 1.2219138657621544,
      "grad_norm": 0.19094626605510712,
      "learning_rate": 6.0375262479173064e-05,
      "loss": 0.5742,
      "step": 5944
    },
    {
      "epoch": 1.222119436735533,
      "grad_norm": 0.19934087991714478,
      "learning_rate": 6.0365875086790386e-05,
      "loss": 0.6047,
      "step": 5945
    },
    {
      "epoch": 1.2223250077089114,
      "grad_norm": 0.16785962879657745,
      "learning_rate": 6.035648693740287e-05,
      "loss": 0.5404,
      "step": 5946
    },
    {
      "epoch": 1.22253057868229,
      "grad_norm": 0.160533607006073,
      "learning_rate": 6.0347098031473025e-05,
      "loss": 0.5391,
      "step": 5947
    },
    {
      "epoch": 1.2227361496556686,
      "grad_norm": 0.201270192861557,
      "learning_rate": 6.033770836946339e-05,
      "loss": 0.5811,
      "step": 5948
    },
    {
      "epoch": 1.2229417206290472,
      "grad_norm": 0.1920137256383896,
      "learning_rate": 6.0328317951836554e-05,
      "loss": 0.5595,
      "step": 5949
    },
    {
      "epoch": 1.2231472916024257,
      "grad_norm": 0.19600927829742432,
      "learning_rate": 6.031892677905513e-05,
      "loss": 0.5679,
      "step": 5950
    },
    {
      "epoch": 1.2233528625758043,
      "grad_norm": 0.19393356144428253,
      "learning_rate": 6.030953485158178e-05,
      "loss": 0.5586,
      "step": 5951
    },
    {
      "epoch": 1.223558433549183,
      "grad_norm": 0.19558121263980865,
      "learning_rate": 6.030014216987922e-05,
      "loss": 0.5584,
      "step": 5952
    },
    {
      "epoch": 1.2237640045225615,
      "grad_norm": 0.1591499000787735,
      "learning_rate": 6.029074873441015e-05,
      "loss": 0.512,
      "step": 5953
    },
    {
      "epoch": 1.2239695754959399,
      "grad_norm": 0.1601012945175171,
      "learning_rate": 6.028135454563737e-05,
      "loss": 0.5482,
      "step": 5954
    },
    {
      "epoch": 1.2241751464693185,
      "grad_norm": 0.1917879432439804,
      "learning_rate": 6.027195960402367e-05,
      "loss": 0.5619,
      "step": 5955
    },
    {
      "epoch": 1.224380717442697,
      "grad_norm": 0.16363351047039032,
      "learning_rate": 6.026256391003192e-05,
      "loss": 0.5272,
      "step": 5956
    },
    {
      "epoch": 1.2245862884160756,
      "grad_norm": 0.1613667905330658,
      "learning_rate": 6.0253167464124965e-05,
      "loss": 0.5448,
      "step": 5957
    },
    {
      "epoch": 1.2247918593894542,
      "grad_norm": 0.19327108561992645,
      "learning_rate": 6.0243770266765754e-05,
      "loss": 0.5631,
      "step": 5958
    },
    {
      "epoch": 1.2249974303628328,
      "grad_norm": 0.20113897323608398,
      "learning_rate": 6.023437231841721e-05,
      "loss": 0.5433,
      "step": 5959
    },
    {
      "epoch": 1.2252030013362114,
      "grad_norm": 0.19953328371047974,
      "learning_rate": 6.022497361954237e-05,
      "loss": 0.5555,
      "step": 5960
    },
    {
      "epoch": 1.2254085723095898,
      "grad_norm": 0.16104325652122498,
      "learning_rate": 6.021557417060423e-05,
      "loss": 0.5269,
      "step": 5961
    },
    {
      "epoch": 1.2256141432829684,
      "grad_norm": 0.16105084121227264,
      "learning_rate": 6.0206173972065865e-05,
      "loss": 0.5649,
      "step": 5962
    },
    {
      "epoch": 1.225819714256347,
      "grad_norm": 0.1889335662126541,
      "learning_rate": 6.0196773024390374e-05,
      "loss": 0.5536,
      "step": 5963
    },
    {
      "epoch": 1.2260252852297255,
      "grad_norm": 0.19481204450130463,
      "learning_rate": 6.018737132804093e-05,
      "loss": 0.5673,
      "step": 5964
    },
    {
      "epoch": 1.2262308562031041,
      "grad_norm": 0.16492706537246704,
      "learning_rate": 6.017796888348068e-05,
      "loss": 0.548,
      "step": 5965
    },
    {
      "epoch": 1.2264364271764827,
      "grad_norm": 0.1624189019203186,
      "learning_rate": 6.016856569117283e-05,
      "loss": 0.5659,
      "step": 5966
    },
    {
      "epoch": 1.2266419981498613,
      "grad_norm": 0.19174005091190338,
      "learning_rate": 6.015916175158066e-05,
      "loss": 0.5483,
      "step": 5967
    },
    {
      "epoch": 1.22684756912324,
      "grad_norm": 0.19172148406505585,
      "learning_rate": 6.014975706516744e-05,
      "loss": 0.5629,
      "step": 5968
    },
    {
      "epoch": 1.2270531400966185,
      "grad_norm": 0.20126576721668243,
      "learning_rate": 6.014035163239649e-05,
      "loss": 0.5609,
      "step": 5969
    },
    {
      "epoch": 1.2272587110699968,
      "grad_norm": 0.19356362521648407,
      "learning_rate": 6.0130945453731196e-05,
      "loss": 0.557,
      "step": 5970
    },
    {
      "epoch": 1.2274642820433754,
      "grad_norm": 0.19379346072673798,
      "learning_rate": 6.012153852963494e-05,
      "loss": 0.5644,
      "step": 5971
    },
    {
      "epoch": 1.227669853016754,
      "grad_norm": 0.18843898177146912,
      "learning_rate": 6.011213086057114e-05,
      "loss": 0.5655,
      "step": 5972
    },
    {
      "epoch": 1.2278754239901326,
      "grad_norm": 0.1895827353000641,
      "learning_rate": 6.010272244700331e-05,
      "loss": 0.5324,
      "step": 5973
    },
    {
      "epoch": 1.2280809949635112,
      "grad_norm": 0.19657573103904724,
      "learning_rate": 6.009331328939492e-05,
      "loss": 0.5604,
      "step": 5974
    },
    {
      "epoch": 1.2282865659368898,
      "grad_norm": 0.1885729730129242,
      "learning_rate": 6.0083903388209536e-05,
      "loss": 0.5601,
      "step": 5975
    },
    {
      "epoch": 1.2284921369102682,
      "grad_norm": 0.16260753571987152,
      "learning_rate": 6.007449274391073e-05,
      "loss": 0.5245,
      "step": 5976
    },
    {
      "epoch": 1.2286977078836467,
      "grad_norm": 0.13464370369911194,
      "learning_rate": 6.0065081356962124e-05,
      "loss": 0.5164,
      "step": 5977
    },
    {
      "epoch": 1.2289032788570253,
      "grad_norm": 0.17227724194526672,
      "learning_rate": 6.0055669227827384e-05,
      "loss": 0.5848,
      "step": 5978
    },
    {
      "epoch": 1.229108849830404,
      "grad_norm": 0.19165630638599396,
      "learning_rate": 6.0046256356970185e-05,
      "loss": 0.5713,
      "step": 5979
    },
    {
      "epoch": 1.2293144208037825,
      "grad_norm": 0.191480353474617,
      "learning_rate": 6.003684274485426e-05,
      "loss": 0.5564,
      "step": 5980
    },
    {
      "epoch": 1.229519991777161,
      "grad_norm": 0.19356124103069305,
      "learning_rate": 6.002742839194338e-05,
      "loss": 0.5711,
      "step": 5981
    },
    {
      "epoch": 1.2297255627505397,
      "grad_norm": 0.18836161494255066,
      "learning_rate": 6.001801329870134e-05,
      "loss": 0.551,
      "step": 5982
    },
    {
      "epoch": 1.2299311337239183,
      "grad_norm": 0.18804924190044403,
      "learning_rate": 6.0008597465591966e-05,
      "loss": 0.5641,
      "step": 5983
    },
    {
      "epoch": 1.2301367046972969,
      "grad_norm": 0.20674586296081543,
      "learning_rate": 5.999918089307915e-05,
      "loss": 0.5664,
      "step": 5984
    },
    {
      "epoch": 1.2303422756706752,
      "grad_norm": 0.1936078518629074,
      "learning_rate": 5.9989763581626806e-05,
      "loss": 0.552,
      "step": 5985
    },
    {
      "epoch": 1.2305478466440538,
      "grad_norm": 0.19843873381614685,
      "learning_rate": 5.998034553169886e-05,
      "loss": 0.562,
      "step": 5986
    },
    {
      "epoch": 1.2307534176174324,
      "grad_norm": 0.18645739555358887,
      "learning_rate": 5.997092674375932e-05,
      "loss": 0.5424,
      "step": 5987
    },
    {
      "epoch": 1.230958988590811,
      "grad_norm": 0.18855836987495422,
      "learning_rate": 5.9961507218272196e-05,
      "loss": 0.5706,
      "step": 5988
    },
    {
      "epoch": 1.2311645595641896,
      "grad_norm": 0.18944047391414642,
      "learning_rate": 5.9952086955701535e-05,
      "loss": 0.5564,
      "step": 5989
    },
    {
      "epoch": 1.2313701305375682,
      "grad_norm": 0.1880870759487152,
      "learning_rate": 5.994266595651143e-05,
      "loss": 0.5662,
      "step": 5990
    },
    {
      "epoch": 1.2315757015109465,
      "grad_norm": 0.19140774011611938,
      "learning_rate": 5.993324422116602e-05,
      "loss": 0.5469,
      "step": 5991
    },
    {
      "epoch": 1.2317812724843251,
      "grad_norm": 0.1923801451921463,
      "learning_rate": 5.9923821750129466e-05,
      "loss": 0.5715,
      "step": 5992
    },
    {
      "epoch": 1.2319868434577037,
      "grad_norm": 0.18575525283813477,
      "learning_rate": 5.991439854386597e-05,
      "loss": 0.5325,
      "step": 5993
    },
    {
      "epoch": 1.2321924144310823,
      "grad_norm": 0.19030645489692688,
      "learning_rate": 5.9904974602839764e-05,
      "loss": 0.5366,
      "step": 5994
    },
    {
      "epoch": 1.2323979854044609,
      "grad_norm": 0.19156965613365173,
      "learning_rate": 5.9895549927515114e-05,
      "loss": 0.5741,
      "step": 5995
    },
    {
      "epoch": 1.2326035563778395,
      "grad_norm": 0.1905066967010498,
      "learning_rate": 5.988612451835636e-05,
      "loss": 0.5452,
      "step": 5996
    },
    {
      "epoch": 1.232809127351218,
      "grad_norm": 0.18837079405784607,
      "learning_rate": 5.987669837582782e-05,
      "loss": 0.5644,
      "step": 5997
    },
    {
      "epoch": 1.2330146983245966,
      "grad_norm": 0.1969577670097351,
      "learning_rate": 5.9867271500393884e-05,
      "loss": 0.5653,
      "step": 5998
    },
    {
      "epoch": 1.2332202692979752,
      "grad_norm": 0.1714939922094345,
      "learning_rate": 5.9857843892518975e-05,
      "loss": 0.5255,
      "step": 5999
    },
    {
      "epoch": 1.2334258402713536,
      "grad_norm": 0.16838547587394714,
      "learning_rate": 5.984841555266753e-05,
      "loss": 0.5574,
      "step": 6000
    },
    {
      "epoch": 1.2336314112447322,
      "grad_norm": 0.18724249303340912,
      "learning_rate": 5.983898648130407e-05,
      "loss": 0.5286,
      "step": 6001
    },
    {
      "epoch": 1.2338369822181108,
      "grad_norm": 0.1969245970249176,
      "learning_rate": 5.98295566788931e-05,
      "loss": 0.5673,
      "step": 6002
    },
    {
      "epoch": 1.2340425531914894,
      "grad_norm": 0.1898987591266632,
      "learning_rate": 5.982012614589917e-05,
      "loss": 0.5545,
      "step": 6003
    },
    {
      "epoch": 1.234248124164868,
      "grad_norm": 0.1573200672864914,
      "learning_rate": 5.9810694882786916e-05,
      "loss": 0.5205,
      "step": 6004
    },
    {
      "epoch": 1.2344536951382465,
      "grad_norm": 0.1741228699684143,
      "learning_rate": 5.9801262890020935e-05,
      "loss": 0.567,
      "step": 6005
    },
    {
      "epoch": 1.2346592661116251,
      "grad_norm": 0.19393646717071533,
      "learning_rate": 5.9791830168065914e-05,
      "loss": 0.5476,
      "step": 6006
    },
    {
      "epoch": 1.2348648370850035,
      "grad_norm": 0.19462937116622925,
      "learning_rate": 5.978239671738655e-05,
      "loss": 0.5361,
      "step": 6007
    },
    {
      "epoch": 1.235070408058382,
      "grad_norm": 0.18887047469615936,
      "learning_rate": 5.9772962538447604e-05,
      "loss": 0.5682,
      "step": 6008
    },
    {
      "epoch": 1.2352759790317607,
      "grad_norm": 0.19533561170101166,
      "learning_rate": 5.976352763171385e-05,
      "loss": 0.5776,
      "step": 6009
    },
    {
      "epoch": 1.2354815500051393,
      "grad_norm": 0.2016497403383255,
      "learning_rate": 5.975409199765008e-05,
      "loss": 0.5768,
      "step": 6010
    },
    {
      "epoch": 1.2356871209785179,
      "grad_norm": 0.19525597989559174,
      "learning_rate": 5.9744655636721166e-05,
      "loss": 0.5774,
      "step": 6011
    },
    {
      "epoch": 1.2358926919518964,
      "grad_norm": 0.19392353296279907,
      "learning_rate": 5.973521854939198e-05,
      "loss": 0.5451,
      "step": 6012
    },
    {
      "epoch": 1.236098262925275,
      "grad_norm": 0.1947338730096817,
      "learning_rate": 5.9725780736127456e-05,
      "loss": 0.5697,
      "step": 6013
    },
    {
      "epoch": 1.2363038338986536,
      "grad_norm": 0.20187315344810486,
      "learning_rate": 5.971634219739253e-05,
      "loss": 0.5441,
      "step": 6014
    },
    {
      "epoch": 1.236509404872032,
      "grad_norm": 0.1915546953678131,
      "learning_rate": 5.970690293365222e-05,
      "loss": 0.5692,
      "step": 6015
    },
    {
      "epoch": 1.2367149758454106,
      "grad_norm": 0.18739596009254456,
      "learning_rate": 5.969746294537153e-05,
      "loss": 0.5582,
      "step": 6016
    },
    {
      "epoch": 1.2369205468187892,
      "grad_norm": 0.18742164969444275,
      "learning_rate": 5.968802223301554e-05,
      "loss": 0.5538,
      "step": 6017
    },
    {
      "epoch": 1.2371261177921677,
      "grad_norm": 0.18883053958415985,
      "learning_rate": 5.967858079704935e-05,
      "loss": 0.5569,
      "step": 6018
    },
    {
      "epoch": 1.2373316887655463,
      "grad_norm": 0.1861804723739624,
      "learning_rate": 5.966913863793809e-05,
      "loss": 0.5506,
      "step": 6019
    },
    {
      "epoch": 1.237537259738925,
      "grad_norm": 0.1672678142786026,
      "learning_rate": 5.965969575614694e-05,
      "loss": 0.5207,
      "step": 6020
    },
    {
      "epoch": 1.2377428307123035,
      "grad_norm": 0.1628050059080124,
      "learning_rate": 5.965025215214109e-05,
      "loss": 0.564,
      "step": 6021
    },
    {
      "epoch": 1.2379484016856819,
      "grad_norm": 0.16974832117557526,
      "learning_rate": 5.964080782638579e-05,
      "loss": 0.5396,
      "step": 6022
    },
    {
      "epoch": 1.2381539726590605,
      "grad_norm": 0.1564965546131134,
      "learning_rate": 5.963136277934634e-05,
      "loss": 0.5456,
      "step": 6023
    },
    {
      "epoch": 1.238359543632439,
      "grad_norm": 0.19115638732910156,
      "learning_rate": 5.962191701148801e-05,
      "loss": 0.5821,
      "step": 6024
    },
    {
      "epoch": 1.2385651146058176,
      "grad_norm": 0.1846878081560135,
      "learning_rate": 5.9612470523276176e-05,
      "loss": 0.5708,
      "step": 6025
    },
    {
      "epoch": 1.2387706855791962,
      "grad_norm": 0.1887466162443161,
      "learning_rate": 5.9603023315176224e-05,
      "loss": 0.5633,
      "step": 6026
    },
    {
      "epoch": 1.2389762565525748,
      "grad_norm": 0.1877734214067459,
      "learning_rate": 5.959357538765356e-05,
      "loss": 0.5343,
      "step": 6027
    },
    {
      "epoch": 1.2391818275259534,
      "grad_norm": 0.1928664743900299,
      "learning_rate": 5.958412674117365e-05,
      "loss": 0.553,
      "step": 6028
    },
    {
      "epoch": 1.239387398499332,
      "grad_norm": 0.19139814376831055,
      "learning_rate": 5.957467737620199e-05,
      "loss": 0.5586,
      "step": 6029
    },
    {
      "epoch": 1.2395929694727104,
      "grad_norm": 0.18959654867649078,
      "learning_rate": 5.9565227293204084e-05,
      "loss": 0.5756,
      "step": 6030
    },
    {
      "epoch": 1.239798540446089,
      "grad_norm": 0.17210416495800018,
      "learning_rate": 5.9555776492645513e-05,
      "loss": 0.5649,
      "step": 6031
    },
    {
      "epoch": 1.2400041114194675,
      "grad_norm": 0.160491481423378,
      "learning_rate": 5.954632497499187e-05,
      "loss": 0.5464,
      "step": 6032
    },
    {
      "epoch": 1.2402096823928461,
      "grad_norm": 0.19676798582077026,
      "learning_rate": 5.9536872740708777e-05,
      "loss": 0.5877,
      "step": 6033
    },
    {
      "epoch": 1.2404152533662247,
      "grad_norm": 0.20140545070171356,
      "learning_rate": 5.952741979026192e-05,
      "loss": 0.5762,
      "step": 6034
    },
    {
      "epoch": 1.2406208243396033,
      "grad_norm": 0.19546420872211456,
      "learning_rate": 5.951796612411698e-05,
      "loss": 0.5576,
      "step": 6035
    },
    {
      "epoch": 1.240826395312982,
      "grad_norm": 0.16486842930316925,
      "learning_rate": 5.9508511742739716e-05,
      "loss": 0.5115,
      "step": 6036
    },
    {
      "epoch": 1.2410319662863603,
      "grad_norm": 0.13164182007312775,
      "learning_rate": 5.94990566465959e-05,
      "loss": 0.5294,
      "step": 6037
    },
    {
      "epoch": 1.2412375372597388,
      "grad_norm": 0.15759903192520142,
      "learning_rate": 5.9489600836151305e-05,
      "loss": 0.5432,
      "step": 6038
    },
    {
      "epoch": 1.2414431082331174,
      "grad_norm": 0.2032260000705719,
      "learning_rate": 5.948014431187181e-05,
      "loss": 0.5613,
      "step": 6039
    },
    {
      "epoch": 1.241648679206496,
      "grad_norm": 0.19559217989444733,
      "learning_rate": 5.947068707422329e-05,
      "loss": 0.5402,
      "step": 6040
    },
    {
      "epoch": 1.2418542501798746,
      "grad_norm": 0.19073714315891266,
      "learning_rate": 5.9461229123671654e-05,
      "loss": 0.534,
      "step": 6041
    },
    {
      "epoch": 1.2420598211532532,
      "grad_norm": 0.1976533830165863,
      "learning_rate": 5.9451770460682846e-05,
      "loss": 0.5591,
      "step": 6042
    },
    {
      "epoch": 1.2422653921266318,
      "grad_norm": 0.2046486884355545,
      "learning_rate": 5.944231108572287e-05,
      "loss": 0.5668,
      "step": 6043
    },
    {
      "epoch": 1.2424709631000104,
      "grad_norm": 0.19867998361587524,
      "learning_rate": 5.9432850999257705e-05,
      "loss": 0.5453,
      "step": 6044
    },
    {
      "epoch": 1.242676534073389,
      "grad_norm": 0.18936549127101898,
      "learning_rate": 5.9423390201753446e-05,
      "loss": 0.5649,
      "step": 6045
    },
    {
      "epoch": 1.2428821050467673,
      "grad_norm": 0.19626031816005707,
      "learning_rate": 5.941392869367616e-05,
      "loss": 0.5673,
      "step": 6046
    },
    {
      "epoch": 1.243087676020146,
      "grad_norm": 0.19594736397266388,
      "learning_rate": 5.9404466475492e-05,
      "loss": 0.5673,
      "step": 6047
    },
    {
      "epoch": 1.2432932469935245,
      "grad_norm": 0.19246500730514526,
      "learning_rate": 5.939500354766707e-05,
      "loss": 0.5708,
      "step": 6048
    },
    {
      "epoch": 1.243498817966903,
      "grad_norm": 0.18370835483074188,
      "learning_rate": 5.9385539910667615e-05,
      "loss": 0.5339,
      "step": 6049
    },
    {
      "epoch": 1.2437043889402817,
      "grad_norm": 0.1910664439201355,
      "learning_rate": 5.9376075564959836e-05,
      "loss": 0.5801,
      "step": 6050
    },
    {
      "epoch": 1.2439099599136603,
      "grad_norm": 0.19655410945415497,
      "learning_rate": 5.936661051101002e-05,
      "loss": 0.5389,
      "step": 6051
    },
    {
      "epoch": 1.2441155308870386,
      "grad_norm": 0.23548901081085205,
      "learning_rate": 5.9357144749284446e-05,
      "loss": 0.5509,
      "step": 6052
    },
    {
      "epoch": 1.2443211018604172,
      "grad_norm": 0.1724226176738739,
      "learning_rate": 5.934767828024946e-05,
      "loss": 0.5405,
      "step": 6053
    },
    {
      "epoch": 1.2445266728337958,
      "grad_norm": 0.16652943193912506,
      "learning_rate": 5.9338211104371424e-05,
      "loss": 0.5401,
      "step": 6054
    },
    {
      "epoch": 1.2447322438071744,
      "grad_norm": 0.20364424586296082,
      "learning_rate": 5.932874322211674e-05,
      "loss": 0.5624,
      "step": 6055
    },
    {
      "epoch": 1.244937814780553,
      "grad_norm": 0.1893276572227478,
      "learning_rate": 5.931927463395186e-05,
      "loss": 0.541,
      "step": 6056
    },
    {
      "epoch": 1.2451433857539316,
      "grad_norm": 0.1932743936777115,
      "learning_rate": 5.930980534034323e-05,
      "loss": 0.5789,
      "step": 6057
    },
    {
      "epoch": 1.2453489567273102,
      "grad_norm": 0.192164346575737,
      "learning_rate": 5.930033534175739e-05,
      "loss": 0.5711,
      "step": 6058
    },
    {
      "epoch": 1.2455545277006888,
      "grad_norm": 0.18755845725536346,
      "learning_rate": 5.9290864638660864e-05,
      "loss": 0.5503,
      "step": 6059
    },
    {
      "epoch": 1.2457600986740673,
      "grad_norm": 0.19044922292232513,
      "learning_rate": 5.928139323152022e-05,
      "loss": 0.5441,
      "step": 6060
    },
    {
      "epoch": 1.2459656696474457,
      "grad_norm": 0.16590002179145813,
      "learning_rate": 5.9271921120802106e-05,
      "loss": 0.5255,
      "step": 6061
    },
    {
      "epoch": 1.2461712406208243,
      "grad_norm": 0.16867230832576752,
      "learning_rate": 5.926244830697312e-05,
      "loss": 0.5825,
      "step": 6062
    },
    {
      "epoch": 1.2463768115942029,
      "grad_norm": 0.20571991801261902,
      "learning_rate": 5.925297479049999e-05,
      "loss": 0.552,
      "step": 6063
    },
    {
      "epoch": 1.2465823825675815,
      "grad_norm": 0.20340660214424133,
      "learning_rate": 5.92435005718494e-05,
      "loss": 0.5572,
      "step": 6064
    },
    {
      "epoch": 1.24678795354096,
      "grad_norm": 0.19198235869407654,
      "learning_rate": 5.923402565148811e-05,
      "loss": 0.5569,
      "step": 6065
    },
    {
      "epoch": 1.2469935245143386,
      "grad_norm": 0.1904488056898117,
      "learning_rate": 5.92245500298829e-05,
      "loss": 0.5641,
      "step": 6066
    },
    {
      "epoch": 1.247199095487717,
      "grad_norm": 0.1928306370973587,
      "learning_rate": 5.921507370750061e-05,
      "loss": 0.5613,
      "step": 6067
    },
    {
      "epoch": 1.2474046664610956,
      "grad_norm": 0.18856725096702576,
      "learning_rate": 5.920559668480808e-05,
      "loss": 0.5478,
      "step": 6068
    },
    {
      "epoch": 1.2476102374344742,
      "grad_norm": 0.19025270640850067,
      "learning_rate": 5.919611896227218e-05,
      "loss": 0.553,
      "step": 6069
    },
    {
      "epoch": 1.2478158084078528,
      "grad_norm": 0.18751074373722076,
      "learning_rate": 5.918664054035987e-05,
      "loss": 0.5571,
      "step": 6070
    },
    {
      "epoch": 1.2480213793812314,
      "grad_norm": 0.18929120898246765,
      "learning_rate": 5.917716141953807e-05,
      "loss": 0.5674,
      "step": 6071
    },
    {
      "epoch": 1.24822695035461,
      "grad_norm": 0.19729354977607727,
      "learning_rate": 5.916768160027381e-05,
      "loss": 0.5493,
      "step": 6072
    },
    {
      "epoch": 1.2484325213279885,
      "grad_norm": 0.1939440220594406,
      "learning_rate": 5.9158201083034086e-05,
      "loss": 0.5617,
      "step": 6073
    },
    {
      "epoch": 1.2486380923013671,
      "grad_norm": 0.19020439684391022,
      "learning_rate": 5.914871986828596e-05,
      "loss": 0.551,
      "step": 6074
    },
    {
      "epoch": 1.2488436632747457,
      "grad_norm": 0.19423425197601318,
      "learning_rate": 5.913923795649656e-05,
      "loss": 0.5513,
      "step": 6075
    },
    {
      "epoch": 1.249049234248124,
      "grad_norm": 0.1902787834405899,
      "learning_rate": 5.912975534813298e-05,
      "loss": 0.5467,
      "step": 6076
    },
    {
      "epoch": 1.2492548052215027,
      "grad_norm": 0.16620683670043945,
      "learning_rate": 5.91202720436624e-05,
      "loss": 0.5262,
      "step": 6077
    },
    {
      "epoch": 1.2494603761948813,
      "grad_norm": 0.15968933701515198,
      "learning_rate": 5.911078804355202e-05,
      "loss": 0.5616,
      "step": 6078
    },
    {
      "epoch": 1.2496659471682598,
      "grad_norm": 0.19238422811031342,
      "learning_rate": 5.910130334826906e-05,
      "loss": 0.5515,
      "step": 6079
    },
    {
      "epoch": 1.2498715181416384,
      "grad_norm": 0.19091928005218506,
      "learning_rate": 5.9091817958280786e-05,
      "loss": 0.5648,
      "step": 6080
    },
    {
      "epoch": 1.250077089115017,
      "grad_norm": 0.19049179553985596,
      "learning_rate": 5.908233187405452e-05,
      "loss": 0.55,
      "step": 6081
    },
    {
      "epoch": 1.2502826600883954,
      "grad_norm": 0.19400426745414734,
      "learning_rate": 5.907284509605757e-05,
      "loss": 0.5554,
      "step": 6082
    },
    {
      "epoch": 1.250488231061774,
      "grad_norm": 0.19264687597751617,
      "learning_rate": 5.9063357624757316e-05,
      "loss": 0.5693,
      "step": 6083
    },
    {
      "epoch": 1.2506938020351526,
      "grad_norm": 0.1882631927728653,
      "learning_rate": 5.905386946062118e-05,
      "loss": 0.5509,
      "step": 6084
    },
    {
      "epoch": 1.2508993730085312,
      "grad_norm": 0.1930553913116455,
      "learning_rate": 5.9044380604116575e-05,
      "loss": 0.5667,
      "step": 6085
    },
    {
      "epoch": 1.2511049439819097,
      "grad_norm": 0.19695702195167542,
      "learning_rate": 5.9034891055710985e-05,
      "loss": 0.5592,
      "step": 6086
    },
    {
      "epoch": 1.2513105149552883,
      "grad_norm": 0.19834263622760773,
      "learning_rate": 5.90254008158719e-05,
      "loss": 0.5621,
      "step": 6087
    },
    {
      "epoch": 1.251516085928667,
      "grad_norm": 0.19930176436901093,
      "learning_rate": 5.9015909885066885e-05,
      "loss": 0.5845,
      "step": 6088
    },
    {
      "epoch": 1.2517216569020455,
      "grad_norm": 0.1929783821105957,
      "learning_rate": 5.90064182637635e-05,
      "loss": 0.5658,
      "step": 6089
    },
    {
      "epoch": 1.251927227875424,
      "grad_norm": 0.2053227424621582,
      "learning_rate": 5.899692595242934e-05,
      "loss": 0.559,
      "step": 6090
    },
    {
      "epoch": 1.2521327988488027,
      "grad_norm": 0.1878289431333542,
      "learning_rate": 5.898743295153208e-05,
      "loss": 0.5331,
      "step": 6091
    },
    {
      "epoch": 1.252338369822181,
      "grad_norm": 0.1905200332403183,
      "learning_rate": 5.897793926153935e-05,
      "loss": 0.5687,
      "step": 6092
    },
    {
      "epoch": 1.2525439407955596,
      "grad_norm": 0.16592474281787872,
      "learning_rate": 5.89684448829189e-05,
      "loss": 0.509,
      "step": 6093
    },
    {
      "epoch": 1.2527495117689382,
      "grad_norm": 0.15698356926441193,
      "learning_rate": 5.895894981613845e-05,
      "loss": 0.558,
      "step": 6094
    },
    {
      "epoch": 1.2529550827423168,
      "grad_norm": 0.19929586350917816,
      "learning_rate": 5.89494540616658e-05,
      "loss": 0.5595,
      "step": 6095
    },
    {
      "epoch": 1.2531606537156954,
      "grad_norm": 0.19312036037445068,
      "learning_rate": 5.893995761996875e-05,
      "loss": 0.5577,
      "step": 6096
    },
    {
      "epoch": 1.2533662246890738,
      "grad_norm": 0.19632984697818756,
      "learning_rate": 5.8930460491515125e-05,
      "loss": 0.5715,
      "step": 6097
    },
    {
      "epoch": 1.2535717956624524,
      "grad_norm": 0.1999562531709671,
      "learning_rate": 5.8920962676772836e-05,
      "loss": 0.5578,
      "step": 6098
    },
    {
      "epoch": 1.253777366635831,
      "grad_norm": 0.1987222284078598,
      "learning_rate": 5.891146417620978e-05,
      "loss": 0.5777,
      "step": 6099
    },
    {
      "epoch": 1.2539829376092095,
      "grad_norm": 0.17240692675113678,
      "learning_rate": 5.8901964990293894e-05,
      "loss": 0.546,
      "step": 6100
    },
    {
      "epoch": 1.2541885085825881,
      "grad_norm": 0.1715145856142044,
      "learning_rate": 5.8892465119493184e-05,
      "loss": 0.5658,
      "step": 6101
    },
    {
      "epoch": 1.2543940795559667,
      "grad_norm": 0.18989497423171997,
      "learning_rate": 5.888296456427565e-05,
      "loss": 0.5718,
      "step": 6102
    },
    {
      "epoch": 1.2545996505293453,
      "grad_norm": 0.1893077790737152,
      "learning_rate": 5.887346332510934e-05,
      "loss": 0.572,
      "step": 6103
    },
    {
      "epoch": 1.2548052215027239,
      "grad_norm": 0.16260646283626556,
      "learning_rate": 5.886396140246233e-05,
      "loss": 0.5399,
      "step": 6104
    },
    {
      "epoch": 1.2550107924761025,
      "grad_norm": 1.3922818899154663,
      "learning_rate": 5.8854458796802744e-05,
      "loss": 0.5587,
      "step": 6105
    },
    {
      "epoch": 1.255216363449481,
      "grad_norm": 0.16991350054740906,
      "learning_rate": 5.8844955508598745e-05,
      "loss": 0.5286,
      "step": 6106
    },
    {
      "epoch": 1.2554219344228594,
      "grad_norm": 0.21412529051303864,
      "learning_rate": 5.8835451538318476e-05,
      "loss": 0.5637,
      "step": 6107
    },
    {
      "epoch": 1.255627505396238,
      "grad_norm": 0.40624189376831055,
      "learning_rate": 5.882594688643019e-05,
      "loss": 0.5364,
      "step": 6108
    },
    {
      "epoch": 1.2558330763696166,
      "grad_norm": 0.2089642882347107,
      "learning_rate": 5.881644155340213e-05,
      "loss": 0.5669,
      "step": 6109
    },
    {
      "epoch": 1.2560386473429952,
      "grad_norm": 0.4316593110561371,
      "learning_rate": 5.880693553970256e-05,
      "loss": 0.564,
      "step": 6110
    },
    {
      "epoch": 1.2562442183163738,
      "grad_norm": 0.21521629393100739,
      "learning_rate": 5.879742884579981e-05,
      "loss": 0.5774,
      "step": 6111
    },
    {
      "epoch": 1.2564497892897522,
      "grad_norm": 0.2025582194328308,
      "learning_rate": 5.878792147216223e-05,
      "loss": 0.5487,
      "step": 6112
    },
    {
      "epoch": 1.2566553602631307,
      "grad_norm": 0.21197755634784698,
      "learning_rate": 5.8778413419258204e-05,
      "loss": 0.5674,
      "step": 6113
    },
    {
      "epoch": 1.2568609312365093,
      "grad_norm": 0.21161524951457977,
      "learning_rate": 5.876890468755614e-05,
      "loss": 0.5915,
      "step": 6114
    },
    {
      "epoch": 1.257066502209888,
      "grad_norm": 0.20301292836666107,
      "learning_rate": 5.875939527752451e-05,
      "loss": 0.5569,
      "step": 6115
    },
    {
      "epoch": 1.2572720731832665,
      "grad_norm": 0.20232078433036804,
      "learning_rate": 5.874988518963178e-05,
      "loss": 0.5686,
      "step": 6116
    },
    {
      "epoch": 1.257477644156645,
      "grad_norm": 0.19668982923030853,
      "learning_rate": 5.8740374424346484e-05,
      "loss": 0.5472,
      "step": 6117
    },
    {
      "epoch": 1.2576832151300237,
      "grad_norm": 0.19299955666065216,
      "learning_rate": 5.8730862982137155e-05,
      "loss": 0.554,
      "step": 6118
    },
    {
      "epoch": 1.2578887861034023,
      "grad_norm": 0.16891315579414368,
      "learning_rate": 5.872135086347238e-05,
      "loss": 0.549,
      "step": 6119
    },
    {
      "epoch": 1.2580943570767809,
      "grad_norm": 0.19991520047187805,
      "learning_rate": 5.87118380688208e-05,
      "loss": 0.5791,
      "step": 6120
    },
    {
      "epoch": 1.2582999280501594,
      "grad_norm": 0.19644920527935028,
      "learning_rate": 5.870232459865102e-05,
      "loss": 0.5416,
      "step": 6121
    },
    {
      "epoch": 1.2585054990235378,
      "grad_norm": 0.19781053066253662,
      "learning_rate": 5.869281045343177e-05,
      "loss": 0.5701,
      "step": 6122
    },
    {
      "epoch": 1.2587110699969164,
      "grad_norm": 0.1692863404750824,
      "learning_rate": 5.868329563363175e-05,
      "loss": 0.5307,
      "step": 6123
    },
    {
      "epoch": 1.258916640970295,
      "grad_norm": 0.16794486343860626,
      "learning_rate": 5.8673780139719697e-05,
      "loss": 0.572,
      "step": 6124
    },
    {
      "epoch": 1.2591222119436736,
      "grad_norm": 0.16393691301345825,
      "learning_rate": 5.866426397216442e-05,
      "loss": 0.5017,
      "step": 6125
    },
    {
      "epoch": 1.2593277829170522,
      "grad_norm": 0.20335790514945984,
      "learning_rate": 5.8654747131434714e-05,
      "loss": 0.5663,
      "step": 6126
    },
    {
      "epoch": 1.2595333538904308,
      "grad_norm": 0.20092669129371643,
      "learning_rate": 5.864522961799944e-05,
      "loss": 0.5714,
      "step": 6127
    },
    {
      "epoch": 1.2597389248638091,
      "grad_norm": 0.16403307020664215,
      "learning_rate": 5.863571143232748e-05,
      "loss": 0.5319,
      "step": 6128
    },
    {
      "epoch": 1.2599444958371877,
      "grad_norm": 0.1622430980205536,
      "learning_rate": 5.8626192574887756e-05,
      "loss": 0.5429,
      "step": 6129
    },
    {
      "epoch": 1.2601500668105663,
      "grad_norm": 0.19496072828769684,
      "learning_rate": 5.861667304614922e-05,
      "loss": 0.5497,
      "step": 6130
    },
    {
      "epoch": 1.2603556377839449,
      "grad_norm": 0.18575909733772278,
      "learning_rate": 5.860715284658084e-05,
      "loss": 0.5494,
      "step": 6131
    },
    {
      "epoch": 1.2605612087573235,
      "grad_norm": 0.19597534835338593,
      "learning_rate": 5.8597631976651635e-05,
      "loss": 0.5602,
      "step": 6132
    },
    {
      "epoch": 1.260766779730702,
      "grad_norm": 0.1906193345785141,
      "learning_rate": 5.858811043683066e-05,
      "loss": 0.5495,
      "step": 6133
    },
    {
      "epoch": 1.2609723507040806,
      "grad_norm": 0.16364972293376923,
      "learning_rate": 5.8578588227586995e-05,
      "loss": 0.5283,
      "step": 6134
    },
    {
      "epoch": 1.2611779216774592,
      "grad_norm": 0.15908394753932953,
      "learning_rate": 5.8569065349389746e-05,
      "loss": 0.5484,
      "step": 6135
    },
    {
      "epoch": 1.2613834926508378,
      "grad_norm": 0.18748100101947784,
      "learning_rate": 5.855954180270808e-05,
      "loss": 0.5653,
      "step": 6136
    },
    {
      "epoch": 1.2615890636242162,
      "grad_norm": 0.19369830191135406,
      "learning_rate": 5.855001758801116e-05,
      "loss": 0.5627,
      "step": 6137
    },
    {
      "epoch": 1.2617946345975948,
      "grad_norm": 0.19096927344799042,
      "learning_rate": 5.8540492705768205e-05,
      "loss": 0.5464,
      "step": 6138
    },
    {
      "epoch": 1.2620002055709734,
      "grad_norm": 0.19514234364032745,
      "learning_rate": 5.853096715644847e-05,
      "loss": 0.569,
      "step": 6139
    },
    {
      "epoch": 1.262205776544352,
      "grad_norm": 0.19120776653289795,
      "learning_rate": 5.852144094052123e-05,
      "loss": 0.5634,
      "step": 6140
    },
    {
      "epoch": 1.2624113475177305,
      "grad_norm": 0.19928298890590668,
      "learning_rate": 5.851191405845579e-05,
      "loss": 0.5745,
      "step": 6141
    },
    {
      "epoch": 1.2626169184911091,
      "grad_norm": 0.1887395977973938,
      "learning_rate": 5.850238651072149e-05,
      "loss": 0.56,
      "step": 6142
    },
    {
      "epoch": 1.2628224894644875,
      "grad_norm": 0.19872866570949554,
      "learning_rate": 5.849285829778772e-05,
      "loss": 0.5627,
      "step": 6143
    },
    {
      "epoch": 1.263028060437866,
      "grad_norm": 0.16826018691062927,
      "learning_rate": 5.8483329420123906e-05,
      "loss": 0.5414,
      "step": 6144
    },
    {
      "epoch": 1.2632336314112447,
      "grad_norm": 0.16626615822315216,
      "learning_rate": 5.847379987819944e-05,
      "loss": 0.5532,
      "step": 6145
    },
    {
      "epoch": 1.2634392023846233,
      "grad_norm": 0.1921907663345337,
      "learning_rate": 5.8464269672483855e-05,
      "loss": 0.5543,
      "step": 6146
    },
    {
      "epoch": 1.2636447733580018,
      "grad_norm": 0.191694438457489,
      "learning_rate": 5.8454738803446616e-05,
      "loss": 0.5442,
      "step": 6147
    },
    {
      "epoch": 1.2638503443313804,
      "grad_norm": 0.19045263528823853,
      "learning_rate": 5.8445207271557306e-05,
      "loss": 0.5794,
      "step": 6148
    },
    {
      "epoch": 1.264055915304759,
      "grad_norm": 0.19358719885349274,
      "learning_rate": 5.843567507728545e-05,
      "loss": 0.5692,
      "step": 6149
    },
    {
      "epoch": 1.2642614862781376,
      "grad_norm": 0.19511562585830688,
      "learning_rate": 5.8426142221100706e-05,
      "loss": 0.5648,
      "step": 6150
    },
    {
      "epoch": 1.2644670572515162,
      "grad_norm": 0.1978984773159027,
      "learning_rate": 5.841660870347268e-05,
      "loss": 0.5792,
      "step": 6151
    },
    {
      "epoch": 1.2646726282248946,
      "grad_norm": 0.189521923661232,
      "learning_rate": 5.840707452487104e-05,
      "loss": 0.5421,
      "step": 6152
    },
    {
      "epoch": 1.2648781991982732,
      "grad_norm": 0.1647057980298996,
      "learning_rate": 5.8397539685765516e-05,
      "loss": 0.5296,
      "step": 6153
    },
    {
      "epoch": 1.2650837701716517,
      "grad_norm": 0.15688472986221313,
      "learning_rate": 5.8388004186625836e-05,
      "loss": 0.5423,
      "step": 6154
    },
    {
      "epoch": 1.2652893411450303,
      "grad_norm": 0.19488799571990967,
      "learning_rate": 5.8378468027921766e-05,
      "loss": 0.5396,
      "step": 6155
    },
    {
      "epoch": 1.265494912118409,
      "grad_norm": 0.19577009975910187,
      "learning_rate": 5.8368931210123085e-05,
      "loss": 0.5487,
      "step": 6156
    },
    {
      "epoch": 1.2657004830917875,
      "grad_norm": 0.19283023476600647,
      "learning_rate": 5.835939373369966e-05,
      "loss": 0.5554,
      "step": 6157
    },
    {
      "epoch": 1.2659060540651659,
      "grad_norm": 0.19187267124652863,
      "learning_rate": 5.834985559912136e-05,
      "loss": 0.5572,
      "step": 6158
    },
    {
      "epoch": 1.2661116250385445,
      "grad_norm": 0.19688525795936584,
      "learning_rate": 5.834031680685805e-05,
      "loss": 0.5667,
      "step": 6159
    },
    {
      "epoch": 1.266317196011923,
      "grad_norm": 0.17647728323936462,
      "learning_rate": 5.83307773573797e-05,
      "loss": 0.546,
      "step": 6160
    },
    {
      "epoch": 1.2665227669853016,
      "grad_norm": 0.16302068531513214,
      "learning_rate": 5.8321237251156254e-05,
      "loss": 0.5648,
      "step": 6161
    },
    {
      "epoch": 1.2667283379586802,
      "grad_norm": 0.1963539719581604,
      "learning_rate": 5.8311696488657714e-05,
      "loss": 0.5584,
      "step": 6162
    },
    {
      "epoch": 1.2669339089320588,
      "grad_norm": 0.19600288569927216,
      "learning_rate": 5.8302155070354105e-05,
      "loss": 0.5657,
      "step": 6163
    },
    {
      "epoch": 1.2671394799054374,
      "grad_norm": 0.17675581574440002,
      "learning_rate": 5.829261299671549e-05,
      "loss": 0.5394,
      "step": 6164
    },
    {
      "epoch": 1.267345050878816,
      "grad_norm": 0.16274531185626984,
      "learning_rate": 5.828307026821196e-05,
      "loss": 0.5493,
      "step": 6165
    },
    {
      "epoch": 1.2675506218521946,
      "grad_norm": 0.18789401650428772,
      "learning_rate": 5.827352688531365e-05,
      "loss": 0.5438,
      "step": 6166
    },
    {
      "epoch": 1.267756192825573,
      "grad_norm": 0.19160960614681244,
      "learning_rate": 5.82639828484907e-05,
      "loss": 0.558,
      "step": 6167
    },
    {
      "epoch": 1.2679617637989515,
      "grad_norm": 0.1683780699968338,
      "learning_rate": 5.8254438158213306e-05,
      "loss": 0.5021,
      "step": 6168
    },
    {
      "epoch": 1.2681673347723301,
      "grad_norm": 0.14388030767440796,
      "learning_rate": 5.824489281495171e-05,
      "loss": 0.5228,
      "step": 6169
    },
    {
      "epoch": 1.2683729057457087,
      "grad_norm": 0.1721310168504715,
      "learning_rate": 5.8235346819176135e-05,
      "loss": 0.5546,
      "step": 6170
    },
    {
      "epoch": 1.2685784767190873,
      "grad_norm": 0.19721747934818268,
      "learning_rate": 5.822580017135691e-05,
      "loss": 0.5533,
      "step": 6171
    },
    {
      "epoch": 1.2687840476924659,
      "grad_norm": 0.18930335342884064,
      "learning_rate": 5.8216252871964314e-05,
      "loss": 0.5671,
      "step": 6172
    },
    {
      "epoch": 1.2689896186658443,
      "grad_norm": 0.1941603124141693,
      "learning_rate": 5.8206704921468695e-05,
      "loss": 0.5594,
      "step": 6173
    },
    {
      "epoch": 1.2691951896392228,
      "grad_norm": 0.20115360617637634,
      "learning_rate": 5.819715632034048e-05,
      "loss": 0.5645,
      "step": 6174
    },
    {
      "epoch": 1.2694007606126014,
      "grad_norm": 0.19006428122520447,
      "learning_rate": 5.818760706905004e-05,
      "loss": 0.5384,
      "step": 6175
    },
    {
      "epoch": 1.26960633158598,
      "grad_norm": 0.18901333212852478,
      "learning_rate": 5.8178057168067844e-05,
      "loss": 0.5551,
      "step": 6176
    },
    {
      "epoch": 1.2698119025593586,
      "grad_norm": 0.1722274273633957,
      "learning_rate": 5.816850661786436e-05,
      "loss": 0.529,
      "step": 6177
    },
    {
      "epoch": 1.2700174735327372,
      "grad_norm": 0.16205133497714996,
      "learning_rate": 5.815895541891012e-05,
      "loss": 0.5608,
      "step": 6178
    },
    {
      "epoch": 1.2702230445061158,
      "grad_norm": 0.20700521767139435,
      "learning_rate": 5.814940357167563e-05,
      "loss": 0.5537,
      "step": 6179
    },
    {
      "epoch": 1.2704286154794944,
      "grad_norm": 0.19888941943645477,
      "learning_rate": 5.8139851076631486e-05,
      "loss": 0.5919,
      "step": 6180
    },
    {
      "epoch": 1.270634186452873,
      "grad_norm": 0.18785306811332703,
      "learning_rate": 5.813029793424831e-05,
      "loss": 0.5355,
      "step": 6181
    },
    {
      "epoch": 1.2708397574262515,
      "grad_norm": 0.1864861100912094,
      "learning_rate": 5.812074414499673e-05,
      "loss": 0.5585,
      "step": 6182
    },
    {
      "epoch": 1.27104532839963,
      "grad_norm": 0.16200599074363708,
      "learning_rate": 5.81111897093474e-05,
      "loss": 0.5484,
      "step": 6183
    },
    {
      "epoch": 1.2712508993730085,
      "grad_norm": 0.15543238818645477,
      "learning_rate": 5.8101634627771034e-05,
      "loss": 0.5398,
      "step": 6184
    },
    {
      "epoch": 1.271456470346387,
      "grad_norm": 0.1934465765953064,
      "learning_rate": 5.809207890073837e-05,
      "loss": 0.5703,
      "step": 6185
    },
    {
      "epoch": 1.2716620413197657,
      "grad_norm": 0.17177589237689972,
      "learning_rate": 5.808252252872018e-05,
      "loss": 0.535,
      "step": 6186
    },
    {
      "epoch": 1.2718676122931443,
      "grad_norm": 0.1565936654806137,
      "learning_rate": 5.807296551218723e-05,
      "loss": 0.5704,
      "step": 6187
    },
    {
      "epoch": 1.2720731832665226,
      "grad_norm": 0.1956259161233902,
      "learning_rate": 5.80634078516104e-05,
      "loss": 0.5477,
      "step": 6188
    },
    {
      "epoch": 1.2722787542399012,
      "grad_norm": 0.19236725568771362,
      "learning_rate": 5.80538495474605e-05,
      "loss": 0.5691,
      "step": 6189
    },
    {
      "epoch": 1.2724843252132798,
      "grad_norm": 0.16895383596420288,
      "learning_rate": 5.804429060020845e-05,
      "loss": 0.5185,
      "step": 6190
    },
    {
      "epoch": 1.2726898961866584,
      "grad_norm": 0.15849240124225616,
      "learning_rate": 5.8034731010325176e-05,
      "loss": 0.5699,
      "step": 6191
    },
    {
      "epoch": 1.272895467160037,
      "grad_norm": 0.1865822672843933,
      "learning_rate": 5.802517077828163e-05,
      "loss": 0.5255,
      "step": 6192
    },
    {
      "epoch": 1.2731010381334156,
      "grad_norm": 0.16672882437705994,
      "learning_rate": 5.80156099045488e-05,
      "loss": 0.5399,
      "step": 6193
    },
    {
      "epoch": 1.2733066091067942,
      "grad_norm": 0.1562536656856537,
      "learning_rate": 5.8006048389597694e-05,
      "loss": 0.55,
      "step": 6194
    },
    {
      "epoch": 1.2735121800801728,
      "grad_norm": 0.19599376618862152,
      "learning_rate": 5.7996486233899395e-05,
      "loss": 0.5545,
      "step": 6195
    },
    {
      "epoch": 1.2737177510535513,
      "grad_norm": 0.1640097200870514,
      "learning_rate": 5.798692343792495e-05,
      "loss": 0.5277,
      "step": 6196
    },
    {
      "epoch": 1.27392332202693,
      "grad_norm": 0.17527011036872864,
      "learning_rate": 5.797736000214549e-05,
      "loss": 0.5735,
      "step": 6197
    },
    {
      "epoch": 1.2741288930003083,
      "grad_norm": 0.19275882840156555,
      "learning_rate": 5.7967795927032164e-05,
      "loss": 0.5686,
      "step": 6198
    },
    {
      "epoch": 1.2743344639736869,
      "grad_norm": 0.19368760287761688,
      "learning_rate": 5.7958231213056144e-05,
      "loss": 0.5665,
      "step": 6199
    },
    {
      "epoch": 1.2745400349470655,
      "grad_norm": 0.1672065258026123,
      "learning_rate": 5.794866586068862e-05,
      "loss": 0.5532,
      "step": 6200
    },
    {
      "epoch": 1.274745605920444,
      "grad_norm": 0.1615796685218811,
      "learning_rate": 5.7939099870400865e-05,
      "loss": 0.5549,
      "step": 6201
    },
    {
      "epoch": 1.2749511768938226,
      "grad_norm": 0.18721790611743927,
      "learning_rate": 5.7929533242664137e-05,
      "loss": 0.5476,
      "step": 6202
    },
    {
      "epoch": 1.275156747867201,
      "grad_norm": 0.19924210011959076,
      "learning_rate": 5.791996597794975e-05,
      "loss": 0.5929,
      "step": 6203
    },
    {
      "epoch": 1.2753623188405796,
      "grad_norm": 0.16278637945652008,
      "learning_rate": 5.791039807672901e-05,
      "loss": 0.545,
      "step": 6204
    },
    {
      "epoch": 1.2755678898139582,
      "grad_norm": 0.12655942142009735,
      "learning_rate": 5.7900829539473304e-05,
      "loss": 0.5253,
      "step": 6205
    },
    {
      "epoch": 1.2757734607873368,
      "grad_norm": 0.16198953986167908,
      "learning_rate": 5.789126036665403e-05,
      "loss": 0.5607,
      "step": 6206
    },
    {
      "epoch": 1.2759790317607154,
      "grad_norm": 0.1700884997844696,
      "learning_rate": 5.7881690558742605e-05,
      "loss": 0.5321,
      "step": 6207
    },
    {
      "epoch": 1.276184602734094,
      "grad_norm": 0.15518617630004883,
      "learning_rate": 5.7872120116210494e-05,
      "loss": 0.5518,
      "step": 6208
    },
    {
      "epoch": 1.2763901737074725,
      "grad_norm": 0.18900856375694275,
      "learning_rate": 5.7862549039529196e-05,
      "loss": 0.5467,
      "step": 6209
    },
    {
      "epoch": 1.2765957446808511,
      "grad_norm": 0.2112400233745575,
      "learning_rate": 5.785297732917023e-05,
      "loss": 0.5821,
      "step": 6210
    },
    {
      "epoch": 1.2768013156542297,
      "grad_norm": 0.19592179358005524,
      "learning_rate": 5.784340498560513e-05,
      "loss": 0.5889,
      "step": 6211
    },
    {
      "epoch": 1.2770068866276083,
      "grad_norm": 0.1897910088300705,
      "learning_rate": 5.783383200930551e-05,
      "loss": 0.5657,
      "step": 6212
    },
    {
      "epoch": 1.2772124576009867,
      "grad_norm": 0.1914108544588089,
      "learning_rate": 5.782425840074297e-05,
      "loss": 0.5578,
      "step": 6213
    },
    {
      "epoch": 1.2774180285743653,
      "grad_norm": 0.19016936421394348,
      "learning_rate": 5.781468416038914e-05,
      "loss": 0.5599,
      "step": 6214
    },
    {
      "epoch": 1.2776235995477438,
      "grad_norm": 0.18804775178432465,
      "learning_rate": 5.780510928871574e-05,
      "loss": 0.5671,
      "step": 6215
    },
    {
      "epoch": 1.2778291705211224,
      "grad_norm": 0.18596555292606354,
      "learning_rate": 5.779553378619445e-05,
      "loss": 0.5355,
      "step": 6216
    },
    {
      "epoch": 1.278034741494501,
      "grad_norm": 0.19289173185825348,
      "learning_rate": 5.778595765329702e-05,
      "loss": 0.583,
      "step": 6217
    },
    {
      "epoch": 1.2782403124678796,
      "grad_norm": 0.18467681109905243,
      "learning_rate": 5.7776380890495214e-05,
      "loss": 0.561,
      "step": 6218
    },
    {
      "epoch": 1.278445883441258,
      "grad_norm": 0.19433990120887756,
      "learning_rate": 5.776680349826083e-05,
      "loss": 0.5548,
      "step": 6219
    },
    {
      "epoch": 1.2786514544146366,
      "grad_norm": 0.1940041035413742,
      "learning_rate": 5.7757225477065725e-05,
      "loss": 0.5654,
      "step": 6220
    },
    {
      "epoch": 1.2788570253880152,
      "grad_norm": 0.1894046515226364,
      "learning_rate": 5.774764682738174e-05,
      "loss": 0.5628,
      "step": 6221
    },
    {
      "epoch": 1.2790625963613937,
      "grad_norm": 0.20354604721069336,
      "learning_rate": 5.7738067549680776e-05,
      "loss": 0.569,
      "step": 6222
    },
    {
      "epoch": 1.2792681673347723,
      "grad_norm": 0.18965789675712585,
      "learning_rate": 5.7728487644434754e-05,
      "loss": 0.5458,
      "step": 6223
    },
    {
      "epoch": 1.279473738308151,
      "grad_norm": 0.18858371675014496,
      "learning_rate": 5.771890711211566e-05,
      "loss": 0.5415,
      "step": 6224
    },
    {
      "epoch": 1.2796793092815295,
      "grad_norm": 0.19351953268051147,
      "learning_rate": 5.7709325953195444e-05,
      "loss": 0.5504,
      "step": 6225
    },
    {
      "epoch": 1.279884880254908,
      "grad_norm": 0.18949908018112183,
      "learning_rate": 5.769974416814615e-05,
      "loss": 0.541,
      "step": 6226
    },
    {
      "epoch": 1.2800904512282867,
      "grad_norm": 0.19526349008083344,
      "learning_rate": 5.769016175743982e-05,
      "loss": 0.5634,
      "step": 6227
    },
    {
      "epoch": 1.280296022201665,
      "grad_norm": 0.17583510279655457,
      "learning_rate": 5.7680578721548524e-05,
      "loss": 0.5462,
      "step": 6228
    },
    {
      "epoch": 1.2805015931750436,
      "grad_norm": 0.1601148396730423,
      "learning_rate": 5.767099506094438e-05,
      "loss": 0.5474,
      "step": 6229
    },
    {
      "epoch": 1.2807071641484222,
      "grad_norm": 0.19925040006637573,
      "learning_rate": 5.766141077609955e-05,
      "loss": 0.5884,
      "step": 6230
    },
    {
      "epoch": 1.2809127351218008,
      "grad_norm": 0.20039363205432892,
      "learning_rate": 5.765182586748619e-05,
      "loss": 0.5624,
      "step": 6231
    },
    {
      "epoch": 1.2811183060951794,
      "grad_norm": 0.19234807789325714,
      "learning_rate": 5.764224033557649e-05,
      "loss": 0.5994,
      "step": 6232
    },
    {
      "epoch": 1.281323877068558,
      "grad_norm": 0.19299016892910004,
      "learning_rate": 5.76326541808427e-05,
      "loss": 0.5786,
      "step": 6233
    },
    {
      "epoch": 1.2815294480419364,
      "grad_norm": 0.2128915637731552,
      "learning_rate": 5.762306740375709e-05,
      "loss": 0.5763,
      "step": 6234
    },
    {
      "epoch": 1.281735019015315,
      "grad_norm": 0.19753651320934296,
      "learning_rate": 5.761348000479194e-05,
      "loss": 0.5565,
      "step": 6235
    },
    {
      "epoch": 1.2819405899886935,
      "grad_norm": 0.19530276954174042,
      "learning_rate": 5.76038919844196e-05,
      "loss": 0.567,
      "step": 6236
    },
    {
      "epoch": 1.2821461609620721,
      "grad_norm": 0.1876569539308548,
      "learning_rate": 5.7594303343112406e-05,
      "loss": 0.5358,
      "step": 6237
    },
    {
      "epoch": 1.2823517319354507,
      "grad_norm": 0.19202187657356262,
      "learning_rate": 5.758471408134276e-05,
      "loss": 0.5589,
      "step": 6238
    },
    {
      "epoch": 1.2825573029088293,
      "grad_norm": 0.2080259472131729,
      "learning_rate": 5.757512419958305e-05,
      "loss": 0.5767,
      "step": 6239
    },
    {
      "epoch": 1.2827628738822079,
      "grad_norm": 0.2008046805858612,
      "learning_rate": 5.756553369830577e-05,
      "loss": 0.5486,
      "step": 6240
    },
    {
      "epoch": 1.2829684448555865,
      "grad_norm": 0.18698541820049286,
      "learning_rate": 5.7555942577983364e-05,
      "loss": 0.5471,
      "step": 6241
    },
    {
      "epoch": 1.283174015828965,
      "grad_norm": 0.19184443354606628,
      "learning_rate": 5.754635083908835e-05,
      "loss": 0.5703,
      "step": 6242
    },
    {
      "epoch": 1.2833795868023434,
      "grad_norm": 0.18551193177700043,
      "learning_rate": 5.753675848209329e-05,
      "loss": 0.5353,
      "step": 6243
    },
    {
      "epoch": 1.283585157775722,
      "grad_norm": 0.17165902256965637,
      "learning_rate": 5.7527165507470705e-05,
      "loss": 0.5094,
      "step": 6244
    },
    {
      "epoch": 1.2837907287491006,
      "grad_norm": 0.16080299019813538,
      "learning_rate": 5.7517571915693255e-05,
      "loss": 0.5797,
      "step": 6245
    },
    {
      "epoch": 1.2839962997224792,
      "grad_norm": 0.16521471738815308,
      "learning_rate": 5.750797770723353e-05,
      "loss": 0.5199,
      "step": 6246
    },
    {
      "epoch": 1.2842018706958578,
      "grad_norm": 0.12971197068691254,
      "learning_rate": 5.749838288256421e-05,
      "loss": 0.5376,
      "step": 6247
    },
    {
      "epoch": 1.2844074416692364,
      "grad_norm": 0.13733793795108795,
      "learning_rate": 5.748878744215799e-05,
      "loss": 0.5266,
      "step": 6248
    },
    {
      "epoch": 1.2846130126426147,
      "grad_norm": 0.1690482795238495,
      "learning_rate": 5.747919138648757e-05,
      "loss": 0.5737,
      "step": 6249
    },
    {
      "epoch": 1.2848185836159933,
      "grad_norm": 0.19658613204956055,
      "learning_rate": 5.746959471602572e-05,
      "loss": 0.5531,
      "step": 6250
    },
    {
      "epoch": 1.285024154589372,
      "grad_norm": 0.1984742283821106,
      "learning_rate": 5.7459997431245236e-05,
      "loss": 0.5877,
      "step": 6251
    },
    {
      "epoch": 1.2852297255627505,
      "grad_norm": 0.1888909637928009,
      "learning_rate": 5.74503995326189e-05,
      "loss": 0.5391,
      "step": 6252
    },
    {
      "epoch": 1.285435296536129,
      "grad_norm": 0.19062168896198273,
      "learning_rate": 5.744080102061958e-05,
      "loss": 0.5662,
      "step": 6253
    },
    {
      "epoch": 1.2856408675095077,
      "grad_norm": 0.1896916627883911,
      "learning_rate": 5.7431201895720146e-05,
      "loss": 0.5658,
      "step": 6254
    },
    {
      "epoch": 1.2858464384828863,
      "grad_norm": 0.19082388281822205,
      "learning_rate": 5.742160215839349e-05,
      "loss": 0.5624,
      "step": 6255
    },
    {
      "epoch": 1.2860520094562649,
      "grad_norm": 0.1924538016319275,
      "learning_rate": 5.741200180911255e-05,
      "loss": 0.5813,
      "step": 6256
    },
    {
      "epoch": 1.2862575804296434,
      "grad_norm": 0.18487077951431274,
      "learning_rate": 5.740240084835031e-05,
      "loss": 0.5528,
      "step": 6257
    },
    {
      "epoch": 1.286463151403022,
      "grad_norm": 0.18869616091251373,
      "learning_rate": 5.7392799276579745e-05,
      "loss": 0.5472,
      "step": 6258
    },
    {
      "epoch": 1.2866687223764004,
      "grad_norm": 0.19108757376670837,
      "learning_rate": 5.738319709427386e-05,
      "loss": 0.5516,
      "step": 6259
    },
    {
      "epoch": 1.286874293349779,
      "grad_norm": 0.18827085196971893,
      "learning_rate": 5.7373594301905764e-05,
      "loss": 0.519,
      "step": 6260
    },
    {
      "epoch": 1.2870798643231576,
      "grad_norm": 0.17874634265899658,
      "learning_rate": 5.736399089994849e-05,
      "loss": 0.5608,
      "step": 6261
    },
    {
      "epoch": 1.2872854352965362,
      "grad_norm": 0.19754135608673096,
      "learning_rate": 5.73543868888752e-05,
      "loss": 0.5846,
      "step": 6262
    },
    {
      "epoch": 1.2874910062699148,
      "grad_norm": 0.16421428322792053,
      "learning_rate": 5.734478226915899e-05,
      "loss": 0.5233,
      "step": 6263
    },
    {
      "epoch": 1.2876965772432931,
      "grad_norm": 0.16342876851558685,
      "learning_rate": 5.733517704127306e-05,
      "loss": 0.5307,
      "step": 6264
    },
    {
      "epoch": 1.2879021482166717,
      "grad_norm": 0.19278982281684875,
      "learning_rate": 5.732557120569061e-05,
      "loss": 0.5424,
      "step": 6265
    },
    {
      "epoch": 1.2881077191900503,
      "grad_norm": 0.18997056782245636,
      "learning_rate": 5.731596476288488e-05,
      "loss": 0.5628,
      "step": 6266
    },
    {
      "epoch": 1.2883132901634289,
      "grad_norm": 0.19608962535858154,
      "learning_rate": 5.730635771332912e-05,
      "loss": 0.546,
      "step": 6267
    },
    {
      "epoch": 1.2885188611368075,
      "grad_norm": 0.18659254908561707,
      "learning_rate": 5.729675005749666e-05,
      "loss": 0.5634,
      "step": 6268
    },
    {
      "epoch": 1.288724432110186,
      "grad_norm": 0.1904764473438263,
      "learning_rate": 5.7287141795860774e-05,
      "loss": 0.5523,
      "step": 6269
    },
    {
      "epoch": 1.2889300030835646,
      "grad_norm": 0.685501754283905,
      "learning_rate": 5.727753292889485e-05,
      "loss": 0.5588,
      "step": 6270
    },
    {
      "epoch": 1.2891355740569432,
      "grad_norm": 0.19180195033550262,
      "learning_rate": 5.726792345707227e-05,
      "loss": 0.552,
      "step": 6271
    },
    {
      "epoch": 1.2893411450303218,
      "grad_norm": 0.18611235916614532,
      "learning_rate": 5.7258313380866436e-05,
      "loss": 0.5342,
      "step": 6272
    },
    {
      "epoch": 1.2895467160037004,
      "grad_norm": 0.1877206414937973,
      "learning_rate": 5.7248702700750796e-05,
      "loss": 0.5512,
      "step": 6273
    },
    {
      "epoch": 1.2897522869770788,
      "grad_norm": 0.19219855964183807,
      "learning_rate": 5.723909141719883e-05,
      "loss": 0.5525,
      "step": 6274
    },
    {
      "epoch": 1.2899578579504574,
      "grad_norm": 0.1869809925556183,
      "learning_rate": 5.722947953068403e-05,
      "loss": 0.541,
      "step": 6275
    },
    {
      "epoch": 1.290163428923836,
      "grad_norm": 0.19108881056308746,
      "learning_rate": 5.721986704167994e-05,
      "loss": 0.5669,
      "step": 6276
    },
    {
      "epoch": 1.2903689998972145,
      "grad_norm": 0.1971481740474701,
      "learning_rate": 5.72102539506601e-05,
      "loss": 0.5596,
      "step": 6277
    },
    {
      "epoch": 1.2905745708705931,
      "grad_norm": 0.24877598881721497,
      "learning_rate": 5.7200640258098134e-05,
      "loss": 0.5511,
      "step": 6278
    },
    {
      "epoch": 1.2907801418439715,
      "grad_norm": 0.16880907118320465,
      "learning_rate": 5.719102596446765e-05,
      "loss": 0.5211,
      "step": 6279
    },
    {
      "epoch": 1.29098571281735,
      "grad_norm": 0.16007640957832336,
      "learning_rate": 5.718141107024229e-05,
      "loss": 0.5402,
      "step": 6280
    },
    {
      "epoch": 1.2911912837907287,
      "grad_norm": 0.1952618956565857,
      "learning_rate": 5.717179557589574e-05,
      "loss": 0.5729,
      "step": 6281
    },
    {
      "epoch": 1.2913968547641073,
      "grad_norm": 0.16671602427959442,
      "learning_rate": 5.7162179481901725e-05,
      "loss": 0.5312,
      "step": 6282
    },
    {
      "epoch": 1.2916024257374858,
      "grad_norm": 0.15948770940303802,
      "learning_rate": 5.7152562788733975e-05,
      "loss": 0.5243,
      "step": 6283
    },
    {
      "epoch": 1.2918079967108644,
      "grad_norm": 0.1951056569814682,
      "learning_rate": 5.7142945496866235e-05,
      "loss": 0.5665,
      "step": 6284
    },
    {
      "epoch": 1.292013567684243,
      "grad_norm": 0.1952039748430252,
      "learning_rate": 5.713332760677234e-05,
      "loss": 0.5717,
      "step": 6285
    },
    {
      "epoch": 1.2922191386576216,
      "grad_norm": 0.1987905502319336,
      "learning_rate": 5.7123709118926104e-05,
      "loss": 0.567,
      "step": 6286
    },
    {
      "epoch": 1.2924247096310002,
      "grad_norm": 0.19743449985980988,
      "learning_rate": 5.711409003380138e-05,
      "loss": 0.5466,
      "step": 6287
    },
    {
      "epoch": 1.2926302806043788,
      "grad_norm": 0.19229763746261597,
      "learning_rate": 5.710447035187206e-05,
      "loss": 0.5583,
      "step": 6288
    },
    {
      "epoch": 1.2928358515777572,
      "grad_norm": 0.18883401155471802,
      "learning_rate": 5.709485007361208e-05,
      "loss": 0.54,
      "step": 6289
    },
    {
      "epoch": 1.2930414225511357,
      "grad_norm": 0.19647282361984253,
      "learning_rate": 5.708522919949536e-05,
      "loss": 0.583,
      "step": 6290
    },
    {
      "epoch": 1.2932469935245143,
      "grad_norm": 0.18365654349327087,
      "learning_rate": 5.707560772999587e-05,
      "loss": 0.5476,
      "step": 6291
    },
    {
      "epoch": 1.293452564497893,
      "grad_norm": 0.19475975632667542,
      "learning_rate": 5.7065985665587646e-05,
      "loss": 0.5476,
      "step": 6292
    },
    {
      "epoch": 1.2936581354712715,
      "grad_norm": 0.18907500803470612,
      "learning_rate": 5.70563630067447e-05,
      "loss": 0.5483,
      "step": 6293
    },
    {
      "epoch": 1.29386370644465,
      "grad_norm": 0.189442440867424,
      "learning_rate": 5.704673975394109e-05,
      "loss": 0.5387,
      "step": 6294
    },
    {
      "epoch": 1.2940692774180285,
      "grad_norm": 0.19112446904182434,
      "learning_rate": 5.703711590765093e-05,
      "loss": 0.5714,
      "step": 6295
    },
    {
      "epoch": 1.294274848391407,
      "grad_norm": 0.19194044172763824,
      "learning_rate": 5.7027491468348326e-05,
      "loss": 0.5521,
      "step": 6296
    },
    {
      "epoch": 1.2944804193647856,
      "grad_norm": 0.18977665901184082,
      "learning_rate": 5.7017866436507434e-05,
      "loss": 0.5738,
      "step": 6297
    },
    {
      "epoch": 1.2946859903381642,
      "grad_norm": 0.19306746125221252,
      "learning_rate": 5.700824081260243e-05,
      "loss": 0.5636,
      "step": 6298
    },
    {
      "epoch": 1.2948915613115428,
      "grad_norm": 0.19150002300739288,
      "learning_rate": 5.699861459710753e-05,
      "loss": 0.5506,
      "step": 6299
    },
    {
      "epoch": 1.2950971322849214,
      "grad_norm": 0.211594358086586,
      "learning_rate": 5.698898779049697e-05,
      "loss": 0.5631,
      "step": 6300
    },
    {
      "epoch": 1.2953027032583,
      "grad_norm": 0.19325849413871765,
      "learning_rate": 5.697936039324502e-05,
      "loss": 0.5571,
      "step": 6301
    },
    {
      "epoch": 1.2955082742316786,
      "grad_norm": 0.1876952350139618,
      "learning_rate": 5.696973240582597e-05,
      "loss": 0.5579,
      "step": 6302
    },
    {
      "epoch": 1.2957138452050572,
      "grad_norm": 0.16953028738498688,
      "learning_rate": 5.6960103828714164e-05,
      "loss": 0.5279,
      "step": 6303
    },
    {
      "epoch": 1.2959194161784355,
      "grad_norm": 0.16833354532718658,
      "learning_rate": 5.695047466238393e-05,
      "loss": 0.5394,
      "step": 6304
    },
    {
      "epoch": 1.2961249871518141,
      "grad_norm": 0.16338950395584106,
      "learning_rate": 5.694084490730967e-05,
      "loss": 0.5196,
      "step": 6305
    },
    {
      "epoch": 1.2963305581251927,
      "grad_norm": 0.16173096001148224,
      "learning_rate": 5.6931214563965805e-05,
      "loss": 0.5538,
      "step": 6306
    },
    {
      "epoch": 1.2965361290985713,
      "grad_norm": 0.19378416240215302,
      "learning_rate": 5.692158363282675e-05,
      "loss": 0.5448,
      "step": 6307
    },
    {
      "epoch": 1.2967417000719499,
      "grad_norm": 0.18964388966560364,
      "learning_rate": 5.691195211436699e-05,
      "loss": 0.5423,
      "step": 6308
    },
    {
      "epoch": 1.2969472710453285,
      "grad_norm": 0.18687476217746735,
      "learning_rate": 5.690232000906103e-05,
      "loss": 0.5643,
      "step": 6309
    },
    {
      "epoch": 1.2971528420187068,
      "grad_norm": 0.1913549154996872,
      "learning_rate": 5.689268731738339e-05,
      "loss": 0.554,
      "step": 6310
    },
    {
      "epoch": 1.2973584129920854,
      "grad_norm": 0.19576480984687805,
      "learning_rate": 5.688305403980863e-05,
      "loss": 0.5846,
      "step": 6311
    },
    {
      "epoch": 1.297563983965464,
      "grad_norm": 0.2015174776315689,
      "learning_rate": 5.687342017681135e-05,
      "loss": 0.5571,
      "step": 6312
    },
    {
      "epoch": 1.2977695549388426,
      "grad_norm": 0.1950497329235077,
      "learning_rate": 5.6863785728866154e-05,
      "loss": 0.5471,
      "step": 6313
    },
    {
      "epoch": 1.2979751259122212,
      "grad_norm": 0.19457519054412842,
      "learning_rate": 5.6854150696447686e-05,
      "loss": 0.5689,
      "step": 6314
    },
    {
      "epoch": 1.2981806968855998,
      "grad_norm": 0.18924319744110107,
      "learning_rate": 5.684451508003061e-05,
      "loss": 0.5632,
      "step": 6315
    },
    {
      "epoch": 1.2983862678589784,
      "grad_norm": 0.20829612016677856,
      "learning_rate": 5.6834878880089635e-05,
      "loss": 0.554,
      "step": 6316
    },
    {
      "epoch": 1.298591838832357,
      "grad_norm": 0.19046112895011902,
      "learning_rate": 5.6825242097099514e-05,
      "loss": 0.5508,
      "step": 6317
    },
    {
      "epoch": 1.2987974098057355,
      "grad_norm": 0.19234079122543335,
      "learning_rate": 5.681560473153495e-05,
      "loss": 0.5417,
      "step": 6318
    },
    {
      "epoch": 1.299002980779114,
      "grad_norm": 0.19579647481441498,
      "learning_rate": 5.68059667838708e-05,
      "loss": 0.5449,
      "step": 6319
    },
    {
      "epoch": 1.2992085517524925,
      "grad_norm": 0.19146116077899933,
      "learning_rate": 5.679632825458184e-05,
      "loss": 0.5603,
      "step": 6320
    },
    {
      "epoch": 1.299414122725871,
      "grad_norm": 0.19622944295406342,
      "learning_rate": 5.6786689144142917e-05,
      "loss": 0.5568,
      "step": 6321
    },
    {
      "epoch": 1.2996196936992497,
      "grad_norm": 0.19650766253471375,
      "learning_rate": 5.6777049453028914e-05,
      "loss": 0.5603,
      "step": 6322
    },
    {
      "epoch": 1.2998252646726283,
      "grad_norm": 0.20279136300086975,
      "learning_rate": 5.676740918171472e-05,
      "loss": 0.5455,
      "step": 6323
    },
    {
      "epoch": 1.3000308356460069,
      "grad_norm": 0.1786477267742157,
      "learning_rate": 5.67577683306753e-05,
      "loss": 0.5148,
      "step": 6324
    },
    {
      "epoch": 1.3002364066193852,
      "grad_norm": 0.15858376026153564,
      "learning_rate": 5.674812690038557e-05,
      "loss": 0.5217,
      "step": 6325
    },
    {
      "epoch": 1.3004419775927638,
      "grad_norm": 0.16333921253681183,
      "learning_rate": 5.673848489132054e-05,
      "loss": 0.5504,
      "step": 6326
    },
    {
      "epoch": 1.3006475485661424,
      "grad_norm": 0.20864447951316833,
      "learning_rate": 5.672884230395524e-05,
      "loss": 0.5664,
      "step": 6327
    },
    {
      "epoch": 1.300853119539521,
      "grad_norm": 0.20059353113174438,
      "learning_rate": 5.6719199138764686e-05,
      "loss": 0.575,
      "step": 6328
    },
    {
      "epoch": 1.3010586905128996,
      "grad_norm": 0.1858949512243271,
      "learning_rate": 5.670955539622396e-05,
      "loss": 0.535,
      "step": 6329
    },
    {
      "epoch": 1.3012642614862782,
      "grad_norm": 0.1687631458044052,
      "learning_rate": 5.669991107680818e-05,
      "loss": 0.54,
      "step": 6330
    },
    {
      "epoch": 1.3014698324596568,
      "grad_norm": 0.16431094706058502,
      "learning_rate": 5.6690266180992464e-05,
      "loss": 0.5506,
      "step": 6331
    },
    {
      "epoch": 1.3016754034330353,
      "grad_norm": 0.21161231398582458,
      "learning_rate": 5.668062070925197e-05,
      "loss": 0.5579,
      "step": 6332
    },
    {
      "epoch": 1.301880974406414,
      "grad_norm": 0.20481392741203308,
      "learning_rate": 5.66709746620619e-05,
      "loss": 0.5693,
      "step": 6333
    },
    {
      "epoch": 1.3020865453797923,
      "grad_norm": 0.2095717191696167,
      "learning_rate": 5.6661328039897456e-05,
      "loss": 0.5543,
      "step": 6334
    },
    {
      "epoch": 1.3022921163531709,
      "grad_norm": 0.17169706523418427,
      "learning_rate": 5.665168084323387e-05,
      "loss": 0.513,
      "step": 6335
    },
    {
      "epoch": 1.3024976873265495,
      "grad_norm": 0.184236079454422,
      "learning_rate": 5.664203307254644e-05,
      "loss": 0.5606,
      "step": 6336
    },
    {
      "epoch": 1.302703258299928,
      "grad_norm": 0.210636168718338,
      "learning_rate": 5.6632384728310464e-05,
      "loss": 0.5587,
      "step": 6337
    },
    {
      "epoch": 1.3029088292733066,
      "grad_norm": 0.20916485786437988,
      "learning_rate": 5.6622735811001255e-05,
      "loss": 0.5563,
      "step": 6338
    },
    {
      "epoch": 1.3031144002466852,
      "grad_norm": 0.19716860353946686,
      "learning_rate": 5.6613086321094175e-05,
      "loss": 0.5461,
      "step": 6339
    },
    {
      "epoch": 1.3033199712200636,
      "grad_norm": 0.20383410155773163,
      "learning_rate": 5.660343625906461e-05,
      "loss": 0.5711,
      "step": 6340
    },
    {
      "epoch": 1.3035255421934422,
      "grad_norm": 0.19553574919700623,
      "learning_rate": 5.6593785625387965e-05,
      "loss": 0.5719,
      "step": 6341
    },
    {
      "epoch": 1.3037311131668208,
      "grad_norm": 0.20345737040042877,
      "learning_rate": 5.65841344205397e-05,
      "loss": 0.5902,
      "step": 6342
    },
    {
      "epoch": 1.3039366841401994,
      "grad_norm": 0.1968560367822647,
      "learning_rate": 5.657448264499528e-05,
      "loss": 0.5552,
      "step": 6343
    },
    {
      "epoch": 1.304142255113578,
      "grad_norm": 0.19714896380901337,
      "learning_rate": 5.6564830299230204e-05,
      "loss": 0.5477,
      "step": 6344
    },
    {
      "epoch": 1.3043478260869565,
      "grad_norm": 0.2418747991323471,
      "learning_rate": 5.6555177383719986e-05,
      "loss": 0.5675,
      "step": 6345
    },
    {
      "epoch": 1.3045533970603351,
      "grad_norm": 0.16260170936584473,
      "learning_rate": 5.654552389894019e-05,
      "loss": 0.5324,
      "step": 6346
    },
    {
      "epoch": 1.3047589680337137,
      "grad_norm": 0.15336725115776062,
      "learning_rate": 5.653586984536639e-05,
      "loss": 0.5376,
      "step": 6347
    },
    {
      "epoch": 1.3049645390070923,
      "grad_norm": 0.13179324567317963,
      "learning_rate": 5.652621522347421e-05,
      "loss": 0.5133,
      "step": 6348
    },
    {
      "epoch": 1.305170109980471,
      "grad_norm": 0.16065613925457,
      "learning_rate": 5.651656003373927e-05,
      "loss": 0.5376,
      "step": 6349
    },
    {
      "epoch": 1.3053756809538493,
      "grad_norm": 0.20791570842266083,
      "learning_rate": 5.650690427663725e-05,
      "loss": 0.5707,
      "step": 6350
    },
    {
      "epoch": 1.3055812519272278,
      "grad_norm": 0.19432078301906586,
      "learning_rate": 5.649724795264384e-05,
      "loss": 0.5642,
      "step": 6351
    },
    {
      "epoch": 1.3057868229006064,
      "grad_norm": 0.19507555663585663,
      "learning_rate": 5.6487591062234756e-05,
      "loss": 0.5484,
      "step": 6352
    },
    {
      "epoch": 1.305992393873985,
      "grad_norm": 0.18937799334526062,
      "learning_rate": 5.647793360588575e-05,
      "loss": 0.5504,
      "step": 6353
    },
    {
      "epoch": 1.3061979648473636,
      "grad_norm": 0.18545973300933838,
      "learning_rate": 5.646827558407261e-05,
      "loss": 0.5353,
      "step": 6354
    },
    {
      "epoch": 1.306403535820742,
      "grad_norm": 0.210302472114563,
      "learning_rate": 5.645861699727114e-05,
      "loss": 0.5373,
      "step": 6355
    },
    {
      "epoch": 1.3066091067941206,
      "grad_norm": 0.20394356548786163,
      "learning_rate": 5.644895784595715e-05,
      "loss": 0.5707,
      "step": 6356
    },
    {
      "epoch": 1.3068146777674992,
      "grad_norm": 0.20221911370754242,
      "learning_rate": 5.6439298130606546e-05,
      "loss": 0.5635,
      "step": 6357
    },
    {
      "epoch": 1.3070202487408777,
      "grad_norm": 0.20493952929973602,
      "learning_rate": 5.642963785169518e-05,
      "loss": 0.5635,
      "step": 6358
    },
    {
      "epoch": 1.3072258197142563,
      "grad_norm": 0.2118876874446869,
      "learning_rate": 5.641997700969898e-05,
      "loss": 0.5578,
      "step": 6359
    },
    {
      "epoch": 1.307431390687635,
      "grad_norm": 0.1980256587266922,
      "learning_rate": 5.6410315605093875e-05,
      "loss": 0.5551,
      "step": 6360
    },
    {
      "epoch": 1.3076369616610135,
      "grad_norm": 0.20084832608699799,
      "learning_rate": 5.640065363835586e-05,
      "loss": 0.569,
      "step": 6361
    },
    {
      "epoch": 1.307842532634392,
      "grad_norm": 0.1686294972896576,
      "learning_rate": 5.639099110996092e-05,
      "loss": 0.5371,
      "step": 6362
    },
    {
      "epoch": 1.3080481036077707,
      "grad_norm": 0.15857572853565216,
      "learning_rate": 5.63813280203851e-05,
      "loss": 0.5402,
      "step": 6363
    },
    {
      "epoch": 1.3082536745811493,
      "grad_norm": 0.15745136141777039,
      "learning_rate": 5.6371664370104435e-05,
      "loss": 0.5196,
      "step": 6364
    },
    {
      "epoch": 1.3084592455545276,
      "grad_norm": 0.15688499808311462,
      "learning_rate": 5.6362000159595034e-05,
      "loss": 0.5361,
      "step": 6365
    },
    {
      "epoch": 1.3086648165279062,
      "grad_norm": 0.18788595497608185,
      "learning_rate": 5.635233538933298e-05,
      "loss": 0.551,
      "step": 6366
    },
    {
      "epoch": 1.3088703875012848,
      "grad_norm": 0.19345730543136597,
      "learning_rate": 5.634267005979442e-05,
      "loss": 0.5762,
      "step": 6367
    },
    {
      "epoch": 1.3090759584746634,
      "grad_norm": 0.1903630942106247,
      "learning_rate": 5.633300417145553e-05,
      "loss": 0.5489,
      "step": 6368
    },
    {
      "epoch": 1.309281529448042,
      "grad_norm": 0.19679617881774902,
      "learning_rate": 5.632333772479249e-05,
      "loss": 0.5641,
      "step": 6369
    },
    {
      "epoch": 1.3094871004214204,
      "grad_norm": 0.19722123444080353,
      "learning_rate": 5.631367072028152e-05,
      "loss": 0.5428,
      "step": 6370
    },
    {
      "epoch": 1.309692671394799,
      "grad_norm": 0.19673387706279755,
      "learning_rate": 5.630400315839888e-05,
      "loss": 0.5763,
      "step": 6371
    },
    {
      "epoch": 1.3098982423681775,
      "grad_norm": 0.19249959290027618,
      "learning_rate": 5.629433503962084e-05,
      "loss": 0.5687,
      "step": 6372
    },
    {
      "epoch": 1.3101038133415561,
      "grad_norm": 0.18873926997184753,
      "learning_rate": 5.6284666364423695e-05,
      "loss": 0.557,
      "step": 6373
    },
    {
      "epoch": 1.3103093843149347,
      "grad_norm": 0.2006826251745224,
      "learning_rate": 5.627499713328378e-05,
      "loss": 0.549,
      "step": 6374
    },
    {
      "epoch": 1.3105149552883133,
      "grad_norm": 0.15970605611801147,
      "learning_rate": 5.6265327346677465e-05,
      "loss": 0.5264,
      "step": 6375
    },
    {
      "epoch": 1.3107205262616919,
      "grad_norm": 0.16438056528568268,
      "learning_rate": 5.6255657005081134e-05,
      "loss": 0.5647,
      "step": 6376
    },
    {
      "epoch": 1.3109260972350705,
      "grad_norm": 0.19391551613807678,
      "learning_rate": 5.624598610897117e-05,
      "loss": 0.5691,
      "step": 6377
    },
    {
      "epoch": 1.311131668208449,
      "grad_norm": 0.19656315445899963,
      "learning_rate": 5.623631465882405e-05,
      "loss": 0.5626,
      "step": 6378
    },
    {
      "epoch": 1.3113372391818277,
      "grad_norm": 0.18690890073776245,
      "learning_rate": 5.622664265511623e-05,
      "loss": 0.5395,
      "step": 6379
    },
    {
      "epoch": 1.311542810155206,
      "grad_norm": 0.19605736434459686,
      "learning_rate": 5.621697009832418e-05,
      "loss": 0.5796,
      "step": 6380
    },
    {
      "epoch": 1.3117483811285846,
      "grad_norm": 0.19763530790805817,
      "learning_rate": 5.620729698892445e-05,
      "loss": 0.5447,
      "step": 6381
    },
    {
      "epoch": 1.3119539521019632,
      "grad_norm": 0.18934392929077148,
      "learning_rate": 5.6197623327393584e-05,
      "loss": 0.575,
      "step": 6382
    },
    {
      "epoch": 1.3121595230753418,
      "grad_norm": 0.19040028750896454,
      "learning_rate": 5.6187949114208155e-05,
      "loss": 0.5448,
      "step": 6383
    },
    {
      "epoch": 1.3123650940487204,
      "grad_norm": 0.20778769254684448,
      "learning_rate": 5.6178274349844766e-05,
      "loss": 0.5336,
      "step": 6384
    },
    {
      "epoch": 1.312570665022099,
      "grad_norm": 0.18825723230838776,
      "learning_rate": 5.6168599034780034e-05,
      "loss": 0.5409,
      "step": 6385
    },
    {
      "epoch": 1.3127762359954773,
      "grad_norm": 0.1885683834552765,
      "learning_rate": 5.615892316949064e-05,
      "loss": 0.5617,
      "step": 6386
    },
    {
      "epoch": 1.312981806968856,
      "grad_norm": 0.16970692574977875,
      "learning_rate": 5.614924675445325e-05,
      "loss": 0.5322,
      "step": 6387
    },
    {
      "epoch": 1.3131873779422345,
      "grad_norm": 0.1596226543188095,
      "learning_rate": 5.613956979014459e-05,
      "loss": 0.5696,
      "step": 6388
    },
    {
      "epoch": 1.313392948915613,
      "grad_norm": 0.18783892691135406,
      "learning_rate": 5.61298922770414e-05,
      "loss": 0.5507,
      "step": 6389
    },
    {
      "epoch": 1.3135985198889917,
      "grad_norm": 0.2017127424478531,
      "learning_rate": 5.612021421562043e-05,
      "loss": 0.5858,
      "step": 6390
    },
    {
      "epoch": 1.3138040908623703,
      "grad_norm": 0.1910979151725769,
      "learning_rate": 5.611053560635848e-05,
      "loss": 0.5607,
      "step": 6391
    },
    {
      "epoch": 1.3140096618357489,
      "grad_norm": 0.2119234949350357,
      "learning_rate": 5.6100856449732384e-05,
      "loss": 0.5665,
      "step": 6392
    },
    {
      "epoch": 1.3142152328091274,
      "grad_norm": 0.19099730253219604,
      "learning_rate": 5.609117674621896e-05,
      "loss": 0.5601,
      "step": 6393
    },
    {
      "epoch": 1.314420803782506,
      "grad_norm": 0.18972419202327728,
      "learning_rate": 5.60814964962951e-05,
      "loss": 0.5419,
      "step": 6394
    },
    {
      "epoch": 1.3146263747558844,
      "grad_norm": 0.15883517265319824,
      "learning_rate": 5.6071815700437716e-05,
      "loss": 0.5145,
      "step": 6395
    },
    {
      "epoch": 1.314831945729263,
      "grad_norm": 0.1622246950864792,
      "learning_rate": 5.606213435912371e-05,
      "loss": 0.5542,
      "step": 6396
    },
    {
      "epoch": 1.3150375167026416,
      "grad_norm": 0.20873090624809265,
      "learning_rate": 5.605245247283005e-05,
      "loss": 0.5812,
      "step": 6397
    },
    {
      "epoch": 1.3152430876760202,
      "grad_norm": 0.1877153068780899,
      "learning_rate": 5.604277004203371e-05,
      "loss": 0.5479,
      "step": 6398
    },
    {
      "epoch": 1.3154486586493987,
      "grad_norm": 0.19027303159236908,
      "learning_rate": 5.6033087067211714e-05,
      "loss": 0.5552,
      "step": 6399
    },
    {
      "epoch": 1.3156542296227773,
      "grad_norm": 0.19082914292812347,
      "learning_rate": 5.602340354884108e-05,
      "loss": 0.5544,
      "step": 6400
    },
    {
      "epoch": 1.3158598005961557,
      "grad_norm": 0.1900823563337326,
      "learning_rate": 5.601371948739888e-05,
      "loss": 0.5564,
      "step": 6401
    },
    {
      "epoch": 1.3160653715695343,
      "grad_norm": 0.1659982055425644,
      "learning_rate": 5.60040348833622e-05,
      "loss": 0.5338,
      "step": 6402
    },
    {
      "epoch": 1.3162709425429129,
      "grad_norm": 0.16377677023410797,
      "learning_rate": 5.599434973720815e-05,
      "loss": 0.5685,
      "step": 6403
    },
    {
      "epoch": 1.3164765135162915,
      "grad_norm": 0.1914215385913849,
      "learning_rate": 5.5984664049413884e-05,
      "loss": 0.5734,
      "step": 6404
    },
    {
      "epoch": 1.31668208448967,
      "grad_norm": 0.19817842543125153,
      "learning_rate": 5.5974977820456546e-05,
      "loss": 0.5658,
      "step": 6405
    },
    {
      "epoch": 1.3168876554630486,
      "grad_norm": 0.1932641863822937,
      "learning_rate": 5.596529105081336e-05,
      "loss": 0.5597,
      "step": 6406
    },
    {
      "epoch": 1.3170932264364272,
      "grad_norm": 0.18866626918315887,
      "learning_rate": 5.595560374096154e-05,
      "loss": 0.5736,
      "step": 6407
    },
    {
      "epoch": 1.3172987974098058,
      "grad_norm": 0.1907801777124405,
      "learning_rate": 5.594591589137831e-05,
      "loss": 0.575,
      "step": 6408
    },
    {
      "epoch": 1.3175043683831844,
      "grad_norm": 0.18488825857639313,
      "learning_rate": 5.5936227502540984e-05,
      "loss": 0.5658,
      "step": 6409
    },
    {
      "epoch": 1.3177099393565628,
      "grad_norm": 0.18911798298358917,
      "learning_rate": 5.592653857492684e-05,
      "loss": 0.5505,
      "step": 6410
    },
    {
      "epoch": 1.3179155103299414,
      "grad_norm": 0.161835715174675,
      "learning_rate": 5.59168491090132e-05,
      "loss": 0.5313,
      "step": 6411
    },
    {
      "epoch": 1.31812108130332,
      "grad_norm": 0.15991567075252533,
      "learning_rate": 5.590715910527745e-05,
      "loss": 0.5707,
      "step": 6412
    },
    {
      "epoch": 1.3183266522766985,
      "grad_norm": 0.1980849802494049,
      "learning_rate": 5.589746856419694e-05,
      "loss": 0.5339,
      "step": 6413
    },
    {
      "epoch": 1.3185322232500771,
      "grad_norm": 0.1609208732843399,
      "learning_rate": 5.58877774862491e-05,
      "loss": 0.5264,
      "step": 6414
    },
    {
      "epoch": 1.3187377942234557,
      "grad_norm": 0.16349831223487854,
      "learning_rate": 5.587808587191134e-05,
      "loss": 0.5642,
      "step": 6415
    },
    {
      "epoch": 1.318943365196834,
      "grad_norm": 0.1919315755367279,
      "learning_rate": 5.586839372166113e-05,
      "loss": 0.57,
      "step": 6416
    },
    {
      "epoch": 1.3191489361702127,
      "grad_norm": 0.19255201518535614,
      "learning_rate": 5.585870103597596e-05,
      "loss": 0.5692,
      "step": 6417
    },
    {
      "epoch": 1.3193545071435913,
      "grad_norm": 0.1922633796930313,
      "learning_rate": 5.584900781533334e-05,
      "loss": 0.5675,
      "step": 6418
    },
    {
      "epoch": 1.3195600781169698,
      "grad_norm": 0.19982829689979553,
      "learning_rate": 5.5839314060210826e-05,
      "loss": 0.5711,
      "step": 6419
    },
    {
      "epoch": 1.3197656490903484,
      "grad_norm": 0.19519644975662231,
      "learning_rate": 5.582961977108598e-05,
      "loss": 0.5645,
      "step": 6420
    },
    {
      "epoch": 1.319971220063727,
      "grad_norm": 0.19568218290805817,
      "learning_rate": 5.5819924948436374e-05,
      "loss": 0.5638,
      "step": 6421
    },
    {
      "epoch": 1.3201767910371056,
      "grad_norm": 0.1948254555463791,
      "learning_rate": 5.581022959273963e-05,
      "loss": 0.5511,
      "step": 6422
    },
    {
      "epoch": 1.3203823620104842,
      "grad_norm": 0.19327300786972046,
      "learning_rate": 5.580053370447341e-05,
      "loss": 0.523,
      "step": 6423
    },
    {
      "epoch": 1.3205879329838628,
      "grad_norm": 0.19158729910850525,
      "learning_rate": 5.5790837284115365e-05,
      "loss": 0.5628,
      "step": 6424
    },
    {
      "epoch": 1.3207935039572412,
      "grad_norm": 0.2012944519519806,
      "learning_rate": 5.578114033214322e-05,
      "loss": 0.5486,
      "step": 6425
    },
    {
      "epoch": 1.3209990749306197,
      "grad_norm": 0.19401337206363678,
      "learning_rate": 5.577144284903466e-05,
      "loss": 0.569,
      "step": 6426
    },
    {
      "epoch": 1.3212046459039983,
      "grad_norm": 0.19512306153774261,
      "learning_rate": 5.576174483526748e-05,
      "loss": 0.5581,
      "step": 6427
    },
    {
      "epoch": 1.321410216877377,
      "grad_norm": 0.18876834213733673,
      "learning_rate": 5.5752046291319415e-05,
      "loss": 0.5591,
      "step": 6428
    },
    {
      "epoch": 1.3216157878507555,
      "grad_norm": 0.19513283669948578,
      "learning_rate": 5.574234721766829e-05,
      "loss": 0.57,
      "step": 6429
    },
    {
      "epoch": 1.321821358824134,
      "grad_norm": 0.16624127328395844,
      "learning_rate": 5.5732647614791933e-05,
      "loss": 0.5405,
      "step": 6430
    },
    {
      "epoch": 1.3220269297975125,
      "grad_norm": 0.16485817730426788,
      "learning_rate": 5.572294748316818e-05,
      "loss": 0.54,
      "step": 6431
    },
    {
      "epoch": 1.322232500770891,
      "grad_norm": 0.16315220296382904,
      "learning_rate": 5.571324682327493e-05,
      "loss": 0.5326,
      "step": 6432
    },
    {
      "epoch": 1.3224380717442696,
      "grad_norm": 0.17077341675758362,
      "learning_rate": 5.570354563559009e-05,
      "loss": 0.5464,
      "step": 6433
    },
    {
      "epoch": 1.3226436427176482,
      "grad_norm": 0.19310691952705383,
      "learning_rate": 5.569384392059158e-05,
      "loss": 0.5544,
      "step": 6434
    },
    {
      "epoch": 1.3228492136910268,
      "grad_norm": 0.19178032875061035,
      "learning_rate": 5.568414167875736e-05,
      "loss": 0.5595,
      "step": 6435
    },
    {
      "epoch": 1.3230547846644054,
      "grad_norm": 0.19363771378993988,
      "learning_rate": 5.567443891056542e-05,
      "loss": 0.5565,
      "step": 6436
    },
    {
      "epoch": 1.323260355637784,
      "grad_norm": 0.16950379312038422,
      "learning_rate": 5.566473561649376e-05,
      "loss": 0.5465,
      "step": 6437
    },
    {
      "epoch": 1.3234659266111626,
      "grad_norm": 0.15700620412826538,
      "learning_rate": 5.565503179702043e-05,
      "loss": 0.5377,
      "step": 6438
    },
    {
      "epoch": 1.3236714975845412,
      "grad_norm": 0.16397301852703094,
      "learning_rate": 5.564532745262348e-05,
      "loss": 0.536,
      "step": 6439
    },
    {
      "epoch": 1.3238770685579198,
      "grad_norm": 0.20148152112960815,
      "learning_rate": 5.5635622583781e-05,
      "loss": 0.5598,
      "step": 6440
    },
    {
      "epoch": 1.3240826395312981,
      "grad_norm": 0.16813023388385773,
      "learning_rate": 5.562591719097112e-05,
      "loss": 0.5117,
      "step": 6441
    },
    {
      "epoch": 1.3242882105046767,
      "grad_norm": 0.15760543942451477,
      "learning_rate": 5.5616211274671956e-05,
      "loss": 0.5487,
      "step": 6442
    },
    {
      "epoch": 1.3244937814780553,
      "grad_norm": 0.18859198689460754,
      "learning_rate": 5.5606504835361675e-05,
      "loss": 0.5293,
      "step": 6443
    },
    {
      "epoch": 1.3246993524514339,
      "grad_norm": 0.19250252842903137,
      "learning_rate": 5.559679787351849e-05,
      "loss": 0.5722,
      "step": 6444
    },
    {
      "epoch": 1.3249049234248125,
      "grad_norm": 0.1938043236732483,
      "learning_rate": 5.558709038962061e-05,
      "loss": 0.553,
      "step": 6445
    },
    {
      "epoch": 1.3251104943981908,
      "grad_norm": 0.19342714548110962,
      "learning_rate": 5.557738238414624e-05,
      "loss": 0.5467,
      "step": 6446
    },
    {
      "epoch": 1.3253160653715694,
      "grad_norm": 0.20176750421524048,
      "learning_rate": 5.556767385757371e-05,
      "loss": 0.5503,
      "step": 6447
    },
    {
      "epoch": 1.325521636344948,
      "grad_norm": 0.19387808442115784,
      "learning_rate": 5.555796481038127e-05,
      "loss": 0.5651,
      "step": 6448
    },
    {
      "epoch": 1.3257272073183266,
      "grad_norm": 0.17772021889686584,
      "learning_rate": 5.5548255243047236e-05,
      "loss": 0.506,
      "step": 6449
    },
    {
      "epoch": 1.3259327782917052,
      "grad_norm": 0.1652149111032486,
      "learning_rate": 5.553854515604998e-05,
      "loss": 0.5591,
      "step": 6450
    },
    {
      "epoch": 1.3261383492650838,
      "grad_norm": 0.19004401564598083,
      "learning_rate": 5.552883454986786e-05,
      "loss": 0.5616,
      "step": 6451
    },
    {
      "epoch": 1.3263439202384624,
      "grad_norm": 0.1958709955215454,
      "learning_rate": 5.551912342497929e-05,
      "loss": 0.5523,
      "step": 6452
    },
    {
      "epoch": 1.326549491211841,
      "grad_norm": 0.18773847818374634,
      "learning_rate": 5.550941178186265e-05,
      "loss": 0.5625,
      "step": 6453
    },
    {
      "epoch": 1.3267550621852195,
      "grad_norm": 0.16042830049991608,
      "learning_rate": 5.549969962099643e-05,
      "loss": 0.5096,
      "step": 6454
    },
    {
      "epoch": 1.3269606331585981,
      "grad_norm": 0.1585341989994049,
      "learning_rate": 5.548998694285908e-05,
      "loss": 0.5587,
      "step": 6455
    },
    {
      "epoch": 1.3271662041319765,
      "grad_norm": 0.18803685903549194,
      "learning_rate": 5.54802737479291e-05,
      "loss": 0.5649,
      "step": 6456
    },
    {
      "epoch": 1.327371775105355,
      "grad_norm": 0.1625043749809265,
      "learning_rate": 5.5470560036685025e-05,
      "loss": 0.5228,
      "step": 6457
    },
    {
      "epoch": 1.3275773460787337,
      "grad_norm": 0.1575174331665039,
      "learning_rate": 5.54608458096054e-05,
      "loss": 0.5426,
      "step": 6458
    },
    {
      "epoch": 1.3277829170521123,
      "grad_norm": 0.19953930377960205,
      "learning_rate": 5.545113106716877e-05,
      "loss": 0.5559,
      "step": 6459
    },
    {
      "epoch": 1.3279884880254909,
      "grad_norm": 0.2004413902759552,
      "learning_rate": 5.5441415809853786e-05,
      "loss": 0.5624,
      "step": 6460
    },
    {
      "epoch": 1.3281940589988694,
      "grad_norm": 0.18838083744049072,
      "learning_rate": 5.543170003813903e-05,
      "loss": 0.5626,
      "step": 6461
    },
    {
      "epoch": 1.3283996299722478,
      "grad_norm": 0.1713562160730362,
      "learning_rate": 5.542198375250319e-05,
      "loss": 0.5454,
      "step": 6462
    },
    {
      "epoch": 1.3286052009456264,
      "grad_norm": 0.13531114161014557,
      "learning_rate": 5.5412266953424905e-05,
      "loss": 0.5289,
      "step": 6463
    },
    {
      "epoch": 1.328810771919005,
      "grad_norm": 0.16264608502388,
      "learning_rate": 5.540254964138291e-05,
      "loss": 0.5403,
      "step": 6464
    },
    {
      "epoch": 1.3290163428923836,
      "grad_norm": 0.16079317033290863,
      "learning_rate": 5.5392831816855915e-05,
      "loss": 0.5081,
      "step": 6465
    },
    {
      "epoch": 1.3292219138657622,
      "grad_norm": 0.15615412592887878,
      "learning_rate": 5.538311348032266e-05,
      "loss": 0.558,
      "step": 6466
    },
    {
      "epoch": 1.3294274848391407,
      "grad_norm": 0.18808799982070923,
      "learning_rate": 5.5373394632261934e-05,
      "loss": 0.5462,
      "step": 6467
    },
    {
      "epoch": 1.3296330558125193,
      "grad_norm": 0.1914406418800354,
      "learning_rate": 5.536367527315255e-05,
      "loss": 0.5668,
      "step": 6468
    },
    {
      "epoch": 1.329838626785898,
      "grad_norm": 0.27818214893341064,
      "learning_rate": 5.5353955403473325e-05,
      "loss": 0.5524,
      "step": 6469
    },
    {
      "epoch": 1.3300441977592765,
      "grad_norm": 0.19103524088859558,
      "learning_rate": 5.53442350237031e-05,
      "loss": 0.577,
      "step": 6470
    },
    {
      "epoch": 1.3302497687326549,
      "grad_norm": 0.17256119847297668,
      "learning_rate": 5.533451413432077e-05,
      "loss": 0.5307,
      "step": 6471
    },
    {
      "epoch": 1.3304553397060335,
      "grad_norm": 0.1665564626455307,
      "learning_rate": 5.532479273580523e-05,
      "loss": 0.5791,
      "step": 6472
    },
    {
      "epoch": 1.330660910679412,
      "grad_norm": 0.16080975532531738,
      "learning_rate": 5.531507082863542e-05,
      "loss": 0.5073,
      "step": 6473
    },
    {
      "epoch": 1.3308664816527906,
      "grad_norm": 0.16216245293617249,
      "learning_rate": 5.5305348413290264e-05,
      "loss": 0.5609,
      "step": 6474
    },
    {
      "epoch": 1.3310720526261692,
      "grad_norm": 0.16360749304294586,
      "learning_rate": 5.529562549024878e-05,
      "loss": 0.5257,
      "step": 6475
    },
    {
      "epoch": 1.3312776235995478,
      "grad_norm": 0.1617291420698166,
      "learning_rate": 5.528590205998994e-05,
      "loss": 0.5577,
      "step": 6476
    },
    {
      "epoch": 1.3314831945729262,
      "grad_norm": 0.1931338757276535,
      "learning_rate": 5.527617812299278e-05,
      "loss": 0.5589,
      "step": 6477
    },
    {
      "epoch": 1.3316887655463048,
      "grad_norm": 0.18447865545749664,
      "learning_rate": 5.526645367973636e-05,
      "loss": 0.5692,
      "step": 6478
    },
    {
      "epoch": 1.3318943365196834,
      "grad_norm": 0.16455183923244476,
      "learning_rate": 5.525672873069975e-05,
      "loss": 0.5236,
      "step": 6479
    },
    {
      "epoch": 1.332099907493062,
      "grad_norm": 0.15722709894180298,
      "learning_rate": 5.524700327636206e-05,
      "loss": 0.5514,
      "step": 6480
    },
    {
      "epoch": 1.3323054784664405,
      "grad_norm": 0.18713107705116272,
      "learning_rate": 5.5237277317202405e-05,
      "loss": 0.5401,
      "step": 6481
    },
    {
      "epoch": 1.3325110494398191,
      "grad_norm": 0.19015434384346008,
      "learning_rate": 5.522755085369994e-05,
      "loss": 0.5464,
      "step": 6482
    },
    {
      "epoch": 1.3327166204131977,
      "grad_norm": 0.18974623084068298,
      "learning_rate": 5.5217823886333854e-05,
      "loss": 0.5409,
      "step": 6483
    },
    {
      "epoch": 1.3329221913865763,
      "grad_norm": 0.19141395390033722,
      "learning_rate": 5.520809641558334e-05,
      "loss": 0.5512,
      "step": 6484
    },
    {
      "epoch": 1.333127762359955,
      "grad_norm": 0.19724808633327484,
      "learning_rate": 5.519836844192763e-05,
      "loss": 0.5687,
      "step": 6485
    },
    {
      "epoch": 1.3333333333333333,
      "grad_norm": 0.18789160251617432,
      "learning_rate": 5.518863996584599e-05,
      "loss": 0.5373,
      "step": 6486
    },
    {
      "epoch": 1.3335389043067118,
      "grad_norm": 0.198290154337883,
      "learning_rate": 5.517891098781766e-05,
      "loss": 0.5726,
      "step": 6487
    },
    {
      "epoch": 1.3337444752800904,
      "grad_norm": 0.19129502773284912,
      "learning_rate": 5.516918150832197e-05,
      "loss": 0.547,
      "step": 6488
    },
    {
      "epoch": 1.333950046253469,
      "grad_norm": 0.16152769327163696,
      "learning_rate": 5.515945152783824e-05,
      "loss": 0.5004,
      "step": 6489
    },
    {
      "epoch": 1.3341556172268476,
      "grad_norm": 0.1580476313829422,
      "learning_rate": 5.5149721046845824e-05,
      "loss": 0.5455,
      "step": 6490
    },
    {
      "epoch": 1.3343611882002262,
      "grad_norm": 0.190731480717659,
      "learning_rate": 5.513999006582407e-05,
      "loss": 0.566,
      "step": 6491
    },
    {
      "epoch": 1.3345667591736046,
      "grad_norm": 0.1941419392824173,
      "learning_rate": 5.513025858525242e-05,
      "loss": 0.5748,
      "step": 6492
    },
    {
      "epoch": 1.3347723301469832,
      "grad_norm": 0.20120371878147125,
      "learning_rate": 5.512052660561026e-05,
      "loss": 0.5662,
      "step": 6493
    },
    {
      "epoch": 1.3349779011203617,
      "grad_norm": 0.2006073296070099,
      "learning_rate": 5.511079412737706e-05,
      "loss": 0.5741,
      "step": 6494
    },
    {
      "epoch": 1.3351834720937403,
      "grad_norm": 0.18886934220790863,
      "learning_rate": 5.510106115103231e-05,
      "loss": 0.5534,
      "step": 6495
    },
    {
      "epoch": 1.335389043067119,
      "grad_norm": 0.18579721450805664,
      "learning_rate": 5.5091327677055484e-05,
      "loss": 0.5403,
      "step": 6496
    },
    {
      "epoch": 1.3355946140404975,
      "grad_norm": 0.1928054839372635,
      "learning_rate": 5.50815937059261e-05,
      "loss": 0.5666,
      "step": 6497
    },
    {
      "epoch": 1.335800185013876,
      "grad_norm": 0.161499485373497,
      "learning_rate": 5.5071859238123714e-05,
      "loss": 0.5366,
      "step": 6498
    },
    {
      "epoch": 1.3360057559872547,
      "grad_norm": 0.1295616626739502,
      "learning_rate": 5.506212427412791e-05,
      "loss": 0.4916,
      "step": 6499
    },
    {
      "epoch": 1.3362113269606333,
      "grad_norm": 0.15952670574188232,
      "learning_rate": 5.505238881441827e-05,
      "loss": 0.5444,
      "step": 6500
    },
    {
      "epoch": 1.3364168979340116,
      "grad_norm": 0.202559694647789,
      "learning_rate": 5.5042652859474414e-05,
      "loss": 0.5592,
      "step": 6501
    },
    {
      "epoch": 1.3366224689073902,
      "grad_norm": 0.16196085512638092,
      "learning_rate": 5.5032916409776003e-05,
      "loss": 0.5164,
      "step": 6502
    },
    {
      "epoch": 1.3368280398807688,
      "grad_norm": 0.1672007143497467,
      "learning_rate": 5.502317946580268e-05,
      "loss": 0.5319,
      "step": 6503
    },
    {
      "epoch": 1.3370336108541474,
      "grad_norm": 0.16251109540462494,
      "learning_rate": 5.501344202803415e-05,
      "loss": 0.5215,
      "step": 6504
    },
    {
      "epoch": 1.337239181827526,
      "grad_norm": 0.12841519713401794,
      "learning_rate": 5.500370409695014e-05,
      "loss": 0.5087,
      "step": 6505
    },
    {
      "epoch": 1.3374447528009046,
      "grad_norm": 0.16203691065311432,
      "learning_rate": 5.499396567303039e-05,
      "loss": 0.5683,
      "step": 6506
    },
    {
      "epoch": 1.337650323774283,
      "grad_norm": 0.18712860345840454,
      "learning_rate": 5.4984226756754664e-05,
      "loss": 0.5488,
      "step": 6507
    },
    {
      "epoch": 1.3378558947476615,
      "grad_norm": 0.19168932735919952,
      "learning_rate": 5.497448734860274e-05,
      "loss": 0.5639,
      "step": 6508
    },
    {
      "epoch": 1.3380614657210401,
      "grad_norm": 0.18323485553264618,
      "learning_rate": 5.4964747449054464e-05,
      "loss": 0.5504,
      "step": 6509
    },
    {
      "epoch": 1.3382670366944187,
      "grad_norm": 0.16930492222309113,
      "learning_rate": 5.4955007058589646e-05,
      "loss": 0.5296,
      "step": 6510
    },
    {
      "epoch": 1.3384726076677973,
      "grad_norm": 0.16478413343429565,
      "learning_rate": 5.494526617768816e-05,
      "loss": 0.557,
      "step": 6511
    },
    {
      "epoch": 1.3386781786411759,
      "grad_norm": 0.1620486080646515,
      "learning_rate": 5.4935524806829885e-05,
      "loss": 0.5328,
      "step": 6512
    },
    {
      "epoch": 1.3388837496145545,
      "grad_norm": 0.15588897466659546,
      "learning_rate": 5.4925782946494754e-05,
      "loss": 0.5307,
      "step": 6513
    },
    {
      "epoch": 1.339089320587933,
      "grad_norm": 0.16102923452854156,
      "learning_rate": 5.4916040597162677e-05,
      "loss": 0.5318,
      "step": 6514
    },
    {
      "epoch": 1.3392948915613117,
      "grad_norm": 0.13110311329364777,
      "learning_rate": 5.490629775931364e-05,
      "loss": 0.515,
      "step": 6515
    },
    {
      "epoch": 1.3395004625346902,
      "grad_norm": 0.1619655340909958,
      "learning_rate": 5.4896554433427606e-05,
      "loss": 0.5477,
      "step": 6516
    },
    {
      "epoch": 1.3397060335080686,
      "grad_norm": 0.20572912693023682,
      "learning_rate": 5.48868106199846e-05,
      "loss": 0.5571,
      "step": 6517
    },
    {
      "epoch": 1.3399116044814472,
      "grad_norm": 0.156040221452713,
      "learning_rate": 5.487706631946464e-05,
      "loss": 0.5231,
      "step": 6518
    },
    {
      "epoch": 1.3401171754548258,
      "grad_norm": 0.16056253015995026,
      "learning_rate": 5.486732153234778e-05,
      "loss": 0.5529,
      "step": 6519
    },
    {
      "epoch": 1.3403227464282044,
      "grad_norm": 0.19152522087097168,
      "learning_rate": 5.485757625911413e-05,
      "loss": 0.545,
      "step": 6520
    },
    {
      "epoch": 1.340528317401583,
      "grad_norm": 0.185153067111969,
      "learning_rate": 5.484783050024376e-05,
      "loss": 0.5545,
      "step": 6521
    },
    {
      "epoch": 1.3407338883749613,
      "grad_norm": 0.18557578325271606,
      "learning_rate": 5.4838084256216796e-05,
      "loss": 0.5631,
      "step": 6522
    },
    {
      "epoch": 1.34093945934834,
      "grad_norm": 0.1944609433412552,
      "learning_rate": 5.482833752751343e-05,
      "loss": 0.5673,
      "step": 6523
    },
    {
      "epoch": 1.3411450303217185,
      "grad_norm": 0.1916920244693756,
      "learning_rate": 5.4818590314613796e-05,
      "loss": 0.5406,
      "step": 6524
    },
    {
      "epoch": 1.341350601295097,
      "grad_norm": 0.199026957154274,
      "learning_rate": 5.48088426179981e-05,
      "loss": 0.5614,
      "step": 6525
    },
    {
      "epoch": 1.3415561722684757,
      "grad_norm": 0.19180314242839813,
      "learning_rate": 5.479909443814658e-05,
      "loss": 0.5676,
      "step": 6526
    },
    {
      "epoch": 1.3417617432418543,
      "grad_norm": 0.18850663304328918,
      "learning_rate": 5.478934577553949e-05,
      "loss": 0.5644,
      "step": 6527
    },
    {
      "epoch": 1.3419673142152329,
      "grad_norm": 0.19104434549808502,
      "learning_rate": 5.477959663065709e-05,
      "loss": 0.5517,
      "step": 6528
    },
    {
      "epoch": 1.3421728851886114,
      "grad_norm": 0.16571475565433502,
      "learning_rate": 5.476984700397966e-05,
      "loss": 0.5328,
      "step": 6529
    },
    {
      "epoch": 1.34237845616199,
      "grad_norm": 0.1614765077829361,
      "learning_rate": 5.4760096895987535e-05,
      "loss": 0.5574,
      "step": 6530
    },
    {
      "epoch": 1.3425840271353686,
      "grad_norm": 0.18632696568965912,
      "learning_rate": 5.4750346307161064e-05,
      "loss": 0.5605,
      "step": 6531
    },
    {
      "epoch": 1.342789598108747,
      "grad_norm": 0.1589028239250183,
      "learning_rate": 5.474059523798059e-05,
      "loss": 0.5214,
      "step": 6532
    },
    {
      "epoch": 1.3429951690821256,
      "grad_norm": 0.16524967551231384,
      "learning_rate": 5.473084368892653e-05,
      "loss": 0.565,
      "step": 6533
    },
    {
      "epoch": 1.3432007400555042,
      "grad_norm": 0.1631617695093155,
      "learning_rate": 5.4721091660479276e-05,
      "loss": 0.5324,
      "step": 6534
    },
    {
      "epoch": 1.3434063110288827,
      "grad_norm": 0.1608559638261795,
      "learning_rate": 5.471133915311927e-05,
      "loss": 0.5469,
      "step": 6535
    },
    {
      "epoch": 1.3436118820022613,
      "grad_norm": 0.1971094310283661,
      "learning_rate": 5.470158616732698e-05,
      "loss": 0.5692,
      "step": 6536
    },
    {
      "epoch": 1.3438174529756397,
      "grad_norm": 0.19706624746322632,
      "learning_rate": 5.469183270358288e-05,
      "loss": 0.5694,
      "step": 6537
    },
    {
      "epoch": 1.3440230239490183,
      "grad_norm": 0.18402022123336792,
      "learning_rate": 5.468207876236748e-05,
      "loss": 0.5478,
      "step": 6538
    },
    {
      "epoch": 1.3442285949223969,
      "grad_norm": 0.16580908000469208,
      "learning_rate": 5.467232434416132e-05,
      "loss": 0.5444,
      "step": 6539
    },
    {
      "epoch": 1.3444341658957755,
      "grad_norm": 0.1564161777496338,
      "learning_rate": 5.466256944944494e-05,
      "loss": 0.5379,
      "step": 6540
    },
    {
      "epoch": 1.344639736869154,
      "grad_norm": 0.19156378507614136,
      "learning_rate": 5.465281407869894e-05,
      "loss": 0.5479,
      "step": 6541
    },
    {
      "epoch": 1.3448453078425326,
      "grad_norm": 0.18408456444740295,
      "learning_rate": 5.46430582324039e-05,
      "loss": 0.5402,
      "step": 6542
    },
    {
      "epoch": 1.3450508788159112,
      "grad_norm": 0.18590892851352692,
      "learning_rate": 5.463330191104045e-05,
      "loss": 0.5345,
      "step": 6543
    },
    {
      "epoch": 1.3452564497892898,
      "grad_norm": 0.2050226926803589,
      "learning_rate": 5.4623545115089246e-05,
      "loss": 0.5731,
      "step": 6544
    },
    {
      "epoch": 1.3454620207626684,
      "grad_norm": 0.19850295782089233,
      "learning_rate": 5.461378784503095e-05,
      "loss": 0.5583,
      "step": 6545
    },
    {
      "epoch": 1.345667591736047,
      "grad_norm": 0.16567668318748474,
      "learning_rate": 5.4604030101346255e-05,
      "loss": 0.531,
      "step": 6546
    },
    {
      "epoch": 1.3458731627094254,
      "grad_norm": 0.15176017582416534,
      "learning_rate": 5.4594271884515884e-05,
      "loss": 0.5567,
      "step": 6547
    },
    {
      "epoch": 1.346078733682804,
      "grad_norm": 0.19408267736434937,
      "learning_rate": 5.45845131950206e-05,
      "loss": 0.5601,
      "step": 6548
    },
    {
      "epoch": 1.3462843046561825,
      "grad_norm": 0.18972966074943542,
      "learning_rate": 5.457475403334114e-05,
      "loss": 0.574,
      "step": 6549
    },
    {
      "epoch": 1.3464898756295611,
      "grad_norm": 0.19591477513313293,
      "learning_rate": 5.456499439995829e-05,
      "loss": 0.5559,
      "step": 6550
    },
    {
      "epoch": 1.3466954466029397,
      "grad_norm": 0.18834471702575684,
      "learning_rate": 5.455523429535289e-05,
      "loss": 0.5537,
      "step": 6551
    },
    {
      "epoch": 1.3469010175763183,
      "grad_norm": 0.1918981820344925,
      "learning_rate": 5.454547372000575e-05,
      "loss": 0.5594,
      "step": 6552
    },
    {
      "epoch": 1.3471065885496967,
      "grad_norm": 0.16592934727668762,
      "learning_rate": 5.453571267439773e-05,
      "loss": 0.509,
      "step": 6553
    },
    {
      "epoch": 1.3473121595230753,
      "grad_norm": 0.16087022423744202,
      "learning_rate": 5.4525951159009726e-05,
      "loss": 0.5429,
      "step": 6554
    },
    {
      "epoch": 1.3475177304964538,
      "grad_norm": 0.19623617827892303,
      "learning_rate": 5.4516189174322635e-05,
      "loss": 0.5526,
      "step": 6555
    },
    {
      "epoch": 1.3477233014698324,
      "grad_norm": 0.19142059981822968,
      "learning_rate": 5.450642672081737e-05,
      "loss": 0.5726,
      "step": 6556
    },
    {
      "epoch": 1.347928872443211,
      "grad_norm": 0.1905898004770279,
      "learning_rate": 5.44966637989749e-05,
      "loss": 0.556,
      "step": 6557
    },
    {
      "epoch": 1.3481344434165896,
      "grad_norm": 0.19187632203102112,
      "learning_rate": 5.448690040927618e-05,
      "loss": 0.5517,
      "step": 6558
    },
    {
      "epoch": 1.3483400143899682,
      "grad_norm": 0.19854268431663513,
      "learning_rate": 5.447713655220224e-05,
      "loss": 0.5642,
      "step": 6559
    },
    {
      "epoch": 1.3485455853633468,
      "grad_norm": 0.18761958181858063,
      "learning_rate": 5.446737222823405e-05,
      "loss": 0.5595,
      "step": 6560
    },
    {
      "epoch": 1.3487511563367254,
      "grad_norm": 0.19532154500484467,
      "learning_rate": 5.445760743785271e-05,
      "loss": 0.5764,
      "step": 6561
    },
    {
      "epoch": 1.3489567273101037,
      "grad_norm": 0.20847441256046295,
      "learning_rate": 5.444784218153924e-05,
      "loss": 0.5326,
      "step": 6562
    },
    {
      "epoch": 1.3491622982834823,
      "grad_norm": 0.2053038477897644,
      "learning_rate": 5.4438076459774746e-05,
      "loss": 0.54,
      "step": 6563
    },
    {
      "epoch": 1.349367869256861,
      "grad_norm": 0.1965019851922989,
      "learning_rate": 5.4428310273040335e-05,
      "loss": 0.5454,
      "step": 6564
    },
    {
      "epoch": 1.3495734402302395,
      "grad_norm": 0.19706155359745026,
      "learning_rate": 5.4418543621817165e-05,
      "loss": 0.5847,
      "step": 6565
    },
    {
      "epoch": 1.349779011203618,
      "grad_norm": 0.18815022706985474,
      "learning_rate": 5.440877650658636e-05,
      "loss": 0.5541,
      "step": 6566
    },
    {
      "epoch": 1.3499845821769967,
      "grad_norm": 0.16428446769714355,
      "learning_rate": 5.43990089278291e-05,
      "loss": 0.5459,
      "step": 6567
    },
    {
      "epoch": 1.350190153150375,
      "grad_norm": 0.16542398929595947,
      "learning_rate": 5.438924088602662e-05,
      "loss": 0.5646,
      "step": 6568
    },
    {
      "epoch": 1.3503957241237536,
      "grad_norm": 0.15714940428733826,
      "learning_rate": 5.437947238166012e-05,
      "loss": 0.5173,
      "step": 6569
    },
    {
      "epoch": 1.3506012950971322,
      "grad_norm": 0.15711595118045807,
      "learning_rate": 5.436970341521084e-05,
      "loss": 0.5552,
      "step": 6570
    },
    {
      "epoch": 1.3508068660705108,
      "grad_norm": 0.1985914558172226,
      "learning_rate": 5.4359933987160086e-05,
      "loss": 0.5668,
      "step": 6571
    },
    {
      "epoch": 1.3510124370438894,
      "grad_norm": 0.19462761282920837,
      "learning_rate": 5.435016409798913e-05,
      "loss": 0.5585,
      "step": 6572
    },
    {
      "epoch": 1.351218008017268,
      "grad_norm": 0.19194667041301727,
      "learning_rate": 5.434039374817929e-05,
      "loss": 0.5631,
      "step": 6573
    },
    {
      "epoch": 1.3514235789906466,
      "grad_norm": 0.19980405271053314,
      "learning_rate": 5.43306229382119e-05,
      "loss": 0.5535,
      "step": 6574
    },
    {
      "epoch": 1.3516291499640252,
      "grad_norm": 0.193598210811615,
      "learning_rate": 5.432085166856834e-05,
      "loss": 0.5606,
      "step": 6575
    },
    {
      "epoch": 1.3518347209374038,
      "grad_norm": 0.16227704286575317,
      "learning_rate": 5.431107993972999e-05,
      "loss": 0.5169,
      "step": 6576
    },
    {
      "epoch": 1.3520402919107821,
      "grad_norm": 0.16246087849140167,
      "learning_rate": 5.430130775217823e-05,
      "loss": 0.5548,
      "step": 6577
    },
    {
      "epoch": 1.3522458628841607,
      "grad_norm": 0.16693639755249023,
      "learning_rate": 5.4291535106394524e-05,
      "loss": 0.5287,
      "step": 6578
    },
    {
      "epoch": 1.3524514338575393,
      "grad_norm": 0.16185717284679413,
      "learning_rate": 5.4281762002860304e-05,
      "loss": 0.5556,
      "step": 6579
    },
    {
      "epoch": 1.3526570048309179,
      "grad_norm": 0.19650043547153473,
      "learning_rate": 5.427198844205706e-05,
      "loss": 0.5632,
      "step": 6580
    },
    {
      "epoch": 1.3528625758042965,
      "grad_norm": 0.16057594120502472,
      "learning_rate": 5.426221442446627e-05,
      "loss": 0.5163,
      "step": 6581
    },
    {
      "epoch": 1.353068146777675,
      "grad_norm": 0.15515869855880737,
      "learning_rate": 5.425243995056949e-05,
      "loss": 0.5588,
      "step": 6582
    },
    {
      "epoch": 1.3532737177510534,
      "grad_norm": 0.19516292214393616,
      "learning_rate": 5.4242665020848224e-05,
      "loss": 0.5814,
      "step": 6583
    },
    {
      "epoch": 1.353479288724432,
      "grad_norm": 0.1625499576330185,
      "learning_rate": 5.423288963578405e-05,
      "loss": 0.5264,
      "step": 6584
    },
    {
      "epoch": 1.3536848596978106,
      "grad_norm": 0.16830846667289734,
      "learning_rate": 5.422311379585857e-05,
      "loss": 0.5258,
      "step": 6585
    },
    {
      "epoch": 1.3538904306711892,
      "grad_norm": 0.19009056687355042,
      "learning_rate": 5.4213337501553374e-05,
      "loss": 0.5549,
      "step": 6586
    },
    {
      "epoch": 1.3540960016445678,
      "grad_norm": 0.18671362102031708,
      "learning_rate": 5.4203560753350115e-05,
      "loss": 0.5482,
      "step": 6587
    },
    {
      "epoch": 1.3543015726179464,
      "grad_norm": 0.1931658238172531,
      "learning_rate": 5.419378355173042e-05,
      "loss": 0.5665,
      "step": 6588
    },
    {
      "epoch": 1.354507143591325,
      "grad_norm": 0.1925138682126999,
      "learning_rate": 5.4184005897175985e-05,
      "loss": 0.5649,
      "step": 6589
    },
    {
      "epoch": 1.3547127145647035,
      "grad_norm": 0.1919427365064621,
      "learning_rate": 5.41742277901685e-05,
      "loss": 0.5425,
      "step": 6590
    },
    {
      "epoch": 1.3549182855380821,
      "grad_norm": 0.19209784269332886,
      "learning_rate": 5.416444923118968e-05,
      "loss": 0.5561,
      "step": 6591
    },
    {
      "epoch": 1.3551238565114605,
      "grad_norm": 0.17238673567771912,
      "learning_rate": 5.415467022072131e-05,
      "loss": 0.5302,
      "step": 6592
    },
    {
      "epoch": 1.355329427484839,
      "grad_norm": 0.1562458574771881,
      "learning_rate": 5.414489075924512e-05,
      "loss": 0.5435,
      "step": 6593
    },
    {
      "epoch": 1.3555349984582177,
      "grad_norm": 0.19020064175128937,
      "learning_rate": 5.41351108472429e-05,
      "loss": 0.5327,
      "step": 6594
    },
    {
      "epoch": 1.3557405694315963,
      "grad_norm": 0.20159995555877686,
      "learning_rate": 5.412533048519646e-05,
      "loss": 0.5489,
      "step": 6595
    },
    {
      "epoch": 1.3559461404049749,
      "grad_norm": 0.19280879199504852,
      "learning_rate": 5.411554967358765e-05,
      "loss": 0.542,
      "step": 6596
    },
    {
      "epoch": 1.3561517113783534,
      "grad_norm": 0.18953213095664978,
      "learning_rate": 5.410576841289831e-05,
      "loss": 0.5464,
      "step": 6597
    },
    {
      "epoch": 1.3563572823517318,
      "grad_norm": 0.18897344172000885,
      "learning_rate": 5.409598670361032e-05,
      "loss": 0.5427,
      "step": 6598
    },
    {
      "epoch": 1.3565628533251104,
      "grad_norm": 0.20002910494804382,
      "learning_rate": 5.408620454620558e-05,
      "loss": 0.5554,
      "step": 6599
    },
    {
      "epoch": 1.356768424298489,
      "grad_norm": 0.18375547230243683,
      "learning_rate": 5.4076421941166016e-05,
      "loss": 0.555,
      "step": 6600
    },
    {
      "epoch": 1.3569739952718676,
      "grad_norm": 0.17289654910564423,
      "learning_rate": 5.406663888897355e-05,
      "loss": 0.5342,
      "step": 6601
    },
    {
      "epoch": 1.3571795662452462,
      "grad_norm": 0.16519290208816528,
      "learning_rate": 5.405685539011017e-05,
      "loss": 0.5506,
      "step": 6602
    },
    {
      "epoch": 1.3573851372186247,
      "grad_norm": 0.19404758512973785,
      "learning_rate": 5.404707144505786e-05,
      "loss": 0.5703,
      "step": 6603
    },
    {
      "epoch": 1.3575907081920033,
      "grad_norm": 0.1909807026386261,
      "learning_rate": 5.403728705429864e-05,
      "loss": 0.5762,
      "step": 6604
    },
    {
      "epoch": 1.357796279165382,
      "grad_norm": 0.19107364118099213,
      "learning_rate": 5.4027502218314505e-05,
      "loss": 0.5411,
      "step": 6605
    },
    {
      "epoch": 1.3580018501387605,
      "grad_norm": 0.18892939388751984,
      "learning_rate": 5.401771693758754e-05,
      "loss": 0.5456,
      "step": 6606
    },
    {
      "epoch": 1.358207421112139,
      "grad_norm": 0.19617542624473572,
      "learning_rate": 5.400793121259981e-05,
      "loss": 0.5759,
      "step": 6607
    },
    {
      "epoch": 1.3584129920855175,
      "grad_norm": 0.19577234983444214,
      "learning_rate": 5.39981450438334e-05,
      "loss": 0.5668,
      "step": 6608
    },
    {
      "epoch": 1.358618563058896,
      "grad_norm": 0.21422545611858368,
      "learning_rate": 5.3988358431770455e-05,
      "loss": 0.5677,
      "step": 6609
    },
    {
      "epoch": 1.3588241340322746,
      "grad_norm": 0.16092784702777863,
      "learning_rate": 5.397857137689311e-05,
      "loss": 0.5076,
      "step": 6610
    },
    {
      "epoch": 1.3590297050056532,
      "grad_norm": 0.15695548057556152,
      "learning_rate": 5.39687838796835e-05,
      "loss": 0.5357,
      "step": 6611
    },
    {
      "epoch": 1.3592352759790318,
      "grad_norm": 0.20313376188278198,
      "learning_rate": 5.395899594062383e-05,
      "loss": 0.5823,
      "step": 6612
    },
    {
      "epoch": 1.3594408469524102,
      "grad_norm": 0.19227701425552368,
      "learning_rate": 5.3949207560196306e-05,
      "loss": 0.5674,
      "step": 6613
    },
    {
      "epoch": 1.3596464179257888,
      "grad_norm": 0.190741628408432,
      "learning_rate": 5.393941873888316e-05,
      "loss": 0.548,
      "step": 6614
    },
    {
      "epoch": 1.3598519888991674,
      "grad_norm": 0.19307512044906616,
      "learning_rate": 5.3929629477166624e-05,
      "loss": 0.5449,
      "step": 6615
    },
    {
      "epoch": 1.360057559872546,
      "grad_norm": 0.19279111921787262,
      "learning_rate": 5.3919839775529e-05,
      "loss": 0.5505,
      "step": 6616
    },
    {
      "epoch": 1.3602631308459245,
      "grad_norm": 0.1940283179283142,
      "learning_rate": 5.391004963445255e-05,
      "loss": 0.5564,
      "step": 6617
    },
    {
      "epoch": 1.3604687018193031,
      "grad_norm": 0.22000883519649506,
      "learning_rate": 5.39002590544196e-05,
      "loss": 0.5643,
      "step": 6618
    },
    {
      "epoch": 1.3606742727926817,
      "grad_norm": 0.1951514333486557,
      "learning_rate": 5.3890468035912484e-05,
      "loss": 0.5502,
      "step": 6619
    },
    {
      "epoch": 1.3608798437660603,
      "grad_norm": 0.19694966077804565,
      "learning_rate": 5.388067657941357e-05,
      "loss": 0.5609,
      "step": 6620
    },
    {
      "epoch": 1.361085414739439,
      "grad_norm": 0.165736585855484,
      "learning_rate": 5.387088468540522e-05,
      "loss": 0.5275,
      "step": 6621
    },
    {
      "epoch": 1.3612909857128175,
      "grad_norm": 0.1606799215078354,
      "learning_rate": 5.3861092354369843e-05,
      "loss": 0.5503,
      "step": 6622
    },
    {
      "epoch": 1.3614965566861958,
      "grad_norm": 0.1982721984386444,
      "learning_rate": 5.385129958678986e-05,
      "loss": 0.5561,
      "step": 6623
    },
    {
      "epoch": 1.3617021276595744,
      "grad_norm": 0.20562221109867096,
      "learning_rate": 5.384150638314773e-05,
      "loss": 0.5675,
      "step": 6624
    },
    {
      "epoch": 1.361907698632953,
      "grad_norm": 0.19149377942085266,
      "learning_rate": 5.3831712743925905e-05,
      "loss": 0.5675,
      "step": 6625
    },
    {
      "epoch": 1.3621132696063316,
      "grad_norm": 0.19633962213993073,
      "learning_rate": 5.382191866960686e-05,
      "loss": 0.5566,
      "step": 6626
    },
    {
      "epoch": 1.3623188405797102,
      "grad_norm": 0.19432850182056427,
      "learning_rate": 5.381212416067313e-05,
      "loss": 0.5525,
      "step": 6627
    },
    {
      "epoch": 1.3625244115530886,
      "grad_norm": 0.18926875293254852,
      "learning_rate": 5.380232921760723e-05,
      "loss": 0.5573,
      "step": 6628
    },
    {
      "epoch": 1.3627299825264672,
      "grad_norm": 0.16620329022407532,
      "learning_rate": 5.379253384089169e-05,
      "loss": 0.5206,
      "step": 6629
    },
    {
      "epoch": 1.3629355534998457,
      "grad_norm": 0.1583135575056076,
      "learning_rate": 5.378273803100913e-05,
      "loss": 0.5458,
      "step": 6630
    },
    {
      "epoch": 1.3631411244732243,
      "grad_norm": 0.19092857837677002,
      "learning_rate": 5.3772941788442106e-05,
      "loss": 0.5782,
      "step": 6631
    },
    {
      "epoch": 1.363346695446603,
      "grad_norm": 0.19434650242328644,
      "learning_rate": 5.3763145113673234e-05,
      "loss": 0.5743,
      "step": 6632
    },
    {
      "epoch": 1.3635522664199815,
      "grad_norm": 0.19643783569335938,
      "learning_rate": 5.375334800718518e-05,
      "loss": 0.5689,
      "step": 6633
    },
    {
      "epoch": 1.36375783739336,
      "grad_norm": 0.16674213111400604,
      "learning_rate": 5.374355046946057e-05,
      "loss": 0.5268,
      "step": 6634
    },
    {
      "epoch": 1.3639634083667387,
      "grad_norm": 0.16963227093219757,
      "learning_rate": 5.3733752500982095e-05,
      "loss": 0.5625,
      "step": 6635
    },
    {
      "epoch": 1.3641689793401173,
      "grad_norm": 0.18819878995418549,
      "learning_rate": 5.372395410223246e-05,
      "loss": 0.5633,
      "step": 6636
    },
    {
      "epoch": 1.3643745503134959,
      "grad_norm": 0.19265903532505035,
      "learning_rate": 5.371415527369439e-05,
      "loss": 0.5459,
      "step": 6637
    },
    {
      "epoch": 1.3645801212868742,
      "grad_norm": 0.19311292469501495,
      "learning_rate": 5.370435601585061e-05,
      "loss": 0.5648,
      "step": 6638
    },
    {
      "epoch": 1.3647856922602528,
      "grad_norm": 0.19344937801361084,
      "learning_rate": 5.3694556329183904e-05,
      "loss": 0.5701,
      "step": 6639
    },
    {
      "epoch": 1.3649912632336314,
      "grad_norm": 0.24478478729724884,
      "learning_rate": 5.368475621417703e-05,
      "loss": 0.5532,
      "step": 6640
    },
    {
      "epoch": 1.36519683420701,
      "grad_norm": 0.19150310754776,
      "learning_rate": 5.367495567131282e-05,
      "loss": 0.5471,
      "step": 6641
    },
    {
      "epoch": 1.3654024051803886,
      "grad_norm": 0.19197209179401398,
      "learning_rate": 5.3665154701074097e-05,
      "loss": 0.5406,
      "step": 6642
    },
    {
      "epoch": 1.3656079761537672,
      "grad_norm": 0.19130434095859528,
      "learning_rate": 5.365535330394368e-05,
      "loss": 0.5363,
      "step": 6643
    },
    {
      "epoch": 1.3658135471271455,
      "grad_norm": 0.19257521629333496,
      "learning_rate": 5.3645551480404487e-05,
      "loss": 0.5547,
      "step": 6644
    },
    {
      "epoch": 1.3660191181005241,
      "grad_norm": 0.18824981153011322,
      "learning_rate": 5.363574923093936e-05,
      "loss": 0.5723,
      "step": 6645
    },
    {
      "epoch": 1.3662246890739027,
      "grad_norm": 0.19089485704898834,
      "learning_rate": 5.362594655603123e-05,
      "loss": 0.536,
      "step": 6646
    },
    {
      "epoch": 1.3664302600472813,
      "grad_norm": 0.1918558031320572,
      "learning_rate": 5.3616143456163055e-05,
      "loss": 0.5404,
      "step": 6647
    },
    {
      "epoch": 1.3666358310206599,
      "grad_norm": 0.199978306889534,
      "learning_rate": 5.3606339931817756e-05,
      "loss": 0.5633,
      "step": 6648
    },
    {
      "epoch": 1.3668414019940385,
      "grad_norm": 0.1935882270336151,
      "learning_rate": 5.35965359834783e-05,
      "loss": 0.5777,
      "step": 6649
    },
    {
      "epoch": 1.367046972967417,
      "grad_norm": 0.19100281596183777,
      "learning_rate": 5.358673161162771e-05,
      "loss": 0.547,
      "step": 6650
    },
    {
      "epoch": 1.3672525439407957,
      "grad_norm": 0.19073952734470367,
      "learning_rate": 5.357692681674898e-05,
      "loss": 0.5613,
      "step": 6651
    },
    {
      "epoch": 1.3674581149141742,
      "grad_norm": 0.16322961449623108,
      "learning_rate": 5.356712159932516e-05,
      "loss": 0.5327,
      "step": 6652
    },
    {
      "epoch": 1.3676636858875526,
      "grad_norm": 0.1632666438817978,
      "learning_rate": 5.35573159598393e-05,
      "loss": 0.5418,
      "step": 6653
    },
    {
      "epoch": 1.3678692568609312,
      "grad_norm": 0.1909777820110321,
      "learning_rate": 5.3547509898774476e-05,
      "loss": 0.5595,
      "step": 6654
    },
    {
      "epoch": 1.3680748278343098,
      "grad_norm": 0.19034305214881897,
      "learning_rate": 5.353770341661378e-05,
      "loss": 0.5576,
      "step": 6655
    },
    {
      "epoch": 1.3682803988076884,
      "grad_norm": 0.19562803208827972,
      "learning_rate": 5.352789651384036e-05,
      "loss": 0.5549,
      "step": 6656
    },
    {
      "epoch": 1.368485969781067,
      "grad_norm": 0.2044394165277481,
      "learning_rate": 5.351808919093733e-05,
      "loss": 0.5686,
      "step": 6657
    },
    {
      "epoch": 1.3686915407544455,
      "grad_norm": 0.19082361459732056,
      "learning_rate": 5.350828144838786e-05,
      "loss": 0.5626,
      "step": 6658
    },
    {
      "epoch": 1.368897111727824,
      "grad_norm": 0.21942925453186035,
      "learning_rate": 5.349847328667514e-05,
      "loss": 0.583,
      "step": 6659
    },
    {
      "epoch": 1.3691026827012025,
      "grad_norm": 0.19300974905490875,
      "learning_rate": 5.348866470628235e-05,
      "loss": 0.5538,
      "step": 6660
    },
    {
      "epoch": 1.369308253674581,
      "grad_norm": 0.1846531480550766,
      "learning_rate": 5.347885570769273e-05,
      "loss": 0.5331,
      "step": 6661
    },
    {
      "epoch": 1.3695138246479597,
      "grad_norm": 0.19142849743366241,
      "learning_rate": 5.346904629138953e-05,
      "loss": 0.5606,
      "step": 6662
    },
    {
      "epoch": 1.3697193956213383,
      "grad_norm": 0.19237980246543884,
      "learning_rate": 5.3459236457856e-05,
      "loss": 0.5426,
      "step": 6663
    },
    {
      "epoch": 1.3699249665947169,
      "grad_norm": 0.20076246559619904,
      "learning_rate": 5.344942620757541e-05,
      "loss": 0.5676,
      "step": 6664
    },
    {
      "epoch": 1.3701305375680954,
      "grad_norm": 0.193067267537117,
      "learning_rate": 5.34396155410311e-05,
      "loss": 0.557,
      "step": 6665
    },
    {
      "epoch": 1.370336108541474,
      "grad_norm": 0.19357764720916748,
      "learning_rate": 5.342980445870637e-05,
      "loss": 0.5676,
      "step": 6666
    },
    {
      "epoch": 1.3705416795148526,
      "grad_norm": 0.16621150076389313,
      "learning_rate": 5.341999296108457e-05,
      "loss": 0.526,
      "step": 6667
    },
    {
      "epoch": 1.370747250488231,
      "grad_norm": 0.13069656491279602,
      "learning_rate": 5.341018104864909e-05,
      "loss": 0.5275,
      "step": 6668
    },
    {
      "epoch": 1.3709528214616096,
      "grad_norm": 0.16230368614196777,
      "learning_rate": 5.3400368721883284e-05,
      "loss": 0.5518,
      "step": 6669
    },
    {
      "epoch": 1.3711583924349882,
      "grad_norm": 0.2009955644607544,
      "learning_rate": 5.339055598127059e-05,
      "loss": 0.5503,
      "step": 6670
    },
    {
      "epoch": 1.3713639634083667,
      "grad_norm": 0.18965907394886017,
      "learning_rate": 5.33807428272944e-05,
      "loss": 0.566,
      "step": 6671
    },
    {
      "epoch": 1.3715695343817453,
      "grad_norm": 0.1938343495130539,
      "learning_rate": 5.3370929260438196e-05,
      "loss": 0.5559,
      "step": 6672
    },
    {
      "epoch": 1.371775105355124,
      "grad_norm": 0.17998439073562622,
      "learning_rate": 5.336111528118543e-05,
      "loss": 0.515,
      "step": 6673
    },
    {
      "epoch": 1.3719806763285023,
      "grad_norm": 0.16804425418376923,
      "learning_rate": 5.335130089001958e-05,
      "loss": 0.5192,
      "step": 6674
    },
    {
      "epoch": 1.3721862473018809,
      "grad_norm": 0.16128107905387878,
      "learning_rate": 5.3341486087424194e-05,
      "loss": 0.5566,
      "step": 6675
    },
    {
      "epoch": 1.3723918182752595,
      "grad_norm": 0.1895219087600708,
      "learning_rate": 5.333167087388276e-05,
      "loss": 0.5678,
      "step": 6676
    },
    {
      "epoch": 1.372597389248638,
      "grad_norm": 0.18738722801208496,
      "learning_rate": 5.3321855249878845e-05,
      "loss": 0.5647,
      "step": 6677
    },
    {
      "epoch": 1.3728029602220166,
      "grad_norm": 0.19784080982208252,
      "learning_rate": 5.331203921589602e-05,
      "loss": 0.5661,
      "step": 6678
    },
    {
      "epoch": 1.3730085311953952,
      "grad_norm": 0.17455421388149261,
      "learning_rate": 5.3302222772417875e-05,
      "loss": 0.5411,
      "step": 6679
    },
    {
      "epoch": 1.3732141021687738,
      "grad_norm": 0.13297952711582184,
      "learning_rate": 5.329240591992803e-05,
      "loss": 0.532,
      "step": 6680
    },
    {
      "epoch": 1.3734196731421524,
      "grad_norm": 0.16244389116764069,
      "learning_rate": 5.328258865891008e-05,
      "loss": 0.5617,
      "step": 6681
    },
    {
      "epoch": 1.373625244115531,
      "grad_norm": 0.19854487478733063,
      "learning_rate": 5.3272770989847724e-05,
      "loss": 0.5724,
      "step": 6682
    },
    {
      "epoch": 1.3738308150889094,
      "grad_norm": 0.19856125116348267,
      "learning_rate": 5.32629529132246e-05,
      "loss": 0.588,
      "step": 6683
    },
    {
      "epoch": 1.374036386062288,
      "grad_norm": 0.19242699444293976,
      "learning_rate": 5.32531344295244e-05,
      "loss": 0.5468,
      "step": 6684
    },
    {
      "epoch": 1.3742419570356665,
      "grad_norm": 0.19373014569282532,
      "learning_rate": 5.3243315539230844e-05,
      "loss": 0.5487,
      "step": 6685
    },
    {
      "epoch": 1.3744475280090451,
      "grad_norm": 0.19233091175556183,
      "learning_rate": 5.323349624282766e-05,
      "loss": 0.5524,
      "step": 6686
    },
    {
      "epoch": 1.3746530989824237,
      "grad_norm": 0.1918216347694397,
      "learning_rate": 5.32236765407986e-05,
      "loss": 0.5538,
      "step": 6687
    },
    {
      "epoch": 1.3748586699558023,
      "grad_norm": 0.1914103925228119,
      "learning_rate": 5.3213856433627426e-05,
      "loss": 0.5608,
      "step": 6688
    },
    {
      "epoch": 1.3750642409291807,
      "grad_norm": 0.19780538976192474,
      "learning_rate": 5.320403592179795e-05,
      "loss": 0.5701,
      "step": 6689
    },
    {
      "epoch": 1.3752698119025593,
      "grad_norm": 0.19317637383937836,
      "learning_rate": 5.3194215005793964e-05,
      "loss": 0.551,
      "step": 6690
    },
    {
      "epoch": 1.3754753828759378,
      "grad_norm": 0.17101670801639557,
      "learning_rate": 5.31843936860993e-05,
      "loss": 0.5369,
      "step": 6691
    },
    {
      "epoch": 1.3756809538493164,
      "grad_norm": 0.1648482233285904,
      "learning_rate": 5.317457196319782e-05,
      "loss": 0.5706,
      "step": 6692
    },
    {
      "epoch": 1.375886524822695,
      "grad_norm": 0.1978417932987213,
      "learning_rate": 5.3164749837573395e-05,
      "loss": 0.5429,
      "step": 6693
    },
    {
      "epoch": 1.3760920957960736,
      "grad_norm": 0.19628840684890747,
      "learning_rate": 5.31549273097099e-05,
      "loss": 0.5649,
      "step": 6694
    },
    {
      "epoch": 1.3762976667694522,
      "grad_norm": 0.1944446712732315,
      "learning_rate": 5.314510438009125e-05,
      "loss": 0.548,
      "step": 6695
    },
    {
      "epoch": 1.3765032377428308,
      "grad_norm": 0.19895857572555542,
      "learning_rate": 5.313528104920138e-05,
      "loss": 0.5428,
      "step": 6696
    },
    {
      "epoch": 1.3767088087162094,
      "grad_norm": 0.18742914497852325,
      "learning_rate": 5.312545731752423e-05,
      "loss": 0.5525,
      "step": 6697
    },
    {
      "epoch": 1.376914379689588,
      "grad_norm": 0.1647169291973114,
      "learning_rate": 5.311563318554379e-05,
      "loss": 0.5259,
      "step": 6698
    },
    {
      "epoch": 1.3771199506629663,
      "grad_norm": 0.1640775054693222,
      "learning_rate": 5.310580865374401e-05,
      "loss": 0.5602,
      "step": 6699
    },
    {
      "epoch": 1.377325521636345,
      "grad_norm": 0.19247397780418396,
      "learning_rate": 5.309598372260895e-05,
      "loss": 0.5539,
      "step": 6700
    },
    {
      "epoch": 1.3775310926097235,
      "grad_norm": 0.17393262684345245,
      "learning_rate": 5.3086158392622606e-05,
      "loss": 0.5212,
      "step": 6701
    },
    {
      "epoch": 1.377736663583102,
      "grad_norm": 0.17243215441703796,
      "learning_rate": 5.307633266426903e-05,
      "loss": 0.5667,
      "step": 6702
    },
    {
      "epoch": 1.3779422345564807,
      "grad_norm": 0.19524256885051727,
      "learning_rate": 5.3066506538032286e-05,
      "loss": 0.5447,
      "step": 6703
    },
    {
      "epoch": 1.378147805529859,
      "grad_norm": 0.19185814261436462,
      "learning_rate": 5.305668001439647e-05,
      "loss": 0.5564,
      "step": 6704
    },
    {
      "epoch": 1.3783533765032376,
      "grad_norm": 0.19080397486686707,
      "learning_rate": 5.3046853093845694e-05,
      "loss": 0.5545,
      "step": 6705
    },
    {
      "epoch": 1.3785589474766162,
      "grad_norm": 0.20013724267482758,
      "learning_rate": 5.303702577686408e-05,
      "loss": 0.5444,
      "step": 6706
    },
    {
      "epoch": 1.3787645184499948,
      "grad_norm": 0.19205878674983978,
      "learning_rate": 5.302719806393576e-05,
      "loss": 0.5582,
      "step": 6707
    },
    {
      "epoch": 1.3789700894233734,
      "grad_norm": 0.16551436483860016,
      "learning_rate": 5.3017369955544915e-05,
      "loss": 0.5166,
      "step": 6708
    },
    {
      "epoch": 1.379175660396752,
      "grad_norm": 0.15659868717193604,
      "learning_rate": 5.300754145217573e-05,
      "loss": 0.5345,
      "step": 6709
    },
    {
      "epoch": 1.3793812313701306,
      "grad_norm": 0.19091999530792236,
      "learning_rate": 5.299771255431239e-05,
      "loss": 0.5393,
      "step": 6710
    },
    {
      "epoch": 1.3795868023435092,
      "grad_norm": 0.19453977048397064,
      "learning_rate": 5.298788326243915e-05,
      "loss": 0.5471,
      "step": 6711
    },
    {
      "epoch": 1.3797923733168878,
      "grad_norm": 0.18982084095478058,
      "learning_rate": 5.2978053577040225e-05,
      "loss": 0.5482,
      "step": 6712
    },
    {
      "epoch": 1.3799979442902663,
      "grad_norm": 0.20918771624565125,
      "learning_rate": 5.2968223498599895e-05,
      "loss": 0.5698,
      "step": 6713
    },
    {
      "epoch": 1.3802035152636447,
      "grad_norm": 0.20116795599460602,
      "learning_rate": 5.2958393027602444e-05,
      "loss": 0.5605,
      "step": 6714
    },
    {
      "epoch": 1.3804090862370233,
      "grad_norm": 0.18591387569904327,
      "learning_rate": 5.294856216453216e-05,
      "loss": 0.5381,
      "step": 6715
    },
    {
      "epoch": 1.3806146572104019,
      "grad_norm": 0.19346030056476593,
      "learning_rate": 5.293873090987336e-05,
      "loss": 0.565,
      "step": 6716
    },
    {
      "epoch": 1.3808202281837805,
      "grad_norm": 0.18695658445358276,
      "learning_rate": 5.292889926411041e-05,
      "loss": 0.5261,
      "step": 6717
    },
    {
      "epoch": 1.381025799157159,
      "grad_norm": 0.16254091262817383,
      "learning_rate": 5.291906722772765e-05,
      "loss": 0.5208,
      "step": 6718
    },
    {
      "epoch": 1.3812313701305377,
      "grad_norm": 0.15224479138851166,
      "learning_rate": 5.2909234801209445e-05,
      "loss": 0.5667,
      "step": 6719
    },
    {
      "epoch": 1.381436941103916,
      "grad_norm": 0.16312278807163239,
      "learning_rate": 5.2899401985040215e-05,
      "loss": 0.5439,
      "step": 6720
    },
    {
      "epoch": 1.3816425120772946,
      "grad_norm": 0.15921905636787415,
      "learning_rate": 5.288956877970438e-05,
      "loss": 0.5442,
      "step": 6721
    },
    {
      "epoch": 1.3818480830506732,
      "grad_norm": 0.20192372798919678,
      "learning_rate": 5.287973518568635e-05,
      "loss": 0.5779,
      "step": 6722
    },
    {
      "epoch": 1.3820536540240518,
      "grad_norm": 0.16968026757240295,
      "learning_rate": 5.286990120347061e-05,
      "loss": 0.5533,
      "step": 6723
    },
    {
      "epoch": 1.3822592249974304,
      "grad_norm": 0.16610193252563477,
      "learning_rate": 5.2860066833541636e-05,
      "loss": 0.5593,
      "step": 6724
    },
    {
      "epoch": 1.382464795970809,
      "grad_norm": 0.19108933210372925,
      "learning_rate": 5.285023207638389e-05,
      "loss": 0.564,
      "step": 6725
    },
    {
      "epoch": 1.3826703669441875,
      "grad_norm": 0.18995323777198792,
      "learning_rate": 5.28403969324819e-05,
      "loss": 0.5523,
      "step": 6726
    },
    {
      "epoch": 1.3828759379175661,
      "grad_norm": 0.18891942501068115,
      "learning_rate": 5.2830561402320215e-05,
      "loss": 0.5453,
      "step": 6727
    },
    {
      "epoch": 1.3830815088909447,
      "grad_norm": 0.19134697318077087,
      "learning_rate": 5.2820725486383356e-05,
      "loss": 0.565,
      "step": 6728
    },
    {
      "epoch": 1.383287079864323,
      "grad_norm": 0.16310301423072815,
      "learning_rate": 5.28108891851559e-05,
      "loss": 0.549,
      "step": 6729
    },
    {
      "epoch": 1.3834926508377017,
      "grad_norm": 0.13107767701148987,
      "learning_rate": 5.280105249912246e-05,
      "loss": 0.5449,
      "step": 6730
    },
    {
      "epoch": 1.3836982218110803,
      "grad_norm": 0.12225886434316635,
      "learning_rate": 5.279121542876761e-05,
      "loss": 0.5211,
      "step": 6731
    },
    {
      "epoch": 1.3839037927844589,
      "grad_norm": 0.16120769083499908,
      "learning_rate": 5.2781377974576e-05,
      "loss": 0.536,
      "step": 6732
    },
    {
      "epoch": 1.3841093637578374,
      "grad_norm": 0.20347453653812408,
      "learning_rate": 5.2771540137032256e-05,
      "loss": 0.5692,
      "step": 6733
    },
    {
      "epoch": 1.384314934731216,
      "grad_norm": 0.19555138051509857,
      "learning_rate": 5.2761701916621064e-05,
      "loss": 0.5155,
      "step": 6734
    },
    {
      "epoch": 1.3845205057045944,
      "grad_norm": 0.2035539448261261,
      "learning_rate": 5.27518633138271e-05,
      "loss": 0.5697,
      "step": 6735
    },
    {
      "epoch": 1.384726076677973,
      "grad_norm": 0.18798959255218506,
      "learning_rate": 5.274202432913505e-05,
      "loss": 0.553,
      "step": 6736
    },
    {
      "epoch": 1.3849316476513516,
      "grad_norm": 0.1946985423564911,
      "learning_rate": 5.2732184963029663e-05,
      "loss": 0.5551,
      "step": 6737
    },
    {
      "epoch": 1.3851372186247302,
      "grad_norm": 0.17025156319141388,
      "learning_rate": 5.272234521599565e-05,
      "loss": 0.5342,
      "step": 6738
    },
    {
      "epoch": 1.3853427895981087,
      "grad_norm": 0.16380397975444794,
      "learning_rate": 5.27125050885178e-05,
      "loss": 0.5592,
      "step": 6739
    },
    {
      "epoch": 1.3855483605714873,
      "grad_norm": 0.19385696947574615,
      "learning_rate": 5.2702664581080845e-05,
      "loss": 0.5499,
      "step": 6740
    },
    {
      "epoch": 1.385753931544866,
      "grad_norm": 0.19014237821102142,
      "learning_rate": 5.2692823694169624e-05,
      "loss": 0.5322,
      "step": 6741
    },
    {
      "epoch": 1.3859595025182445,
      "grad_norm": 0.16555199027061462,
      "learning_rate": 5.2682982428268926e-05,
      "loss": 0.5253,
      "step": 6742
    },
    {
      "epoch": 1.386165073491623,
      "grad_norm": 0.1773664802312851,
      "learning_rate": 5.26731407838636e-05,
      "loss": 0.5609,
      "step": 6743
    },
    {
      "epoch": 1.3863706444650015,
      "grad_norm": 0.20064838230609894,
      "learning_rate": 5.26632987614385e-05,
      "loss": 0.5682,
      "step": 6744
    },
    {
      "epoch": 1.38657621543838,
      "grad_norm": 0.16893361508846283,
      "learning_rate": 5.2653456361478486e-05,
      "loss": 0.5296,
      "step": 6745
    },
    {
      "epoch": 1.3867817864117586,
      "grad_norm": 0.12028443813323975,
      "learning_rate": 5.264361358446845e-05,
      "loss": 0.5109,
      "step": 6746
    },
    {
      "epoch": 1.3869873573851372,
      "grad_norm": 0.16433177888393402,
      "learning_rate": 5.263377043089329e-05,
      "loss": 0.5492,
      "step": 6747
    },
    {
      "epoch": 1.3871929283585158,
      "grad_norm": 0.20058415830135345,
      "learning_rate": 5.262392690123795e-05,
      "loss": 0.5544,
      "step": 6748
    },
    {
      "epoch": 1.3873984993318944,
      "grad_norm": 0.1890854686498642,
      "learning_rate": 5.261408299598737e-05,
      "loss": 0.5518,
      "step": 6749
    },
    {
      "epoch": 1.3876040703052728,
      "grad_norm": 0.1862923949956894,
      "learning_rate": 5.260423871562648e-05,
      "loss": 0.5598,
      "step": 6750
    },
    {
      "epoch": 1.3878096412786514,
      "grad_norm": 0.18234452605247498,
      "learning_rate": 5.2594394060640325e-05,
      "loss": 0.5486,
      "step": 6751
    },
    {
      "epoch": 1.38801521225203,
      "grad_norm": 0.16496604681015015,
      "learning_rate": 5.258454903151385e-05,
      "loss": 0.4984,
      "step": 6752
    },
    {
      "epoch": 1.3882207832254085,
      "grad_norm": 0.1623886525630951,
      "learning_rate": 5.2574703628732104e-05,
      "loss": 0.5521,
      "step": 6753
    },
    {
      "epoch": 1.3884263541987871,
      "grad_norm": 0.20108892023563385,
      "learning_rate": 5.25648578527801e-05,
      "loss": 0.571,
      "step": 6754
    },
    {
      "epoch": 1.3886319251721657,
      "grad_norm": 0.18858185410499573,
      "learning_rate": 5.2555011704142925e-05,
      "loss": 0.5343,
      "step": 6755
    },
    {
      "epoch": 1.3888374961455443,
      "grad_norm": 0.18392902612686157,
      "learning_rate": 5.2545165183305625e-05,
      "loss": 0.5448,
      "step": 6756
    },
    {
      "epoch": 1.389043067118923,
      "grad_norm": 0.19124126434326172,
      "learning_rate": 5.253531829075331e-05,
      "loss": 0.5493,
      "step": 6757
    },
    {
      "epoch": 1.3892486380923015,
      "grad_norm": 0.19267001748085022,
      "learning_rate": 5.252547102697108e-05,
      "loss": 0.5504,
      "step": 6758
    },
    {
      "epoch": 1.3894542090656798,
      "grad_norm": 0.19391465187072754,
      "learning_rate": 5.251562339244407e-05,
      "loss": 0.5503,
      "step": 6759
    },
    {
      "epoch": 1.3896597800390584,
      "grad_norm": 0.16429035365581512,
      "learning_rate": 5.250577538765741e-05,
      "loss": 0.5135,
      "step": 6760
    },
    {
      "epoch": 1.389865351012437,
      "grad_norm": 0.15530334413051605,
      "learning_rate": 5.249592701309629e-05,
      "loss": 0.5197,
      "step": 6761
    },
    {
      "epoch": 1.3900709219858156,
      "grad_norm": 0.19579361379146576,
      "learning_rate": 5.248607826924589e-05,
      "loss": 0.5486,
      "step": 6762
    },
    {
      "epoch": 1.3902764929591942,
      "grad_norm": 0.1991192102432251,
      "learning_rate": 5.2476229156591384e-05,
      "loss": 0.5713,
      "step": 6763
    },
    {
      "epoch": 1.3904820639325728,
      "grad_norm": 0.19221562147140503,
      "learning_rate": 5.246637967561802e-05,
      "loss": 0.5394,
      "step": 6764
    },
    {
      "epoch": 1.3906876349059512,
      "grad_norm": 0.18756262958049774,
      "learning_rate": 5.245652982681102e-05,
      "loss": 0.5317,
      "step": 6765
    },
    {
      "epoch": 1.3908932058793297,
      "grad_norm": 0.16349650919437408,
      "learning_rate": 5.244667961065567e-05,
      "loss": 0.5351,
      "step": 6766
    },
    {
      "epoch": 1.3910987768527083,
      "grad_norm": 0.17436912655830383,
      "learning_rate": 5.24368290276372e-05,
      "loss": 0.5597,
      "step": 6767
    },
    {
      "epoch": 1.391304347826087,
      "grad_norm": 0.19610293209552765,
      "learning_rate": 5.242697807824093e-05,
      "loss": 0.5688,
      "step": 6768
    },
    {
      "epoch": 1.3915099187994655,
      "grad_norm": 0.19287322461605072,
      "learning_rate": 5.241712676295217e-05,
      "loss": 0.5456,
      "step": 6769
    },
    {
      "epoch": 1.391715489772844,
      "grad_norm": 0.1898210346698761,
      "learning_rate": 5.240727508225623e-05,
      "loss": 0.5595,
      "step": 6770
    },
    {
      "epoch": 1.3919210607462227,
      "grad_norm": 0.1842799186706543,
      "learning_rate": 5.239742303663847e-05,
      "loss": 0.5492,
      "step": 6771
    },
    {
      "epoch": 1.3921266317196013,
      "grad_norm": 0.18624331057071686,
      "learning_rate": 5.238757062658426e-05,
      "loss": 0.5388,
      "step": 6772
    },
    {
      "epoch": 1.3923322026929799,
      "grad_norm": 0.16960440576076508,
      "learning_rate": 5.237771785257897e-05,
      "loss": 0.5353,
      "step": 6773
    },
    {
      "epoch": 1.3925377736663584,
      "grad_norm": 0.13957920670509338,
      "learning_rate": 5.2367864715108005e-05,
      "loss": 0.5144,
      "step": 6774
    },
    {
      "epoch": 1.3927433446397368,
      "grad_norm": 0.1618185192346573,
      "learning_rate": 5.235801121465677e-05,
      "loss": 0.5447,
      "step": 6775
    },
    {
      "epoch": 1.3929489156131154,
      "grad_norm": 0.19508126378059387,
      "learning_rate": 5.234815735171073e-05,
      "loss": 0.5684,
      "step": 6776
    },
    {
      "epoch": 1.393154486586494,
      "grad_norm": 0.1584571748971939,
      "learning_rate": 5.233830312675533e-05,
      "loss": 0.4997,
      "step": 6777
    },
    {
      "epoch": 1.3933600575598726,
      "grad_norm": 0.15756317973136902,
      "learning_rate": 5.232844854027601e-05,
      "loss": 0.5506,
      "step": 6778
    },
    {
      "epoch": 1.3935656285332512,
      "grad_norm": 0.2031278908252716,
      "learning_rate": 5.231859359275831e-05,
      "loss": 0.5452,
      "step": 6779
    },
    {
      "epoch": 1.3937711995066295,
      "grad_norm": 0.1902448683977127,
      "learning_rate": 5.230873828468769e-05,
      "loss": 0.5624,
      "step": 6780
    },
    {
      "epoch": 1.3939767704800081,
      "grad_norm": 0.19727613031864166,
      "learning_rate": 5.22988826165497e-05,
      "loss": 0.5699,
      "step": 6781
    },
    {
      "epoch": 1.3941823414533867,
      "grad_norm": 0.18354666233062744,
      "learning_rate": 5.228902658882989e-05,
      "loss": 0.5463,
      "step": 6782
    },
    {
      "epoch": 1.3943879124267653,
      "grad_norm": 0.16275332868099213,
      "learning_rate": 5.22791702020138e-05,
      "loss": 0.5167,
      "step": 6783
    },
    {
      "epoch": 1.3945934834001439,
      "grad_norm": 0.15852688252925873,
      "learning_rate": 5.226931345658701e-05,
      "loss": 0.5342,
      "step": 6784
    },
    {
      "epoch": 1.3947990543735225,
      "grad_norm": 0.16829104721546173,
      "learning_rate": 5.2259456353035136e-05,
      "loss": 0.5206,
      "step": 6785
    },
    {
      "epoch": 1.395004625346901,
      "grad_norm": 0.15513145923614502,
      "learning_rate": 5.2249598891843765e-05,
      "loss": 0.5584,
      "step": 6786
    },
    {
      "epoch": 1.3952101963202796,
      "grad_norm": 0.1593499630689621,
      "learning_rate": 5.223974107349855e-05,
      "loss": 0.5256,
      "step": 6787
    },
    {
      "epoch": 1.3954157672936582,
      "grad_norm": 0.16022507846355438,
      "learning_rate": 5.222988289848512e-05,
      "loss": 0.5377,
      "step": 6788
    },
    {
      "epoch": 1.3956213382670368,
      "grad_norm": 0.19908879697322845,
      "learning_rate": 5.222002436728917e-05,
      "loss": 0.5567,
      "step": 6789
    },
    {
      "epoch": 1.3958269092404152,
      "grad_norm": 0.1942145675420761,
      "learning_rate": 5.2210165480396364e-05,
      "loss": 0.5503,
      "step": 6790
    },
    {
      "epoch": 1.3960324802137938,
      "grad_norm": 0.20177899301052094,
      "learning_rate": 5.2200306238292396e-05,
      "loss": 0.5572,
      "step": 6791
    },
    {
      "epoch": 1.3962380511871724,
      "grad_norm": 0.20615504682064056,
      "learning_rate": 5.219044664146299e-05,
      "loss": 0.5572,
      "step": 6792
    },
    {
      "epoch": 1.396443622160551,
      "grad_norm": 0.16137507557868958,
      "learning_rate": 5.21805866903939e-05,
      "loss": 0.5327,
      "step": 6793
    },
    {
      "epoch": 1.3966491931339295,
      "grad_norm": 0.13222044706344604,
      "learning_rate": 5.217072638557086e-05,
      "loss": 0.5397,
      "step": 6794
    },
    {
      "epoch": 1.396854764107308,
      "grad_norm": 0.15501753985881805,
      "learning_rate": 5.216086572747963e-05,
      "loss": 0.5588,
      "step": 6795
    },
    {
      "epoch": 1.3970603350806865,
      "grad_norm": 0.16480109095573425,
      "learning_rate": 5.2151004716606035e-05,
      "loss": 0.4947,
      "step": 6796
    },
    {
      "epoch": 1.397265906054065,
      "grad_norm": 0.1597471535205841,
      "learning_rate": 5.214114335343585e-05,
      "loss": 0.5504,
      "step": 6797
    },
    {
      "epoch": 1.3974714770274437,
      "grad_norm": 0.18874730169773102,
      "learning_rate": 5.2131281638454914e-05,
      "loss": 0.5601,
      "step": 6798
    },
    {
      "epoch": 1.3976770480008223,
      "grad_norm": 0.19088098406791687,
      "learning_rate": 5.212141957214907e-05,
      "loss": 0.557,
      "step": 6799
    },
    {
      "epoch": 1.3978826189742009,
      "grad_norm": 0.19219143688678741,
      "learning_rate": 5.2111557155004156e-05,
      "loss": 0.5574,
      "step": 6800
    },
    {
      "epoch": 1.3980881899475794,
      "grad_norm": 0.19509856402873993,
      "learning_rate": 5.2101694387506074e-05,
      "loss": 0.5609,
      "step": 6801
    },
    {
      "epoch": 1.398293760920958,
      "grad_norm": 0.19519266486167908,
      "learning_rate": 5.2091831270140694e-05,
      "loss": 0.5598,
      "step": 6802
    },
    {
      "epoch": 1.3984993318943366,
      "grad_norm": 0.16416554152965546,
      "learning_rate": 5.208196780339394e-05,
      "loss": 0.5073,
      "step": 6803
    },
    {
      "epoch": 1.3987049028677152,
      "grad_norm": 0.16652482748031616,
      "learning_rate": 5.207210398775174e-05,
      "loss": 0.5577,
      "step": 6804
    },
    {
      "epoch": 1.3989104738410936,
      "grad_norm": 0.1610838919878006,
      "learning_rate": 5.206223982370001e-05,
      "loss": 0.5424,
      "step": 6805
    },
    {
      "epoch": 1.3991160448144722,
      "grad_norm": 0.12500424683094025,
      "learning_rate": 5.2052375311724755e-05,
      "loss": 0.5185,
      "step": 6806
    },
    {
      "epoch": 1.3993216157878507,
      "grad_norm": 0.16289743781089783,
      "learning_rate": 5.204251045231191e-05,
      "loss": 0.548,
      "step": 6807
    },
    {
      "epoch": 1.3995271867612293,
      "grad_norm": 0.1971302479505539,
      "learning_rate": 5.203264524594751e-05,
      "loss": 0.5481,
      "step": 6808
    },
    {
      "epoch": 1.399732757734608,
      "grad_norm": 0.1616830974817276,
      "learning_rate": 5.2022779693117535e-05,
      "loss": 0.5206,
      "step": 6809
    },
    {
      "epoch": 1.3999383287079865,
      "grad_norm": 0.13564690947532654,
      "learning_rate": 5.201291379430804e-05,
      "loss": 0.5078,
      "step": 6810
    },
    {
      "epoch": 1.4001438996813649,
      "grad_norm": 0.20377317070960999,
      "learning_rate": 5.200304755000506e-05,
      "loss": 0.5494,
      "step": 6811
    },
    {
      "epoch": 1.4003494706547435,
      "grad_norm": 0.20373232662677765,
      "learning_rate": 5.199318096069465e-05,
      "loss": 0.5652,
      "step": 6812
    },
    {
      "epoch": 1.400555041628122,
      "grad_norm": 0.19755113124847412,
      "learning_rate": 5.198331402686291e-05,
      "loss": 0.5687,
      "step": 6813
    },
    {
      "epoch": 1.4007606126015006,
      "grad_norm": 0.18689025938510895,
      "learning_rate": 5.197344674899593e-05,
      "loss": 0.5576,
      "step": 6814
    },
    {
      "epoch": 1.4009661835748792,
      "grad_norm": 0.1978052258491516,
      "learning_rate": 5.196357912757982e-05,
      "loss": 0.5807,
      "step": 6815
    },
    {
      "epoch": 1.4011717545482578,
      "grad_norm": 0.16826669871807098,
      "learning_rate": 5.19537111631007e-05,
      "loss": 0.4959,
      "step": 6816
    },
    {
      "epoch": 1.4013773255216364,
      "grad_norm": 0.16866251826286316,
      "learning_rate": 5.1943842856044745e-05,
      "loss": 0.5509,
      "step": 6817
    },
    {
      "epoch": 1.401582896495015,
      "grad_norm": 0.16553765535354614,
      "learning_rate": 5.19339742068981e-05,
      "loss": 0.5225,
      "step": 6818
    },
    {
      "epoch": 1.4017884674683936,
      "grad_norm": 0.15738850831985474,
      "learning_rate": 5.192410521614695e-05,
      "loss": 0.5439,
      "step": 6819
    },
    {
      "epoch": 1.401994038441772,
      "grad_norm": 0.1941434144973755,
      "learning_rate": 5.1914235884277515e-05,
      "loss": 0.5431,
      "step": 6820
    },
    {
      "epoch": 1.4021996094151505,
      "grad_norm": 0.19510993361473083,
      "learning_rate": 5.1904366211775995e-05,
      "loss": 0.5699,
      "step": 6821
    },
    {
      "epoch": 1.4024051803885291,
      "grad_norm": 0.21199296414852142,
      "learning_rate": 5.189449619912862e-05,
      "loss": 0.5497,
      "step": 6822
    },
    {
      "epoch": 1.4026107513619077,
      "grad_norm": 0.20840586721897125,
      "learning_rate": 5.188462584682163e-05,
      "loss": 0.5692,
      "step": 6823
    },
    {
      "epoch": 1.4028163223352863,
      "grad_norm": 0.18796321749687195,
      "learning_rate": 5.187475515534132e-05,
      "loss": 0.5497,
      "step": 6824
    },
    {
      "epoch": 1.403021893308665,
      "grad_norm": 0.18638098239898682,
      "learning_rate": 5.186488412517396e-05,
      "loss": 0.556,
      "step": 6825
    },
    {
      "epoch": 1.4032274642820433,
      "grad_norm": 0.18943150341510773,
      "learning_rate": 5.185501275680582e-05,
      "loss": 0.5451,
      "step": 6826
    },
    {
      "epoch": 1.4034330352554218,
      "grad_norm": 0.19243142008781433,
      "learning_rate": 5.184514105072326e-05,
      "loss": 0.5348,
      "step": 6827
    },
    {
      "epoch": 1.4036386062288004,
      "grad_norm": 0.19465966522693634,
      "learning_rate": 5.1835269007412585e-05,
      "loss": 0.5711,
      "step": 6828
    },
    {
      "epoch": 1.403844177202179,
      "grad_norm": 0.1992519199848175,
      "learning_rate": 5.1825396627360166e-05,
      "loss": 0.5768,
      "step": 6829
    },
    {
      "epoch": 1.4040497481755576,
      "grad_norm": 0.16625314950942993,
      "learning_rate": 5.181552391105235e-05,
      "loss": 0.5431,
      "step": 6830
    },
    {
      "epoch": 1.4042553191489362,
      "grad_norm": 0.16418209671974182,
      "learning_rate": 5.180565085897552e-05,
      "loss": 0.5585,
      "step": 6831
    },
    {
      "epoch": 1.4044608901223148,
      "grad_norm": 0.19852881133556366,
      "learning_rate": 5.17957774716161e-05,
      "loss": 0.5298,
      "step": 6832
    },
    {
      "epoch": 1.4046664610956934,
      "grad_norm": 0.18581949174404144,
      "learning_rate": 5.178590374946047e-05,
      "loss": 0.5466,
      "step": 6833
    },
    {
      "epoch": 1.404872032069072,
      "grad_norm": 0.19243168830871582,
      "learning_rate": 5.177602969299509e-05,
      "loss": 0.552,
      "step": 6834
    },
    {
      "epoch": 1.4050776030424503,
      "grad_norm": 0.20078270137310028,
      "learning_rate": 5.1766155302706397e-05,
      "loss": 0.56,
      "step": 6835
    },
    {
      "epoch": 1.405283174015829,
      "grad_norm": 0.18953198194503784,
      "learning_rate": 5.175628057908085e-05,
      "loss": 0.5404,
      "step": 6836
    },
    {
      "epoch": 1.4054887449892075,
      "grad_norm": 0.19314275681972504,
      "learning_rate": 5.174640552260494e-05,
      "loss": 0.553,
      "step": 6837
    },
    {
      "epoch": 1.405694315962586,
      "grad_norm": 0.19777776300907135,
      "learning_rate": 5.1736530133765175e-05,
      "loss": 0.5539,
      "step": 6838
    },
    {
      "epoch": 1.4058998869359647,
      "grad_norm": 0.18886315822601318,
      "learning_rate": 5.1726654413048036e-05,
      "loss": 0.5508,
      "step": 6839
    },
    {
      "epoch": 1.4061054579093433,
      "grad_norm": 0.16566768288612366,
      "learning_rate": 5.171677836094008e-05,
      "loss": 0.5384,
      "step": 6840
    },
    {
      "epoch": 1.4063110288827216,
      "grad_norm": 0.12670090794563293,
      "learning_rate": 5.170690197792785e-05,
      "loss": 0.5064,
      "step": 6841
    },
    {
      "epoch": 1.4065165998561002,
      "grad_norm": 0.16452710330486298,
      "learning_rate": 5.1697025264497915e-05,
      "loss": 0.5549,
      "step": 6842
    },
    {
      "epoch": 1.4067221708294788,
      "grad_norm": 0.23035211861133575,
      "learning_rate": 5.168714822113684e-05,
      "loss": 0.533,
      "step": 6843
    },
    {
      "epoch": 1.4069277418028574,
      "grad_norm": 0.1920643001794815,
      "learning_rate": 5.167727084833123e-05,
      "loss": 0.5667,
      "step": 6844
    },
    {
      "epoch": 1.407133312776236,
      "grad_norm": 0.1763206124305725,
      "learning_rate": 5.1667393146567695e-05,
      "loss": 0.5285,
      "step": 6845
    },
    {
      "epoch": 1.4073388837496146,
      "grad_norm": 0.17114083468914032,
      "learning_rate": 5.1657515116332866e-05,
      "loss": 0.5385,
      "step": 6846
    },
    {
      "epoch": 1.4075444547229932,
      "grad_norm": 0.19775407016277313,
      "learning_rate": 5.164763675811338e-05,
      "loss": 0.569,
      "step": 6847
    },
    {
      "epoch": 1.4077500256963718,
      "grad_norm": 0.18887090682983398,
      "learning_rate": 5.163775807239591e-05,
      "loss": 0.5487,
      "step": 6848
    },
    {
      "epoch": 1.4079555966697503,
      "grad_norm": 0.1911323517560959,
      "learning_rate": 5.162787905966711e-05,
      "loss": 0.5632,
      "step": 6849
    },
    {
      "epoch": 1.4081611676431287,
      "grad_norm": 0.19571152329444885,
      "learning_rate": 5.16179997204137e-05,
      "loss": 0.5655,
      "step": 6850
    },
    {
      "epoch": 1.4083667386165073,
      "grad_norm": 0.1829329878091812,
      "learning_rate": 5.160812005512236e-05,
      "loss": 0.5319,
      "step": 6851
    },
    {
      "epoch": 1.4085723095898859,
      "grad_norm": 0.19352376461029053,
      "learning_rate": 5.1598240064279846e-05,
      "loss": 0.5616,
      "step": 6852
    },
    {
      "epoch": 1.4087778805632645,
      "grad_norm": 0.19807998836040497,
      "learning_rate": 5.158835974837289e-05,
      "loss": 0.5414,
      "step": 6853
    },
    {
      "epoch": 1.408983451536643,
      "grad_norm": 0.1893458068370819,
      "learning_rate": 5.157847910788822e-05,
      "loss": 0.5426,
      "step": 6854
    },
    {
      "epoch": 1.4091890225100216,
      "grad_norm": 0.1907995045185089,
      "learning_rate": 5.1568598143312656e-05,
      "loss": 0.5472,
      "step": 6855
    },
    {
      "epoch": 1.4093945934834,
      "grad_norm": 0.17473357915878296,
      "learning_rate": 5.1558716855132956e-05,
      "loss": 0.4997,
      "step": 6856
    },
    {
      "epoch": 1.4096001644567786,
      "grad_norm": 0.16449564695358276,
      "learning_rate": 5.154883524383592e-05,
      "loss": 0.5579,
      "step": 6857
    },
    {
      "epoch": 1.4098057354301572,
      "grad_norm": 0.1907692849636078,
      "learning_rate": 5.153895330990839e-05,
      "loss": 0.5778,
      "step": 6858
    },
    {
      "epoch": 1.4100113064035358,
      "grad_norm": 0.18911254405975342,
      "learning_rate": 5.1529071053837206e-05,
      "loss": 0.532,
      "step": 6859
    },
    {
      "epoch": 1.4102168773769144,
      "grad_norm": 0.19013933837413788,
      "learning_rate": 5.151918847610918e-05,
      "loss": 0.5414,
      "step": 6860
    },
    {
      "epoch": 1.410422448350293,
      "grad_norm": 0.1888997107744217,
      "learning_rate": 5.150930557721122e-05,
      "loss": 0.5472,
      "step": 6861
    },
    {
      "epoch": 1.4106280193236715,
      "grad_norm": 0.18794280290603638,
      "learning_rate": 5.14994223576302e-05,
      "loss": 0.5541,
      "step": 6862
    },
    {
      "epoch": 1.4108335902970501,
      "grad_norm": 0.19255901873111725,
      "learning_rate": 5.1489538817853034e-05,
      "loss": 0.5695,
      "step": 6863
    },
    {
      "epoch": 1.4110391612704287,
      "grad_norm": 0.18833082914352417,
      "learning_rate": 5.1479654958366594e-05,
      "loss": 0.5571,
      "step": 6864
    },
    {
      "epoch": 1.4112447322438073,
      "grad_norm": 0.1937963217496872,
      "learning_rate": 5.1469770779657864e-05,
      "loss": 0.5531,
      "step": 6865
    },
    {
      "epoch": 1.4114503032171857,
      "grad_norm": 0.16009144484996796,
      "learning_rate": 5.145988628221376e-05,
      "loss": 0.5195,
      "step": 6866
    },
    {
      "epoch": 1.4116558741905643,
      "grad_norm": 0.15770770609378815,
      "learning_rate": 5.145000146652126e-05,
      "loss": 0.5767,
      "step": 6867
    },
    {
      "epoch": 1.4118614451639429,
      "grad_norm": 0.18932950496673584,
      "learning_rate": 5.1440116333067313e-05,
      "loss": 0.5413,
      "step": 6868
    },
    {
      "epoch": 1.4120670161373214,
      "grad_norm": 0.2200823277235031,
      "learning_rate": 5.143023088233895e-05,
      "loss": 0.5721,
      "step": 6869
    },
    {
      "epoch": 1.4122725871107,
      "grad_norm": 0.19378498196601868,
      "learning_rate": 5.142034511482317e-05,
      "loss": 0.5732,
      "step": 6870
    },
    {
      "epoch": 1.4124781580840784,
      "grad_norm": 0.20359185338020325,
      "learning_rate": 5.141045903100698e-05,
      "loss": 0.5555,
      "step": 6871
    },
    {
      "epoch": 1.412683729057457,
      "grad_norm": 0.18266808986663818,
      "learning_rate": 5.140057263137744e-05,
      "loss": 0.5287,
      "step": 6872
    },
    {
      "epoch": 1.4128893000308356,
      "grad_norm": 0.191037118434906,
      "learning_rate": 5.139068591642161e-05,
      "loss": 0.5536,
      "step": 6873
    },
    {
      "epoch": 1.4130948710042142,
      "grad_norm": 0.19039712846279144,
      "learning_rate": 5.138079888662654e-05,
      "loss": 0.5692,
      "step": 6874
    },
    {
      "epoch": 1.4133004419775927,
      "grad_norm": 0.1601129174232483,
      "learning_rate": 5.1370911542479354e-05,
      "loss": 0.5244,
      "step": 6875
    },
    {
      "epoch": 1.4135060129509713,
      "grad_norm": 0.1585390418767929,
      "learning_rate": 5.1361023884467136e-05,
      "loss": 0.5695,
      "step": 6876
    },
    {
      "epoch": 1.41371158392435,
      "grad_norm": 0.2022130936384201,
      "learning_rate": 5.135113591307699e-05,
      "loss": 0.5696,
      "step": 6877
    },
    {
      "epoch": 1.4139171548977285,
      "grad_norm": 0.1920463740825653,
      "learning_rate": 5.134124762879606e-05,
      "loss": 0.5397,
      "step": 6878
    },
    {
      "epoch": 1.414122725871107,
      "grad_norm": 0.1937701404094696,
      "learning_rate": 5.13313590321115e-05,
      "loss": 0.5513,
      "step": 6879
    },
    {
      "epoch": 1.4143282968444857,
      "grad_norm": 0.16302789747714996,
      "learning_rate": 5.1321470123510486e-05,
      "loss": 0.524,
      "step": 6880
    },
    {
      "epoch": 1.414533867817864,
      "grad_norm": 0.1612044721841812,
      "learning_rate": 5.131158090348017e-05,
      "loss": 0.5558,
      "step": 6881
    },
    {
      "epoch": 1.4147394387912426,
      "grad_norm": 0.18755872547626495,
      "learning_rate": 5.130169137250777e-05,
      "loss": 0.5448,
      "step": 6882
    },
    {
      "epoch": 1.4149450097646212,
      "grad_norm": 0.16323046386241913,
      "learning_rate": 5.1291801531080475e-05,
      "loss": 0.5202,
      "step": 6883
    },
    {
      "epoch": 1.4151505807379998,
      "grad_norm": 0.15463986992835999,
      "learning_rate": 5.128191137968555e-05,
      "loss": 0.5395,
      "step": 6884
    },
    {
      "epoch": 1.4153561517113784,
      "grad_norm": 0.1867363005876541,
      "learning_rate": 5.12720209188102e-05,
      "loss": 0.5608,
      "step": 6885
    },
    {
      "epoch": 1.4155617226847568,
      "grad_norm": 0.18984296917915344,
      "learning_rate": 5.1262130148941705e-05,
      "loss": 0.5527,
      "step": 6886
    },
    {
      "epoch": 1.4157672936581354,
      "grad_norm": 0.18599240481853485,
      "learning_rate": 5.1252239070567315e-05,
      "loss": 0.538,
      "step": 6887
    },
    {
      "epoch": 1.415972864631514,
      "grad_norm": 0.19605940580368042,
      "learning_rate": 5.1242347684174327e-05,
      "loss": 0.5715,
      "step": 6888
    },
    {
      "epoch": 1.4161784356048925,
      "grad_norm": 0.19661271572113037,
      "learning_rate": 5.1232455990250055e-05,
      "loss": 0.5538,
      "step": 6889
    },
    {
      "epoch": 1.4163840065782711,
      "grad_norm": 0.1689828336238861,
      "learning_rate": 5.12225639892818e-05,
      "loss": 0.5337,
      "step": 6890
    },
    {
      "epoch": 1.4165895775516497,
      "grad_norm": 0.16040822863578796,
      "learning_rate": 5.1212671681756916e-05,
      "loss": 0.5651,
      "step": 6891
    },
    {
      "epoch": 1.4167951485250283,
      "grad_norm": 0.16304267942905426,
      "learning_rate": 5.120277906816272e-05,
      "loss": 0.5215,
      "step": 6892
    },
    {
      "epoch": 1.417000719498407,
      "grad_norm": 0.1574201136827469,
      "learning_rate": 5.119288614898659e-05,
      "loss": 0.5349,
      "step": 6893
    },
    {
      "epoch": 1.4172062904717855,
      "grad_norm": 0.20037010312080383,
      "learning_rate": 5.118299292471591e-05,
      "loss": 0.5484,
      "step": 6894
    },
    {
      "epoch": 1.417411861445164,
      "grad_norm": 0.16355712711811066,
      "learning_rate": 5.117309939583806e-05,
      "loss": 0.517,
      "step": 6895
    },
    {
      "epoch": 1.4176174324185424,
      "grad_norm": 0.15935970842838287,
      "learning_rate": 5.116320556284047e-05,
      "loss": 0.5531,
      "step": 6896
    },
    {
      "epoch": 1.417823003391921,
      "grad_norm": 0.20276428759098053,
      "learning_rate": 5.115331142621055e-05,
      "loss": 0.5586,
      "step": 6897
    },
    {
      "epoch": 1.4180285743652996,
      "grad_norm": 0.1946752518415451,
      "learning_rate": 5.114341698643573e-05,
      "loss": 0.5415,
      "step": 6898
    },
    {
      "epoch": 1.4182341453386782,
      "grad_norm": 0.1875738501548767,
      "learning_rate": 5.113352224400347e-05,
      "loss": 0.5354,
      "step": 6899
    },
    {
      "epoch": 1.4184397163120568,
      "grad_norm": 0.1904314160346985,
      "learning_rate": 5.112362719940123e-05,
      "loss": 0.5619,
      "step": 6900
    },
    {
      "epoch": 1.4186452872854354,
      "grad_norm": 0.20147216320037842,
      "learning_rate": 5.111373185311651e-05,
      "loss": 0.5728,
      "step": 6901
    },
    {
      "epoch": 1.4188508582588137,
      "grad_norm": 0.19195587933063507,
      "learning_rate": 5.110383620563679e-05,
      "loss": 0.5806,
      "step": 6902
    },
    {
      "epoch": 1.4190564292321923,
      "grad_norm": 0.16246861219406128,
      "learning_rate": 5.109394025744959e-05,
      "loss": 0.5218,
      "step": 6903
    },
    {
      "epoch": 1.419262000205571,
      "grad_norm": 0.16603510081768036,
      "learning_rate": 5.108404400904243e-05,
      "loss": 0.5348,
      "step": 6904
    },
    {
      "epoch": 1.4194675711789495,
      "grad_norm": 0.1957361102104187,
      "learning_rate": 5.1074147460902876e-05,
      "loss": 0.5661,
      "step": 6905
    },
    {
      "epoch": 1.419673142152328,
      "grad_norm": 0.1889890879392624,
      "learning_rate": 5.106425061351845e-05,
      "loss": 0.5672,
      "step": 6906
    },
    {
      "epoch": 1.4198787131257067,
      "grad_norm": 0.19111685454845428,
      "learning_rate": 5.1054353467376756e-05,
      "loss": 0.5739,
      "step": 6907
    },
    {
      "epoch": 1.4200842840990853,
      "grad_norm": 0.17033053934574127,
      "learning_rate": 5.104445602296536e-05,
      "loss": 0.5152,
      "step": 6908
    },
    {
      "epoch": 1.4202898550724639,
      "grad_norm": 0.1564977467060089,
      "learning_rate": 5.103455828077186e-05,
      "loss": 0.5598,
      "step": 6909
    },
    {
      "epoch": 1.4204954260458424,
      "grad_norm": 0.19049371778964996,
      "learning_rate": 5.1024660241283884e-05,
      "loss": 0.5463,
      "step": 6910
    },
    {
      "epoch": 1.4207009970192208,
      "grad_norm": 0.19642889499664307,
      "learning_rate": 5.101476190498906e-05,
      "loss": 0.578,
      "step": 6911
    },
    {
      "epoch": 1.4209065679925994,
      "grad_norm": 0.19157302379608154,
      "learning_rate": 5.1004863272375034e-05,
      "loss": 0.5386,
      "step": 6912
    },
    {
      "epoch": 1.421112138965978,
      "grad_norm": 0.19283618032932281,
      "learning_rate": 5.0994964343929445e-05,
      "loss": 0.5429,
      "step": 6913
    },
    {
      "epoch": 1.4213177099393566,
      "grad_norm": 0.19500254094600677,
      "learning_rate": 5.0985065120139994e-05,
      "loss": 0.54,
      "step": 6914
    },
    {
      "epoch": 1.4215232809127352,
      "grad_norm": 0.18495769798755646,
      "learning_rate": 5.097516560149434e-05,
      "loss": 0.5359,
      "step": 6915
    },
    {
      "epoch": 1.4217288518861138,
      "grad_norm": 0.18928299844264984,
      "learning_rate": 5.0965265788480225e-05,
      "loss": 0.5567,
      "step": 6916
    },
    {
      "epoch": 1.4219344228594921,
      "grad_norm": 0.18935348093509674,
      "learning_rate": 5.095536568158535e-05,
      "loss": 0.5359,
      "step": 6917
    },
    {
      "epoch": 1.4221399938328707,
      "grad_norm": 0.1989513635635376,
      "learning_rate": 5.094546528129743e-05,
      "loss": 0.5603,
      "step": 6918
    },
    {
      "epoch": 1.4223455648062493,
      "grad_norm": 0.16001847386360168,
      "learning_rate": 5.093556458810423e-05,
      "loss": 0.5223,
      "step": 6919
    },
    {
      "epoch": 1.4225511357796279,
      "grad_norm": 0.15646837651729584,
      "learning_rate": 5.0925663602493503e-05,
      "loss": 0.5285,
      "step": 6920
    },
    {
      "epoch": 1.4227567067530065,
      "grad_norm": 0.20338685810565948,
      "learning_rate": 5.091576232495304e-05,
      "loss": 0.574,
      "step": 6921
    },
    {
      "epoch": 1.422962277726385,
      "grad_norm": 0.1922929286956787,
      "learning_rate": 5.090586075597061e-05,
      "loss": 0.5376,
      "step": 6922
    },
    {
      "epoch": 1.4231678486997636,
      "grad_norm": 0.24350236356258392,
      "learning_rate": 5.089595889603401e-05,
      "loss": 0.5544,
      "step": 6923
    },
    {
      "epoch": 1.4233734196731422,
      "grad_norm": 0.1872577667236328,
      "learning_rate": 5.088605674563109e-05,
      "loss": 0.5748,
      "step": 6924
    },
    {
      "epoch": 1.4235789906465208,
      "grad_norm": 0.18415029346942902,
      "learning_rate": 5.0876154305249654e-05,
      "loss": 0.5457,
      "step": 6925
    },
    {
      "epoch": 1.4237845616198992,
      "grad_norm": 0.1886397749185562,
      "learning_rate": 5.086625157537757e-05,
      "loss": 0.5477,
      "step": 6926
    },
    {
      "epoch": 1.4239901325932778,
      "grad_norm": 0.19316554069519043,
      "learning_rate": 5.085634855650268e-05,
      "loss": 0.5608,
      "step": 6927
    },
    {
      "epoch": 1.4241957035666564,
      "grad_norm": 0.1911771446466446,
      "learning_rate": 5.084644524911288e-05,
      "loss": 0.5427,
      "step": 6928
    },
    {
      "epoch": 1.424401274540035,
      "grad_norm": 0.19828177988529205,
      "learning_rate": 5.083654165369604e-05,
      "loss": 0.5518,
      "step": 6929
    },
    {
      "epoch": 1.4246068455134135,
      "grad_norm": 0.16796253621578217,
      "learning_rate": 5.082663777074008e-05,
      "loss": 0.5173,
      "step": 6930
    },
    {
      "epoch": 1.4248124164867921,
      "grad_norm": 0.16129761934280396,
      "learning_rate": 5.0816733600732905e-05,
      "loss": 0.562,
      "step": 6931
    },
    {
      "epoch": 1.4250179874601705,
      "grad_norm": 0.19917796552181244,
      "learning_rate": 5.0806829144162455e-05,
      "loss": 0.5394,
      "step": 6932
    },
    {
      "epoch": 1.425223558433549,
      "grad_norm": 0.19599252939224243,
      "learning_rate": 5.079692440151668e-05,
      "loss": 0.5829,
      "step": 6933
    },
    {
      "epoch": 1.4254291294069277,
      "grad_norm": 0.1711527705192566,
      "learning_rate": 5.078701937328352e-05,
      "loss": 0.5075,
      "step": 6934
    },
    {
      "epoch": 1.4256347003803063,
      "grad_norm": 0.12597279250621796,
      "learning_rate": 5.077711405995098e-05,
      "loss": 0.497,
      "step": 6935
    },
    {
      "epoch": 1.4258402713536849,
      "grad_norm": 0.15089215338230133,
      "learning_rate": 5.076720846200702e-05,
      "loss": 0.5364,
      "step": 6936
    },
    {
      "epoch": 1.4260458423270634,
      "grad_norm": 0.19826306402683258,
      "learning_rate": 5.0757302579939656e-05,
      "loss": 0.5371,
      "step": 6937
    },
    {
      "epoch": 1.426251413300442,
      "grad_norm": 0.1632860153913498,
      "learning_rate": 5.0747396414236906e-05,
      "loss": 0.5114,
      "step": 6938
    },
    {
      "epoch": 1.4264569842738206,
      "grad_norm": 0.15971128642559052,
      "learning_rate": 5.07374899653868e-05,
      "loss": 0.5575,
      "step": 6939
    },
    {
      "epoch": 1.4266625552471992,
      "grad_norm": 0.18618735671043396,
      "learning_rate": 5.0727583233877376e-05,
      "loss": 0.557,
      "step": 6940
    },
    {
      "epoch": 1.4268681262205778,
      "grad_norm": 0.19377268850803375,
      "learning_rate": 5.07176762201967e-05,
      "loss": 0.5608,
      "step": 6941
    },
    {
      "epoch": 1.4270736971939562,
      "grad_norm": 0.18944592773914337,
      "learning_rate": 5.0707768924832844e-05,
      "loss": 0.5356,
      "step": 6942
    },
    {
      "epoch": 1.4272792681673347,
      "grad_norm": 0.1696036458015442,
      "learning_rate": 5.06978613482739e-05,
      "loss": 0.529,
      "step": 6943
    },
    {
      "epoch": 1.4274848391407133,
      "grad_norm": 0.1654544472694397,
      "learning_rate": 5.068795349100794e-05,
      "loss": 0.57,
      "step": 6944
    },
    {
      "epoch": 1.427690410114092,
      "grad_norm": 0.19743849337100983,
      "learning_rate": 5.067804535352311e-05,
      "loss": 0.558,
      "step": 6945
    },
    {
      "epoch": 1.4278959810874705,
      "grad_norm": 0.188226580619812,
      "learning_rate": 5.066813693630752e-05,
      "loss": 0.5425,
      "step": 6946
    },
    {
      "epoch": 1.4281015520608489,
      "grad_norm": 0.1916334182024002,
      "learning_rate": 5.065822823984931e-05,
      "loss": 0.582,
      "step": 6947
    },
    {
      "epoch": 1.4283071230342275,
      "grad_norm": 0.1938442885875702,
      "learning_rate": 5.064831926463664e-05,
      "loss": 0.5607,
      "step": 6948
    },
    {
      "epoch": 1.428512694007606,
      "grad_norm": 0.19236359000205994,
      "learning_rate": 5.0638410011157694e-05,
      "loss": 0.5811,
      "step": 6949
    },
    {
      "epoch": 1.4287182649809846,
      "grad_norm": 0.19282235205173492,
      "learning_rate": 5.0628500479900636e-05,
      "loss": 0.5456,
      "step": 6950
    },
    {
      "epoch": 1.4289238359543632,
      "grad_norm": 0.19609522819519043,
      "learning_rate": 5.0618590671353655e-05,
      "loss": 0.5484,
      "step": 6951
    },
    {
      "epoch": 1.4291294069277418,
      "grad_norm": 0.19038927555084229,
      "learning_rate": 5.060868058600499e-05,
      "loss": 0.538,
      "step": 6952
    },
    {
      "epoch": 1.4293349779011204,
      "grad_norm": 0.15865328907966614,
      "learning_rate": 5.0598770224342834e-05,
      "loss": 0.5187,
      "step": 6953
    },
    {
      "epoch": 1.429540548874499,
      "grad_norm": 0.1643393188714981,
      "learning_rate": 5.0588859586855435e-05,
      "loss": 0.561,
      "step": 6954
    },
    {
      "epoch": 1.4297461198478776,
      "grad_norm": 0.18920312821865082,
      "learning_rate": 5.057894867403106e-05,
      "loss": 0.5582,
      "step": 6955
    },
    {
      "epoch": 1.4299516908212562,
      "grad_norm": 0.20650269091129303,
      "learning_rate": 5.0569037486357954e-05,
      "loss": 0.5485,
      "step": 6956
    },
    {
      "epoch": 1.4301572617946345,
      "grad_norm": 0.19086134433746338,
      "learning_rate": 5.0559126024324394e-05,
      "loss": 0.5668,
      "step": 6957
    },
    {
      "epoch": 1.4303628327680131,
      "grad_norm": 0.18574881553649902,
      "learning_rate": 5.0549214288418695e-05,
      "loss": 0.5305,
      "step": 6958
    },
    {
      "epoch": 1.4305684037413917,
      "grad_norm": 0.16486965119838715,
      "learning_rate": 5.053930227912913e-05,
      "loss": 0.5394,
      "step": 6959
    },
    {
      "epoch": 1.4307739747147703,
      "grad_norm": 0.1669962853193283,
      "learning_rate": 5.052938999694403e-05,
      "loss": 0.5604,
      "step": 6960
    },
    {
      "epoch": 1.430979545688149,
      "grad_norm": 0.16902011632919312,
      "learning_rate": 5.0519477442351735e-05,
      "loss": 0.5269,
      "step": 6961
    },
    {
      "epoch": 1.4311851166615273,
      "grad_norm": 0.1662750244140625,
      "learning_rate": 5.0509564615840586e-05,
      "loss": 0.5506,
      "step": 6962
    },
    {
      "epoch": 1.4313906876349058,
      "grad_norm": 0.19221939146518707,
      "learning_rate": 5.049965151789895e-05,
      "loss": 0.5682,
      "step": 6963
    },
    {
      "epoch": 1.4315962586082844,
      "grad_norm": 0.18976832926273346,
      "learning_rate": 5.048973814901516e-05,
      "loss": 0.5402,
      "step": 6964
    },
    {
      "epoch": 1.431801829581663,
      "grad_norm": 0.18504224717617035,
      "learning_rate": 5.047982450967766e-05,
      "loss": 0.536,
      "step": 6965
    },
    {
      "epoch": 1.4320074005550416,
      "grad_norm": 0.18513992428779602,
      "learning_rate": 5.0469910600374815e-05,
      "loss": 0.5433,
      "step": 6966
    },
    {
      "epoch": 1.4322129715284202,
      "grad_norm": 0.1597176045179367,
      "learning_rate": 5.045999642159503e-05,
      "loss": 0.5006,
      "step": 6967
    },
    {
      "epoch": 1.4324185425017988,
      "grad_norm": 0.13741186261177063,
      "learning_rate": 5.045008197382674e-05,
      "loss": 0.5147,
      "step": 6968
    },
    {
      "epoch": 1.4326241134751774,
      "grad_norm": 0.16074904799461365,
      "learning_rate": 5.044016725755838e-05,
      "loss": 0.5536,
      "step": 6969
    },
    {
      "epoch": 1.432829684448556,
      "grad_norm": 0.21094325184822083,
      "learning_rate": 5.043025227327842e-05,
      "loss": 0.5529,
      "step": 6970
    },
    {
      "epoch": 1.4330352554219346,
      "grad_norm": 0.19735904037952423,
      "learning_rate": 5.0420337021475304e-05,
      "loss": 0.5282,
      "step": 6971
    },
    {
      "epoch": 1.433240826395313,
      "grad_norm": 0.1973976045846939,
      "learning_rate": 5.041042150263753e-05,
      "loss": 0.5593,
      "step": 6972
    },
    {
      "epoch": 1.4334463973686915,
      "grad_norm": 0.19355326890945435,
      "learning_rate": 5.0400505717253575e-05,
      "loss": 0.5692,
      "step": 6973
    },
    {
      "epoch": 1.43365196834207,
      "grad_norm": 0.19223208725452423,
      "learning_rate": 5.0390589665811944e-05,
      "loss": 0.5534,
      "step": 6974
    },
    {
      "epoch": 1.4338575393154487,
      "grad_norm": 0.1662292182445526,
      "learning_rate": 5.038067334880113e-05,
      "loss": 0.5175,
      "step": 6975
    },
    {
      "epoch": 1.4340631102888273,
      "grad_norm": 0.15810272097587585,
      "learning_rate": 5.0370756766709716e-05,
      "loss": 0.5404,
      "step": 6976
    },
    {
      "epoch": 1.4342686812622059,
      "grad_norm": 0.19795885682106018,
      "learning_rate": 5.0360839920026215e-05,
      "loss": 0.5718,
      "step": 6977
    },
    {
      "epoch": 1.4344742522355842,
      "grad_norm": 0.19126173853874207,
      "learning_rate": 5.0350922809239184e-05,
      "loss": 0.5549,
      "step": 6978
    },
    {
      "epoch": 1.4346798232089628,
      "grad_norm": 0.20567071437835693,
      "learning_rate": 5.03410054348372e-05,
      "loss": 0.5577,
      "step": 6979
    },
    {
      "epoch": 1.4348853941823414,
      "grad_norm": 0.1884375363588333,
      "learning_rate": 5.033108779730883e-05,
      "loss": 0.5491,
      "step": 6980
    },
    {
      "epoch": 1.43509096515572,
      "grad_norm": 0.16468265652656555,
      "learning_rate": 5.0321169897142695e-05,
      "loss": 0.5049,
      "step": 6981
    },
    {
      "epoch": 1.4352965361290986,
      "grad_norm": 0.16884614527225494,
      "learning_rate": 5.031125173482738e-05,
      "loss": 0.5472,
      "step": 6982
    },
    {
      "epoch": 1.4355021071024772,
      "grad_norm": 0.2028854638338089,
      "learning_rate": 5.0301333310851526e-05,
      "loss": 0.5737,
      "step": 6983
    },
    {
      "epoch": 1.4357076780758558,
      "grad_norm": 0.19400665163993835,
      "learning_rate": 5.029141462570376e-05,
      "loss": 0.5492,
      "step": 6984
    },
    {
      "epoch": 1.4359132490492343,
      "grad_norm": 0.19768649339675903,
      "learning_rate": 5.028149567987271e-05,
      "loss": 0.5461,
      "step": 6985
    },
    {
      "epoch": 1.436118820022613,
      "grad_norm": 0.164305180311203,
      "learning_rate": 5.027157647384708e-05,
      "loss": 0.5386,
      "step": 6986
    },
    {
      "epoch": 1.4363243909959913,
      "grad_norm": 0.16050846874713898,
      "learning_rate": 5.02616570081155e-05,
      "loss": 0.5472,
      "step": 6987
    },
    {
      "epoch": 1.4365299619693699,
      "grad_norm": 0.19127194583415985,
      "learning_rate": 5.025173728316668e-05,
      "loss": 0.5656,
      "step": 6988
    },
    {
      "epoch": 1.4367355329427485,
      "grad_norm": 0.1859859675168991,
      "learning_rate": 5.02418172994893e-05,
      "loss": 0.5506,
      "step": 6989
    },
    {
      "epoch": 1.436941103916127,
      "grad_norm": 0.16769689321517944,
      "learning_rate": 5.0231897057572085e-05,
      "loss": 0.5391,
      "step": 6990
    },
    {
      "epoch": 1.4371466748895056,
      "grad_norm": 0.16699868440628052,
      "learning_rate": 5.0221976557903755e-05,
      "loss": 0.5287,
      "step": 6991
    },
    {
      "epoch": 1.4373522458628842,
      "grad_norm": 0.19447840750217438,
      "learning_rate": 5.021205580097305e-05,
      "loss": 0.5451,
      "step": 6992
    },
    {
      "epoch": 1.4375578168362626,
      "grad_norm": 0.1894395351409912,
      "learning_rate": 5.020213478726871e-05,
      "loss": 0.546,
      "step": 6993
    },
    {
      "epoch": 1.4377633878096412,
      "grad_norm": 0.20027700066566467,
      "learning_rate": 5.0192213517279524e-05,
      "loss": 0.5488,
      "step": 6994
    },
    {
      "epoch": 1.4379689587830198,
      "grad_norm": 0.15890729427337646,
      "learning_rate": 5.0182291991494224e-05,
      "loss": 0.5155,
      "step": 6995
    },
    {
      "epoch": 1.4381745297563984,
      "grad_norm": 0.16410616040229797,
      "learning_rate": 5.017237021040163e-05,
      "loss": 0.5709,
      "step": 6996
    },
    {
      "epoch": 1.438380100729777,
      "grad_norm": 0.19332385063171387,
      "learning_rate": 5.016244817449054e-05,
      "loss": 0.5472,
      "step": 6997
    },
    {
      "epoch": 1.4385856717031555,
      "grad_norm": 0.18809527158737183,
      "learning_rate": 5.015252588424975e-05,
      "loss": 0.5594,
      "step": 6998
    },
    {
      "epoch": 1.4387912426765341,
      "grad_norm": 0.19198375940322876,
      "learning_rate": 5.0142603340168084e-05,
      "loss": 0.5545,
      "step": 6999
    },
    {
      "epoch": 1.4389968136499127,
      "grad_norm": 0.1915784478187561,
      "learning_rate": 5.0132680542734396e-05,
      "loss": 0.5627,
      "step": 7000
    },
    {
      "epoch": 1.4392023846232913,
      "grad_norm": 0.19142676889896393,
      "learning_rate": 5.012275749243752e-05,
      "loss": 0.5473,
      "step": 7001
    },
    {
      "epoch": 1.4394079555966697,
      "grad_norm": 0.18919003009796143,
      "learning_rate": 5.011283418976633e-05,
      "loss": 0.5513,
      "step": 7002
    },
    {
      "epoch": 1.4396135265700483,
      "grad_norm": 0.16133341193199158,
      "learning_rate": 5.010291063520969e-05,
      "loss": 0.4986,
      "step": 7003
    },
    {
      "epoch": 1.4398190975434269,
      "grad_norm": 0.15433275699615479,
      "learning_rate": 5.009298682925651e-05,
      "loss": 0.5429,
      "step": 7004
    },
    {
      "epoch": 1.4400246685168054,
      "grad_norm": 0.17464013397693634,
      "learning_rate": 5.008306277239567e-05,
      "loss": 0.524,
      "step": 7005
    },
    {
      "epoch": 1.440230239490184,
      "grad_norm": 0.15277941524982452,
      "learning_rate": 5.0073138465116075e-05,
      "loss": 0.5293,
      "step": 7006
    },
    {
      "epoch": 1.4404358104635626,
      "grad_norm": 0.1988225281238556,
      "learning_rate": 5.0063213907906665e-05,
      "loss": 0.5324,
      "step": 7007
    },
    {
      "epoch": 1.440641381436941,
      "grad_norm": 0.2008810192346573,
      "learning_rate": 5.005328910125638e-05,
      "loss": 0.5634,
      "step": 7008
    },
    {
      "epoch": 1.4408469524103196,
      "grad_norm": 0.19552162289619446,
      "learning_rate": 5.004336404565415e-05,
      "loss": 0.5382,
      "step": 7009
    },
    {
      "epoch": 1.4410525233836982,
      "grad_norm": 0.1576053947210312,
      "learning_rate": 5.003343874158895e-05,
      "loss": 0.4966,
      "step": 7010
    },
    {
      "epoch": 1.4412580943570767,
      "grad_norm": 0.18060800433158875,
      "learning_rate": 5.002351318954975e-05,
      "loss": 0.5758,
      "step": 7011
    },
    {
      "epoch": 1.4414636653304553,
      "grad_norm": 0.19537772238254547,
      "learning_rate": 5.001358739002553e-05,
      "loss": 0.5713,
      "step": 7012
    },
    {
      "epoch": 1.441669236303834,
      "grad_norm": 0.18666040897369385,
      "learning_rate": 5.0003661343505284e-05,
      "loss": 0.5334,
      "step": 7013
    },
    {
      "epoch": 1.4418748072772125,
      "grad_norm": 0.16254711151123047,
      "learning_rate": 4.9993735050478045e-05,
      "loss": 0.5159,
      "step": 7014
    },
    {
      "epoch": 1.442080378250591,
      "grad_norm": 0.1602196842432022,
      "learning_rate": 4.9983808511432824e-05,
      "loss": 0.5267,
      "step": 7015
    },
    {
      "epoch": 1.4422859492239697,
      "grad_norm": 0.1874070167541504,
      "learning_rate": 4.9973881726858644e-05,
      "loss": 0.5258,
      "step": 7016
    },
    {
      "epoch": 1.442491520197348,
      "grad_norm": 0.19187650084495544,
      "learning_rate": 4.996395469724456e-05,
      "loss": 0.5574,
      "step": 7017
    },
    {
      "epoch": 1.4426970911707266,
      "grad_norm": 0.1952408105134964,
      "learning_rate": 4.995402742307963e-05,
      "loss": 0.5735,
      "step": 7018
    },
    {
      "epoch": 1.4429026621441052,
      "grad_norm": 0.20097225904464722,
      "learning_rate": 4.9944099904852926e-05,
      "loss": 0.572,
      "step": 7019
    },
    {
      "epoch": 1.4431082331174838,
      "grad_norm": 0.16808289289474487,
      "learning_rate": 4.993417214305352e-05,
      "loss": 0.5367,
      "step": 7020
    },
    {
      "epoch": 1.4433138040908624,
      "grad_norm": 0.16581854224205017,
      "learning_rate": 4.992424413817053e-05,
      "loss": 0.5764,
      "step": 7021
    },
    {
      "epoch": 1.443519375064241,
      "grad_norm": 0.15527617931365967,
      "learning_rate": 4.9914315890693035e-05,
      "loss": 0.5166,
      "step": 7022
    },
    {
      "epoch": 1.4437249460376194,
      "grad_norm": 0.15834735333919525,
      "learning_rate": 4.990438740111017e-05,
      "loss": 0.5397,
      "step": 7023
    },
    {
      "epoch": 1.443930517010998,
      "grad_norm": 0.1944034993648529,
      "learning_rate": 4.989445866991105e-05,
      "loss": 0.5449,
      "step": 7024
    },
    {
      "epoch": 1.4441360879843765,
      "grad_norm": 0.1605810672044754,
      "learning_rate": 4.988452969758485e-05,
      "loss": 0.5229,
      "step": 7025
    },
    {
      "epoch": 1.4443416589577551,
      "grad_norm": 0.15166768431663513,
      "learning_rate": 4.9874600484620684e-05,
      "loss": 0.5337,
      "step": 7026
    },
    {
      "epoch": 1.4445472299311337,
      "grad_norm": 0.19105499982833862,
      "learning_rate": 4.9864671031507746e-05,
      "loss": 0.5351,
      "step": 7027
    },
    {
      "epoch": 1.4447528009045123,
      "grad_norm": 0.18772821128368378,
      "learning_rate": 4.98547413387352e-05,
      "loss": 0.5418,
      "step": 7028
    },
    {
      "epoch": 1.444958371877891,
      "grad_norm": 0.1658894121646881,
      "learning_rate": 4.984481140679224e-05,
      "loss": 0.5272,
      "step": 7029
    },
    {
      "epoch": 1.4451639428512695,
      "grad_norm": 0.17171718180179596,
      "learning_rate": 4.983488123616807e-05,
      "loss": 0.5593,
      "step": 7030
    },
    {
      "epoch": 1.445369513824648,
      "grad_norm": 0.18422532081604004,
      "learning_rate": 4.9824950827351894e-05,
      "loss": 0.5262,
      "step": 7031
    },
    {
      "epoch": 1.4455750847980267,
      "grad_norm": 0.19110561907291412,
      "learning_rate": 4.981502018083295e-05,
      "loss": 0.5546,
      "step": 7032
    },
    {
      "epoch": 1.445780655771405,
      "grad_norm": 0.18570828437805176,
      "learning_rate": 4.980508929710045e-05,
      "loss": 0.5493,
      "step": 7033
    },
    {
      "epoch": 1.4459862267447836,
      "grad_norm": 0.19072416424751282,
      "learning_rate": 4.9795158176643665e-05,
      "loss": 0.5656,
      "step": 7034
    },
    {
      "epoch": 1.4461917977181622,
      "grad_norm": 0.18956297636032104,
      "learning_rate": 4.978522681995186e-05,
      "loss": 0.5594,
      "step": 7035
    },
    {
      "epoch": 1.4463973686915408,
      "grad_norm": 0.1876407116651535,
      "learning_rate": 4.977529522751429e-05,
      "loss": 0.5668,
      "step": 7036
    },
    {
      "epoch": 1.4466029396649194,
      "grad_norm": 0.1943429410457611,
      "learning_rate": 4.976536339982024e-05,
      "loss": 0.5389,
      "step": 7037
    },
    {
      "epoch": 1.4468085106382977,
      "grad_norm": 0.19916300475597382,
      "learning_rate": 4.975543133735901e-05,
      "loss": 0.5564,
      "step": 7038
    },
    {
      "epoch": 1.4470140816116763,
      "grad_norm": 0.19892625510692596,
      "learning_rate": 4.974549904061991e-05,
      "loss": 0.5782,
      "step": 7039
    },
    {
      "epoch": 1.447219652585055,
      "grad_norm": 0.19441033899784088,
      "learning_rate": 4.9735566510092245e-05,
      "loss": 0.5703,
      "step": 7040
    },
    {
      "epoch": 1.4474252235584335,
      "grad_norm": 0.1984698474407196,
      "learning_rate": 4.972563374626536e-05,
      "loss": 0.5614,
      "step": 7041
    },
    {
      "epoch": 1.447630794531812,
      "grad_norm": 0.16778507828712463,
      "learning_rate": 4.971570074962859e-05,
      "loss": 0.5299,
      "step": 7042
    },
    {
      "epoch": 1.4478363655051907,
      "grad_norm": 0.14573578536510468,
      "learning_rate": 4.970576752067128e-05,
      "loss": 0.5233,
      "step": 7043
    },
    {
      "epoch": 1.4480419364785693,
      "grad_norm": 0.14844007790088654,
      "learning_rate": 4.9695834059882796e-05,
      "loss": 0.5304,
      "step": 7044
    },
    {
      "epoch": 1.4482475074519479,
      "grad_norm": 0.19099220633506775,
      "learning_rate": 4.968590036775251e-05,
      "loss": 0.5603,
      "step": 7045
    },
    {
      "epoch": 1.4484530784253264,
      "grad_norm": 0.16473321616649628,
      "learning_rate": 4.967596644476983e-05,
      "loss": 0.5134,
      "step": 7046
    },
    {
      "epoch": 1.448658649398705,
      "grad_norm": 0.17135196924209595,
      "learning_rate": 4.966603229142412e-05,
      "loss": 0.5579,
      "step": 7047
    },
    {
      "epoch": 1.4488642203720834,
      "grad_norm": 0.19533687829971313,
      "learning_rate": 4.9656097908204825e-05,
      "loss": 0.5617,
      "step": 7048
    },
    {
      "epoch": 1.449069791345462,
      "grad_norm": 0.1876286268234253,
      "learning_rate": 4.964616329560136e-05,
      "loss": 0.554,
      "step": 7049
    },
    {
      "epoch": 1.4492753623188406,
      "grad_norm": 0.16037873923778534,
      "learning_rate": 4.9636228454103126e-05,
      "loss": 0.529,
      "step": 7050
    },
    {
      "epoch": 1.4494809332922192,
      "grad_norm": 0.1680610179901123,
      "learning_rate": 4.962629338419958e-05,
      "loss": 0.5376,
      "step": 7051
    },
    {
      "epoch": 1.4496865042655978,
      "grad_norm": 0.1924995481967926,
      "learning_rate": 4.9616358086380196e-05,
      "loss": 0.5543,
      "step": 7052
    },
    {
      "epoch": 1.4498920752389761,
      "grad_norm": 0.1638346016407013,
      "learning_rate": 4.9606422561134425e-05,
      "loss": 0.5091,
      "step": 7053
    },
    {
      "epoch": 1.4500976462123547,
      "grad_norm": 0.16642382740974426,
      "learning_rate": 4.9596486808951735e-05,
      "loss": 0.5628,
      "step": 7054
    },
    {
      "epoch": 1.4503032171857333,
      "grad_norm": 0.16534394025802612,
      "learning_rate": 4.958655083032164e-05,
      "loss": 0.5297,
      "step": 7055
    },
    {
      "epoch": 1.4505087881591119,
      "grad_norm": 0.16639864444732666,
      "learning_rate": 4.95766146257336e-05,
      "loss": 0.5561,
      "step": 7056
    },
    {
      "epoch": 1.4507143591324905,
      "grad_norm": 0.190561905503273,
      "learning_rate": 4.956667819567717e-05,
      "loss": 0.5604,
      "step": 7057
    },
    {
      "epoch": 1.450919930105869,
      "grad_norm": 0.19295108318328857,
      "learning_rate": 4.955674154064182e-05,
      "loss": 0.5524,
      "step": 7058
    },
    {
      "epoch": 1.4511255010792476,
      "grad_norm": 0.19699627161026,
      "learning_rate": 4.9546804661117146e-05,
      "loss": 0.5482,
      "step": 7059
    },
    {
      "epoch": 1.4513310720526262,
      "grad_norm": 0.18727873265743256,
      "learning_rate": 4.953686755759265e-05,
      "loss": 0.5565,
      "step": 7060
    },
    {
      "epoch": 1.4515366430260048,
      "grad_norm": 0.19223269820213318,
      "learning_rate": 4.952693023055788e-05,
      "loss": 0.5661,
      "step": 7061
    },
    {
      "epoch": 1.4517422139993834,
      "grad_norm": 0.19679668545722961,
      "learning_rate": 4.951699268050243e-05,
      "loss": 0.5632,
      "step": 7062
    },
    {
      "epoch": 1.4519477849727618,
      "grad_norm": 0.19206634163856506,
      "learning_rate": 4.9507054907915866e-05,
      "loss": 0.5459,
      "step": 7063
    },
    {
      "epoch": 1.4521533559461404,
      "grad_norm": 0.19624993205070496,
      "learning_rate": 4.949711691328777e-05,
      "loss": 0.5741,
      "step": 7064
    },
    {
      "epoch": 1.452358926919519,
      "grad_norm": 0.19353879988193512,
      "learning_rate": 4.948717869710773e-05,
      "loss": 0.5228,
      "step": 7065
    },
    {
      "epoch": 1.4525644978928975,
      "grad_norm": 0.1924706995487213,
      "learning_rate": 4.947724025986538e-05,
      "loss": 0.5716,
      "step": 7066
    },
    {
      "epoch": 1.4527700688662761,
      "grad_norm": 0.19107024371623993,
      "learning_rate": 4.946730160205033e-05,
      "loss": 0.555,
      "step": 7067
    },
    {
      "epoch": 1.4529756398396547,
      "grad_norm": 0.18900389969348907,
      "learning_rate": 4.94573627241522e-05,
      "loss": 0.5505,
      "step": 7068
    },
    {
      "epoch": 1.453181210813033,
      "grad_norm": 0.16496512293815613,
      "learning_rate": 4.944742362666065e-05,
      "loss": 0.5272,
      "step": 7069
    },
    {
      "epoch": 1.4533867817864117,
      "grad_norm": 0.16446129977703094,
      "learning_rate": 4.9437484310065326e-05,
      "loss": 0.5483,
      "step": 7070
    },
    {
      "epoch": 1.4535923527597903,
      "grad_norm": 0.1935243159532547,
      "learning_rate": 4.942754477485588e-05,
      "loss": 0.5516,
      "step": 7071
    },
    {
      "epoch": 1.4537979237331689,
      "grad_norm": 0.1573350727558136,
      "learning_rate": 4.9417605021522016e-05,
      "loss": 0.5269,
      "step": 7072
    },
    {
      "epoch": 1.4540034947065474,
      "grad_norm": 0.1570722460746765,
      "learning_rate": 4.9407665050553395e-05,
      "loss": 0.5599,
      "step": 7073
    },
    {
      "epoch": 1.454209065679926,
      "grad_norm": 0.19235976040363312,
      "learning_rate": 4.9397724862439726e-05,
      "loss": 0.5488,
      "step": 7074
    },
    {
      "epoch": 1.4544146366533046,
      "grad_norm": 0.19353123009204865,
      "learning_rate": 4.938778445767069e-05,
      "loss": 0.5436,
      "step": 7075
    },
    {
      "epoch": 1.4546202076266832,
      "grad_norm": 0.192392498254776,
      "learning_rate": 4.9377843836736026e-05,
      "loss": 0.547,
      "step": 7076
    },
    {
      "epoch": 1.4548257786000618,
      "grad_norm": 0.1857522875070572,
      "learning_rate": 4.936790300012545e-05,
      "loss": 0.5477,
      "step": 7077
    },
    {
      "epoch": 1.4550313495734402,
      "grad_norm": 0.20272956788539886,
      "learning_rate": 4.935796194832872e-05,
      "loss": 0.5526,
      "step": 7078
    },
    {
      "epoch": 1.4552369205468187,
      "grad_norm": 0.1533660888671875,
      "learning_rate": 4.9348020681835573e-05,
      "loss": 0.5079,
      "step": 7079
    },
    {
      "epoch": 1.4554424915201973,
      "grad_norm": 0.15885986387729645,
      "learning_rate": 4.9338079201135777e-05,
      "loss": 0.544,
      "step": 7080
    },
    {
      "epoch": 1.455648062493576,
      "grad_norm": 0.19332925975322723,
      "learning_rate": 4.932813750671909e-05,
      "loss": 0.5493,
      "step": 7081
    },
    {
      "epoch": 1.4558536334669545,
      "grad_norm": 0.16609343886375427,
      "learning_rate": 4.931819559907529e-05,
      "loss": 0.5295,
      "step": 7082
    },
    {
      "epoch": 1.456059204440333,
      "grad_norm": 0.12420736253261566,
      "learning_rate": 4.930825347869418e-05,
      "loss": 0.5104,
      "step": 7083
    },
    {
      "epoch": 1.4562647754137115,
      "grad_norm": 0.12772247195243835,
      "learning_rate": 4.9298311146065565e-05,
      "loss": 0.5214,
      "step": 7084
    },
    {
      "epoch": 1.45647034638709,
      "grad_norm": 0.1771061271429062,
      "learning_rate": 4.9288368601679235e-05,
      "loss": 0.5358,
      "step": 7085
    },
    {
      "epoch": 1.4566759173604686,
      "grad_norm": 0.20758508145809174,
      "learning_rate": 4.9278425846025047e-05,
      "loss": 0.5321,
      "step": 7086
    },
    {
      "epoch": 1.4568814883338472,
      "grad_norm": 0.16325919330120087,
      "learning_rate": 4.926848287959281e-05,
      "loss": 0.5155,
      "step": 7087
    },
    {
      "epoch": 1.4570870593072258,
      "grad_norm": 0.15556760132312775,
      "learning_rate": 4.925853970287236e-05,
      "loss": 0.5374,
      "step": 7088
    },
    {
      "epoch": 1.4572926302806044,
      "grad_norm": 0.19319914281368256,
      "learning_rate": 4.924859631635356e-05,
      "loss": 0.5403,
      "step": 7089
    },
    {
      "epoch": 1.457498201253983,
      "grad_norm": 0.19514033198356628,
      "learning_rate": 4.9238652720526295e-05,
      "loss": 0.5609,
      "step": 7090
    },
    {
      "epoch": 1.4577037722273616,
      "grad_norm": 0.18153122067451477,
      "learning_rate": 4.922870891588042e-05,
      "loss": 0.5313,
      "step": 7091
    },
    {
      "epoch": 1.4579093432007402,
      "grad_norm": 0.19177407026290894,
      "learning_rate": 4.9218764902905814e-05,
      "loss": 0.5595,
      "step": 7092
    },
    {
      "epoch": 1.4581149141741185,
      "grad_norm": 0.18836280703544617,
      "learning_rate": 4.920882068209238e-05,
      "loss": 0.544,
      "step": 7093
    },
    {
      "epoch": 1.4583204851474971,
      "grad_norm": 0.19115997850894928,
      "learning_rate": 4.919887625393003e-05,
      "loss": 0.5544,
      "step": 7094
    },
    {
      "epoch": 1.4585260561208757,
      "grad_norm": 0.1862732619047165,
      "learning_rate": 4.918893161890867e-05,
      "loss": 0.5515,
      "step": 7095
    },
    {
      "epoch": 1.4587316270942543,
      "grad_norm": 0.15882770717144012,
      "learning_rate": 4.917898677751822e-05,
      "loss": 0.5248,
      "step": 7096
    },
    {
      "epoch": 1.458937198067633,
      "grad_norm": 0.16427573561668396,
      "learning_rate": 4.9169041730248634e-05,
      "loss": 0.5654,
      "step": 7097
    },
    {
      "epoch": 1.4591427690410115,
      "grad_norm": 0.19142089784145355,
      "learning_rate": 4.915909647758984e-05,
      "loss": 0.5522,
      "step": 7098
    },
    {
      "epoch": 1.4593483400143898,
      "grad_norm": 0.19446474313735962,
      "learning_rate": 4.914915102003181e-05,
      "loss": 0.5274,
      "step": 7099
    },
    {
      "epoch": 1.4595539109877684,
      "grad_norm": 0.1596178114414215,
      "learning_rate": 4.9139205358064495e-05,
      "loss": 0.5138,
      "step": 7100
    },
    {
      "epoch": 1.459759481961147,
      "grad_norm": 0.1602422297000885,
      "learning_rate": 4.912925949217788e-05,
      "loss": 0.5237,
      "step": 7101
    },
    {
      "epoch": 1.4599650529345256,
      "grad_norm": 0.19484317302703857,
      "learning_rate": 4.911931342286195e-05,
      "loss": 0.5393,
      "step": 7102
    },
    {
      "epoch": 1.4601706239079042,
      "grad_norm": 0.2035979926586151,
      "learning_rate": 4.91093671506067e-05,
      "loss": 0.5555,
      "step": 7103
    },
    {
      "epoch": 1.4603761948812828,
      "grad_norm": 0.19783945381641388,
      "learning_rate": 4.909942067590215e-05,
      "loss": 0.5507,
      "step": 7104
    },
    {
      "epoch": 1.4605817658546614,
      "grad_norm": 0.19101816415786743,
      "learning_rate": 4.9089473999238294e-05,
      "loss": 0.5457,
      "step": 7105
    },
    {
      "epoch": 1.46078733682804,
      "grad_norm": 0.18535058200359344,
      "learning_rate": 4.907952712110516e-05,
      "loss": 0.5209,
      "step": 7106
    },
    {
      "epoch": 1.4609929078014185,
      "grad_norm": 0.1839088499546051,
      "learning_rate": 4.906958004199281e-05,
      "loss": 0.5424,
      "step": 7107
    },
    {
      "epoch": 1.461198478774797,
      "grad_norm": 0.18688786029815674,
      "learning_rate": 4.905963276239127e-05,
      "loss": 0.5383,
      "step": 7108
    },
    {
      "epoch": 1.4614040497481755,
      "grad_norm": 0.19204580783843994,
      "learning_rate": 4.904968528279058e-05,
      "loss": 0.5667,
      "step": 7109
    },
    {
      "epoch": 1.461609620721554,
      "grad_norm": 0.19083940982818604,
      "learning_rate": 4.903973760368084e-05,
      "loss": 0.5628,
      "step": 7110
    },
    {
      "epoch": 1.4618151916949327,
      "grad_norm": 0.1922621876001358,
      "learning_rate": 4.9029789725552105e-05,
      "loss": 0.536,
      "step": 7111
    },
    {
      "epoch": 1.4620207626683113,
      "grad_norm": 0.19811585545539856,
      "learning_rate": 4.901984164889447e-05,
      "loss": 0.571,
      "step": 7112
    },
    {
      "epoch": 1.4622263336416899,
      "grad_norm": 0.1963101178407669,
      "learning_rate": 4.9009893374198015e-05,
      "loss": 0.568,
      "step": 7113
    },
    {
      "epoch": 1.4624319046150682,
      "grad_norm": 0.19826072454452515,
      "learning_rate": 4.899994490195286e-05,
      "loss": 0.541,
      "step": 7114
    },
    {
      "epoch": 1.4626374755884468,
      "grad_norm": 0.19222994148731232,
      "learning_rate": 4.898999623264913e-05,
      "loss": 0.5699,
      "step": 7115
    },
    {
      "epoch": 1.4628430465618254,
      "grad_norm": 0.19945533573627472,
      "learning_rate": 4.898004736677692e-05,
      "loss": 0.5663,
      "step": 7116
    },
    {
      "epoch": 1.463048617535204,
      "grad_norm": 0.18743856251239777,
      "learning_rate": 4.8970098304826384e-05,
      "loss": 0.5423,
      "step": 7117
    },
    {
      "epoch": 1.4632541885085826,
      "grad_norm": 0.1742721050977707,
      "learning_rate": 4.896014904728766e-05,
      "loss": 0.5273,
      "step": 7118
    },
    {
      "epoch": 1.4634597594819612,
      "grad_norm": 0.15842121839523315,
      "learning_rate": 4.895019959465091e-05,
      "loss": 0.5392,
      "step": 7119
    },
    {
      "epoch": 1.4636653304553398,
      "grad_norm": 0.1904791295528412,
      "learning_rate": 4.894024994740627e-05,
      "loss": 0.565,
      "step": 7120
    },
    {
      "epoch": 1.4638709014287183,
      "grad_norm": 0.18996872007846832,
      "learning_rate": 4.893030010604393e-05,
      "loss": 0.5624,
      "step": 7121
    },
    {
      "epoch": 1.464076472402097,
      "grad_norm": 0.18377164006233215,
      "learning_rate": 4.89203500710541e-05,
      "loss": 0.5628,
      "step": 7122
    },
    {
      "epoch": 1.4642820433754755,
      "grad_norm": 0.19251424074172974,
      "learning_rate": 4.891039984292693e-05,
      "loss": 0.5489,
      "step": 7123
    },
    {
      "epoch": 1.4644876143488539,
      "grad_norm": 0.1817564070224762,
      "learning_rate": 4.890044942215263e-05,
      "loss": 0.5592,
      "step": 7124
    },
    {
      "epoch": 1.4646931853222325,
      "grad_norm": 0.1885865181684494,
      "learning_rate": 4.8890498809221434e-05,
      "loss": 0.5447,
      "step": 7125
    },
    {
      "epoch": 1.464898756295611,
      "grad_norm": 0.19473087787628174,
      "learning_rate": 4.8880548004623545e-05,
      "loss": 0.5545,
      "step": 7126
    },
    {
      "epoch": 1.4651043272689896,
      "grad_norm": 0.18976017832756042,
      "learning_rate": 4.8870597008849175e-05,
      "loss": 0.5323,
      "step": 7127
    },
    {
      "epoch": 1.4653098982423682,
      "grad_norm": 0.1930120289325714,
      "learning_rate": 4.88606458223886e-05,
      "loss": 0.5459,
      "step": 7128
    },
    {
      "epoch": 1.4655154692157466,
      "grad_norm": 0.18661560118198395,
      "learning_rate": 4.885069444573205e-05,
      "loss": 0.5345,
      "step": 7129
    },
    {
      "epoch": 1.4657210401891252,
      "grad_norm": 0.1941232681274414,
      "learning_rate": 4.884074287936977e-05,
      "loss": 0.5289,
      "step": 7130
    },
    {
      "epoch": 1.4659266111625038,
      "grad_norm": 0.19508835673332214,
      "learning_rate": 4.883079112379204e-05,
      "loss": 0.5421,
      "step": 7131
    },
    {
      "epoch": 1.4661321821358824,
      "grad_norm": 0.200748473405838,
      "learning_rate": 4.882083917948914e-05,
      "loss": 0.5602,
      "step": 7132
    },
    {
      "epoch": 1.466337753109261,
      "grad_norm": 0.19630691409111023,
      "learning_rate": 4.8810887046951356e-05,
      "loss": 0.5469,
      "step": 7133
    },
    {
      "epoch": 1.4665433240826395,
      "grad_norm": 0.18631185591220856,
      "learning_rate": 4.880093472666897e-05,
      "loss": 0.5349,
      "step": 7134
    },
    {
      "epoch": 1.4667488950560181,
      "grad_norm": 0.20446190237998962,
      "learning_rate": 4.879098221913231e-05,
      "loss": 0.5395,
      "step": 7135
    },
    {
      "epoch": 1.4669544660293967,
      "grad_norm": 0.19369782507419586,
      "learning_rate": 4.8781029524831676e-05,
      "loss": 0.548,
      "step": 7136
    },
    {
      "epoch": 1.4671600370027753,
      "grad_norm": 0.19022773206233978,
      "learning_rate": 4.8771076644257365e-05,
      "loss": 0.5499,
      "step": 7137
    },
    {
      "epoch": 1.467365607976154,
      "grad_norm": 0.19664426147937775,
      "learning_rate": 4.876112357789977e-05,
      "loss": 0.5629,
      "step": 7138
    },
    {
      "epoch": 1.4675711789495323,
      "grad_norm": 0.19032470881938934,
      "learning_rate": 4.875117032624917e-05,
      "loss": 0.546,
      "step": 7139
    },
    {
      "epoch": 1.4677767499229109,
      "grad_norm": 0.18640637397766113,
      "learning_rate": 4.874121688979595e-05,
      "loss": 0.5317,
      "step": 7140
    },
    {
      "epoch": 1.4679823208962894,
      "grad_norm": 0.19098687171936035,
      "learning_rate": 4.873126326903045e-05,
      "loss": 0.5494,
      "step": 7141
    },
    {
      "epoch": 1.468187891869668,
      "grad_norm": 0.19771692156791687,
      "learning_rate": 4.872130946444305e-05,
      "loss": 0.5562,
      "step": 7142
    },
    {
      "epoch": 1.4683934628430466,
      "grad_norm": 0.18976187705993652,
      "learning_rate": 4.871135547652414e-05,
      "loss": 0.5607,
      "step": 7143
    },
    {
      "epoch": 1.4685990338164252,
      "grad_norm": 0.19151365756988525,
      "learning_rate": 4.870140130576408e-05,
      "loss": 0.5471,
      "step": 7144
    },
    {
      "epoch": 1.4688046047898036,
      "grad_norm": 0.19620567560195923,
      "learning_rate": 4.869144695265328e-05,
      "loss": 0.562,
      "step": 7145
    },
    {
      "epoch": 1.4690101757631822,
      "grad_norm": 0.19159796833992004,
      "learning_rate": 4.8681492417682154e-05,
      "loss": 0.5638,
      "step": 7146
    },
    {
      "epoch": 1.4692157467365607,
      "grad_norm": 0.20116734504699707,
      "learning_rate": 4.867153770134108e-05,
      "loss": 0.5677,
      "step": 7147
    },
    {
      "epoch": 1.4694213177099393,
      "grad_norm": 0.19330163300037384,
      "learning_rate": 4.866158280412053e-05,
      "loss": 0.5546,
      "step": 7148
    },
    {
      "epoch": 1.469626888683318,
      "grad_norm": 0.18877775967121124,
      "learning_rate": 4.86516277265109e-05,
      "loss": 0.559,
      "step": 7149
    },
    {
      "epoch": 1.4698324596566965,
      "grad_norm": 0.1901031881570816,
      "learning_rate": 4.864167246900265e-05,
      "loss": 0.5388,
      "step": 7150
    },
    {
      "epoch": 1.470038030630075,
      "grad_norm": 0.18822161853313446,
      "learning_rate": 4.8631717032086195e-05,
      "loss": 0.5466,
      "step": 7151
    },
    {
      "epoch": 1.4702436016034537,
      "grad_norm": 0.16988466680049896,
      "learning_rate": 4.862176141625203e-05,
      "loss": 0.5347,
      "step": 7152
    },
    {
      "epoch": 1.4704491725768323,
      "grad_norm": 0.12935671210289001,
      "learning_rate": 4.86118056219906e-05,
      "loss": 0.5038,
      "step": 7153
    },
    {
      "epoch": 1.4706547435502106,
      "grad_norm": 0.16515877842903137,
      "learning_rate": 4.860184964979239e-05,
      "loss": 0.5383,
      "step": 7154
    },
    {
      "epoch": 1.4708603145235892,
      "grad_norm": 0.2031169980764389,
      "learning_rate": 4.859189350014789e-05,
      "loss": 0.558,
      "step": 7155
    },
    {
      "epoch": 1.4710658854969678,
      "grad_norm": 0.1971338540315628,
      "learning_rate": 4.858193717354759e-05,
      "loss": 0.5552,
      "step": 7156
    },
    {
      "epoch": 1.4712714564703464,
      "grad_norm": 0.18545454740524292,
      "learning_rate": 4.857198067048199e-05,
      "loss": 0.5499,
      "step": 7157
    },
    {
      "epoch": 1.471477027443725,
      "grad_norm": 0.18908904492855072,
      "learning_rate": 4.856202399144157e-05,
      "loss": 0.5331,
      "step": 7158
    },
    {
      "epoch": 1.4716825984171036,
      "grad_norm": 0.18228811025619507,
      "learning_rate": 4.855206713691691e-05,
      "loss": 0.5181,
      "step": 7159
    },
    {
      "epoch": 1.471888169390482,
      "grad_norm": 0.1866607964038849,
      "learning_rate": 4.8542110107398483e-05,
      "loss": 0.5157,
      "step": 7160
    },
    {
      "epoch": 1.4720937403638605,
      "grad_norm": 0.19502104818820953,
      "learning_rate": 4.853215290337685e-05,
      "loss": 0.5462,
      "step": 7161
    },
    {
      "epoch": 1.4722993113372391,
      "grad_norm": 0.16694171726703644,
      "learning_rate": 4.852219552534256e-05,
      "loss": 0.5123,
      "step": 7162
    },
    {
      "epoch": 1.4725048823106177,
      "grad_norm": 0.1643698364496231,
      "learning_rate": 4.851223797378614e-05,
      "loss": 0.5402,
      "step": 7163
    },
    {
      "epoch": 1.4727104532839963,
      "grad_norm": 0.20267751812934875,
      "learning_rate": 4.85022802491982e-05,
      "loss": 0.5493,
      "step": 7164
    },
    {
      "epoch": 1.472916024257375,
      "grad_norm": 0.19984979927539825,
      "learning_rate": 4.849232235206927e-05,
      "loss": 0.5387,
      "step": 7165
    },
    {
      "epoch": 1.4731215952307535,
      "grad_norm": 0.19350376725196838,
      "learning_rate": 4.848236428288993e-05,
      "loss": 0.5465,
      "step": 7166
    },
    {
      "epoch": 1.473327166204132,
      "grad_norm": 0.2067371904850006,
      "learning_rate": 4.84724060421508e-05,
      "loss": 0.5688,
      "step": 7167
    },
    {
      "epoch": 1.4735327371775107,
      "grad_norm": 0.20047098398208618,
      "learning_rate": 4.846244763034243e-05,
      "loss": 0.5426,
      "step": 7168
    },
    {
      "epoch": 1.473738308150889,
      "grad_norm": 0.1930703967809677,
      "learning_rate": 4.845248904795547e-05,
      "loss": 0.5556,
      "step": 7169
    },
    {
      "epoch": 1.4739438791242676,
      "grad_norm": 0.19122304022312164,
      "learning_rate": 4.8442530295480496e-05,
      "loss": 0.5323,
      "step": 7170
    },
    {
      "epoch": 1.4741494500976462,
      "grad_norm": 0.1875450760126114,
      "learning_rate": 4.843257137340816e-05,
      "loss": 0.519,
      "step": 7171
    },
    {
      "epoch": 1.4743550210710248,
      "grad_norm": 0.18695366382598877,
      "learning_rate": 4.842261228222906e-05,
      "loss": 0.538,
      "step": 7172
    },
    {
      "epoch": 1.4745605920444034,
      "grad_norm": 0.19884580373764038,
      "learning_rate": 4.841265302243386e-05,
      "loss": 0.5696,
      "step": 7173
    },
    {
      "epoch": 1.474766163017782,
      "grad_norm": 0.19241276383399963,
      "learning_rate": 4.840269359451319e-05,
      "loss": 0.5595,
      "step": 7174
    },
    {
      "epoch": 1.4749717339911603,
      "grad_norm": 0.16710297763347626,
      "learning_rate": 4.839273399895772e-05,
      "loss": 0.5195,
      "step": 7175
    },
    {
      "epoch": 1.475177304964539,
      "grad_norm": 0.15979520976543427,
      "learning_rate": 4.8382774236258085e-05,
      "loss": 0.5616,
      "step": 7176
    },
    {
      "epoch": 1.4753828759379175,
      "grad_norm": 0.2003268003463745,
      "learning_rate": 4.8372814306904984e-05,
      "loss": 0.5718,
      "step": 7177
    },
    {
      "epoch": 1.475588446911296,
      "grad_norm": 0.18857726454734802,
      "learning_rate": 4.83628542113891e-05,
      "loss": 0.5305,
      "step": 7178
    },
    {
      "epoch": 1.4757940178846747,
      "grad_norm": 0.15321624279022217,
      "learning_rate": 4.8352893950201096e-05,
      "loss": 0.5213,
      "step": 7179
    },
    {
      "epoch": 1.4759995888580533,
      "grad_norm": 0.15973275899887085,
      "learning_rate": 4.834293352383168e-05,
      "loss": 0.5575,
      "step": 7180
    },
    {
      "epoch": 1.4762051598314319,
      "grad_norm": 0.18778233230113983,
      "learning_rate": 4.8332972932771556e-05,
      "loss": 0.5239,
      "step": 7181
    },
    {
      "epoch": 1.4764107308048104,
      "grad_norm": 0.15525855123996735,
      "learning_rate": 4.832301217751142e-05,
      "loss": 0.4881,
      "step": 7182
    },
    {
      "epoch": 1.476616301778189,
      "grad_norm": 0.15355351567268372,
      "learning_rate": 4.8313051258542024e-05,
      "loss": 0.5315,
      "step": 7183
    },
    {
      "epoch": 1.4768218727515674,
      "grad_norm": 0.2030985951423645,
      "learning_rate": 4.830309017635407e-05,
      "loss": 0.5901,
      "step": 7184
    },
    {
      "epoch": 1.477027443724946,
      "grad_norm": 0.19170239567756653,
      "learning_rate": 4.82931289314383e-05,
      "loss": 0.5517,
      "step": 7185
    },
    {
      "epoch": 1.4772330146983246,
      "grad_norm": 0.19333000481128693,
      "learning_rate": 4.828316752428545e-05,
      "loss": 0.5547,
      "step": 7186
    },
    {
      "epoch": 1.4774385856717032,
      "grad_norm": 0.19361145794391632,
      "learning_rate": 4.82732059553863e-05,
      "loss": 0.5518,
      "step": 7187
    },
    {
      "epoch": 1.4776441566450818,
      "grad_norm": 0.16968531906604767,
      "learning_rate": 4.8263244225231586e-05,
      "loss": 0.5055,
      "step": 7188
    },
    {
      "epoch": 1.4778497276184603,
      "grad_norm": 0.1647455245256424,
      "learning_rate": 4.825328233431207e-05,
      "loss": 0.5489,
      "step": 7189
    },
    {
      "epoch": 1.4780552985918387,
      "grad_norm": 0.18998976051807404,
      "learning_rate": 4.824332028311856e-05,
      "loss": 0.5302,
      "step": 7190
    },
    {
      "epoch": 1.4782608695652173,
      "grad_norm": 0.18618905544281006,
      "learning_rate": 4.8233358072141806e-05,
      "loss": 0.5217,
      "step": 7191
    },
    {
      "epoch": 1.4784664405385959,
      "grad_norm": 0.19258539378643036,
      "learning_rate": 4.822339570187261e-05,
      "loss": 0.5551,
      "step": 7192
    },
    {
      "epoch": 1.4786720115119745,
      "grad_norm": 0.1874276101589203,
      "learning_rate": 4.821343317280179e-05,
      "loss": 0.5409,
      "step": 7193
    },
    {
      "epoch": 1.478877582485353,
      "grad_norm": 0.18570971488952637,
      "learning_rate": 4.8203470485420126e-05,
      "loss": 0.5524,
      "step": 7194
    },
    {
      "epoch": 1.4790831534587316,
      "grad_norm": 0.19946832954883575,
      "learning_rate": 4.819350764021844e-05,
      "loss": 0.5618,
      "step": 7195
    },
    {
      "epoch": 1.4792887244321102,
      "grad_norm": 0.1732860952615738,
      "learning_rate": 4.818354463768756e-05,
      "loss": 0.5354,
      "step": 7196
    },
    {
      "epoch": 1.4794942954054888,
      "grad_norm": 0.16083048284053802,
      "learning_rate": 4.817358147831831e-05,
      "loss": 0.539,
      "step": 7197
    },
    {
      "epoch": 1.4796998663788674,
      "grad_norm": 0.1897859424352646,
      "learning_rate": 4.816361816260155e-05,
      "loss": 0.54,
      "step": 7198
    },
    {
      "epoch": 1.479905437352246,
      "grad_norm": 0.1890067458152771,
      "learning_rate": 4.815365469102809e-05,
      "loss": 0.5339,
      "step": 7199
    },
    {
      "epoch": 1.4801110083256244,
      "grad_norm": 0.19852851331233978,
      "learning_rate": 4.8143691064088823e-05,
      "loss": 0.555,
      "step": 7200
    },
    {
      "epoch": 1.480316579299003,
      "grad_norm": 0.1849977821111679,
      "learning_rate": 4.813372728227459e-05,
      "loss": 0.5255,
      "step": 7201
    },
    {
      "epoch": 1.4805221502723815,
      "grad_norm": 0.1914818435907364,
      "learning_rate": 4.8123763346076256e-05,
      "loss": 0.5525,
      "step": 7202
    },
    {
      "epoch": 1.4807277212457601,
      "grad_norm": 0.2014429122209549,
      "learning_rate": 4.811379925598469e-05,
      "loss": 0.5693,
      "step": 7203
    },
    {
      "epoch": 1.4809332922191387,
      "grad_norm": 0.1984141618013382,
      "learning_rate": 4.81038350124908e-05,
      "loss": 0.5566,
      "step": 7204
    },
    {
      "epoch": 1.481138863192517,
      "grad_norm": 0.19716762006282806,
      "learning_rate": 4.809387061608548e-05,
      "loss": 0.5513,
      "step": 7205
    },
    {
      "epoch": 1.4813444341658957,
      "grad_norm": 0.19718822836875916,
      "learning_rate": 4.8083906067259585e-05,
      "loss": 0.5376,
      "step": 7206
    },
    {
      "epoch": 1.4815500051392743,
      "grad_norm": 0.1910613626241684,
      "learning_rate": 4.807394136650406e-05,
      "loss": 0.5604,
      "step": 7207
    },
    {
      "epoch": 1.4817555761126529,
      "grad_norm": 0.19918161630630493,
      "learning_rate": 4.806397651430983e-05,
      "loss": 0.549,
      "step": 7208
    },
    {
      "epoch": 1.4819611470860314,
      "grad_norm": 0.18760617077350616,
      "learning_rate": 4.805401151116778e-05,
      "loss": 0.5507,
      "step": 7209
    },
    {
      "epoch": 1.48216671805941,
      "grad_norm": 0.15669982135295868,
      "learning_rate": 4.804404635756886e-05,
      "loss": 0.5268,
      "step": 7210
    },
    {
      "epoch": 1.4823722890327886,
      "grad_norm": 0.16258768737316132,
      "learning_rate": 4.803408105400401e-05,
      "loss": 0.5557,
      "step": 7211
    },
    {
      "epoch": 1.4825778600061672,
      "grad_norm": 0.200164794921875,
      "learning_rate": 4.802411560096418e-05,
      "loss": 0.5652,
      "step": 7212
    },
    {
      "epoch": 1.4827834309795458,
      "grad_norm": 0.1986524760723114,
      "learning_rate": 4.801414999894028e-05,
      "loss": 0.5608,
      "step": 7213
    },
    {
      "epoch": 1.4829890019529244,
      "grad_norm": 0.15464936196804047,
      "learning_rate": 4.8004184248423325e-05,
      "loss": 0.519,
      "step": 7214
    },
    {
      "epoch": 1.4831945729263027,
      "grad_norm": 0.15096427500247955,
      "learning_rate": 4.799421834990424e-05,
      "loss": 0.5417,
      "step": 7215
    },
    {
      "epoch": 1.4834001438996813,
      "grad_norm": 0.15722674131393433,
      "learning_rate": 4.798425230387402e-05,
      "loss": 0.5158,
      "step": 7216
    },
    {
      "epoch": 1.48360571487306,
      "grad_norm": 0.15923316776752472,
      "learning_rate": 4.797428611082362e-05,
      "loss": 0.5495,
      "step": 7217
    },
    {
      "epoch": 1.4838112858464385,
      "grad_norm": 0.16226224601268768,
      "learning_rate": 4.796431977124405e-05,
      "loss": 0.5213,
      "step": 7218
    },
    {
      "epoch": 1.484016856819817,
      "grad_norm": 0.16145376861095428,
      "learning_rate": 4.7954353285626314e-05,
      "loss": 0.568,
      "step": 7219
    },
    {
      "epoch": 1.4842224277931955,
      "grad_norm": 0.15974651277065277,
      "learning_rate": 4.7944386654461385e-05,
      "loss": 0.512,
      "step": 7220
    },
    {
      "epoch": 1.484427998766574,
      "grad_norm": 0.15350697934627533,
      "learning_rate": 4.7934419878240296e-05,
      "loss": 0.5473,
      "step": 7221
    },
    {
      "epoch": 1.4846335697399526,
      "grad_norm": 0.19197656214237213,
      "learning_rate": 4.792445295745406e-05,
      "loss": 0.5461,
      "step": 7222
    },
    {
      "epoch": 1.4848391407133312,
      "grad_norm": 0.19040462374687195,
      "learning_rate": 4.7914485892593686e-05,
      "loss": 0.5372,
      "step": 7223
    },
    {
      "epoch": 1.4850447116867098,
      "grad_norm": 0.1572524458169937,
      "learning_rate": 4.790451868415021e-05,
      "loss": 0.5145,
      "step": 7224
    },
    {
      "epoch": 1.4852502826600884,
      "grad_norm": 0.15703527629375458,
      "learning_rate": 4.7894551332614686e-05,
      "loss": 0.5627,
      "step": 7225
    },
    {
      "epoch": 1.485455853633467,
      "grad_norm": 0.16500575840473175,
      "learning_rate": 4.788458383847816e-05,
      "loss": 0.5229,
      "step": 7226
    },
    {
      "epoch": 1.4856614246068456,
      "grad_norm": 0.16244147717952728,
      "learning_rate": 4.787461620223164e-05,
      "loss": 0.5392,
      "step": 7227
    },
    {
      "epoch": 1.4858669955802242,
      "grad_norm": 0.19701159000396729,
      "learning_rate": 4.786464842436623e-05,
      "loss": 0.5197,
      "step": 7228
    },
    {
      "epoch": 1.4860725665536028,
      "grad_norm": 0.18858790397644043,
      "learning_rate": 4.785468050537298e-05,
      "loss": 0.5707,
      "step": 7229
    },
    {
      "epoch": 1.4862781375269811,
      "grad_norm": 0.1888207048177719,
      "learning_rate": 4.784471244574295e-05,
      "loss": 0.5432,
      "step": 7230
    },
    {
      "epoch": 1.4864837085003597,
      "grad_norm": 0.19446338713169098,
      "learning_rate": 4.783474424596726e-05,
      "loss": 0.5676,
      "step": 7231
    },
    {
      "epoch": 1.4866892794737383,
      "grad_norm": 0.19412629306316376,
      "learning_rate": 4.782477590653696e-05,
      "loss": 0.5435,
      "step": 7232
    },
    {
      "epoch": 1.486894850447117,
      "grad_norm": 0.18198393285274506,
      "learning_rate": 4.781480742794316e-05,
      "loss": 0.5172,
      "step": 7233
    },
    {
      "epoch": 1.4871004214204955,
      "grad_norm": 0.2016136646270752,
      "learning_rate": 4.7804838810676935e-05,
      "loss": 0.5872,
      "step": 7234
    },
    {
      "epoch": 1.487305992393874,
      "grad_norm": 0.17606668174266815,
      "learning_rate": 4.779487005522943e-05,
      "loss": 0.5324,
      "step": 7235
    },
    {
      "epoch": 1.4875115633672524,
      "grad_norm": 0.16043418645858765,
      "learning_rate": 4.778490116209174e-05,
      "loss": 0.5447,
      "step": 7236
    },
    {
      "epoch": 1.487717134340631,
      "grad_norm": 0.19674460589885712,
      "learning_rate": 4.7774932131754975e-05,
      "loss": 0.5595,
      "step": 7237
    },
    {
      "epoch": 1.4879227053140096,
      "grad_norm": 0.2002599984407425,
      "learning_rate": 4.776496296471029e-05,
      "loss": 0.5289,
      "step": 7238
    },
    {
      "epoch": 1.4881282762873882,
      "grad_norm": 0.18798843026161194,
      "learning_rate": 4.775499366144878e-05,
      "loss": 0.5465,
      "step": 7239
    },
    {
      "epoch": 1.4883338472607668,
      "grad_norm": 0.18151499330997467,
      "learning_rate": 4.7745024222461626e-05,
      "loss": 0.5398,
      "step": 7240
    },
    {
      "epoch": 1.4885394182341454,
      "grad_norm": 0.16490262746810913,
      "learning_rate": 4.773505464823995e-05,
      "loss": 0.5314,
      "step": 7241
    },
    {
      "epoch": 1.488744989207524,
      "grad_norm": 0.16644752025604248,
      "learning_rate": 4.772508493927492e-05,
      "loss": 0.5573,
      "step": 7242
    },
    {
      "epoch": 1.4889505601809025,
      "grad_norm": 0.1932040899991989,
      "learning_rate": 4.77151150960577e-05,
      "loss": 0.5464,
      "step": 7243
    },
    {
      "epoch": 1.4891561311542811,
      "grad_norm": 0.19342085719108582,
      "learning_rate": 4.770514511907943e-05,
      "loss": 0.5528,
      "step": 7244
    },
    {
      "epoch": 1.4893617021276595,
      "grad_norm": 0.1693827509880066,
      "learning_rate": 4.7695175008831317e-05,
      "loss": 0.5318,
      "step": 7245
    },
    {
      "epoch": 1.489567273101038,
      "grad_norm": 0.15933051705360413,
      "learning_rate": 4.768520476580454e-05,
      "loss": 0.5436,
      "step": 7246
    },
    {
      "epoch": 1.4897728440744167,
      "grad_norm": 0.19581708312034607,
      "learning_rate": 4.767523439049026e-05,
      "loss": 0.5502,
      "step": 7247
    },
    {
      "epoch": 1.4899784150477953,
      "grad_norm": 0.1909896582365036,
      "learning_rate": 4.7665263883379685e-05,
      "loss": 0.5415,
      "step": 7248
    },
    {
      "epoch": 1.4901839860211739,
      "grad_norm": 0.1643315702676773,
      "learning_rate": 4.765529324496402e-05,
      "loss": 0.5078,
      "step": 7249
    },
    {
      "epoch": 1.4903895569945524,
      "grad_norm": 0.15782994031906128,
      "learning_rate": 4.764532247573446e-05,
      "loss": 0.5169,
      "step": 7250
    },
    {
      "epoch": 1.4905951279679308,
      "grad_norm": 0.16611091792583466,
      "learning_rate": 4.763535157618222e-05,
      "loss": 0.5207,
      "step": 7251
    },
    {
      "epoch": 1.4908006989413094,
      "grad_norm": 0.1263076364994049,
      "learning_rate": 4.7625380546798546e-05,
      "loss": 0.5362,
      "step": 7252
    },
    {
      "epoch": 1.491006269914688,
      "grad_norm": 0.16741037368774414,
      "learning_rate": 4.761540938807464e-05,
      "loss": 0.5364,
      "step": 7253
    },
    {
      "epoch": 1.4912118408880666,
      "grad_norm": 0.19533216953277588,
      "learning_rate": 4.760543810050174e-05,
      "loss": 0.5505,
      "step": 7254
    },
    {
      "epoch": 1.4914174118614452,
      "grad_norm": 0.19828902184963226,
      "learning_rate": 4.759546668457107e-05,
      "loss": 0.5722,
      "step": 7255
    },
    {
      "epoch": 1.4916229828348238,
      "grad_norm": 0.19037294387817383,
      "learning_rate": 4.7585495140773894e-05,
      "loss": 0.557,
      "step": 7256
    },
    {
      "epoch": 1.4918285538082023,
      "grad_norm": 0.1699882447719574,
      "learning_rate": 4.7575523469601464e-05,
      "loss": 0.5252,
      "step": 7257
    },
    {
      "epoch": 1.492034124781581,
      "grad_norm": 0.16292616724967957,
      "learning_rate": 4.7565551671545003e-05,
      "loss": 0.5557,
      "step": 7258
    },
    {
      "epoch": 1.4922396957549595,
      "grad_norm": 0.1898314654827118,
      "learning_rate": 4.755557974709584e-05,
      "loss": 0.5341,
      "step": 7259
    },
    {
      "epoch": 1.4924452667283379,
      "grad_norm": 0.18847902119159698,
      "learning_rate": 4.7545607696745186e-05,
      "loss": 0.557,
      "step": 7260
    },
    {
      "epoch": 1.4926508377017165,
      "grad_norm": 0.16627360880374908,
      "learning_rate": 4.753563552098433e-05,
      "loss": 0.5283,
      "step": 7261
    },
    {
      "epoch": 1.492856408675095,
      "grad_norm": 0.16107410192489624,
      "learning_rate": 4.752566322030457e-05,
      "loss": 0.5447,
      "step": 7262
    },
    {
      "epoch": 1.4930619796484736,
      "grad_norm": 0.16727054119110107,
      "learning_rate": 4.751569079519721e-05,
      "loss": 0.5214,
      "step": 7263
    },
    {
      "epoch": 1.4932675506218522,
      "grad_norm": 0.1620626598596573,
      "learning_rate": 4.75057182461535e-05,
      "loss": 0.5117,
      "step": 7264
    },
    {
      "epoch": 1.4934731215952308,
      "grad_norm": 0.1607995629310608,
      "learning_rate": 4.749574557366477e-05,
      "loss": 0.5112,
      "step": 7265
    },
    {
      "epoch": 1.4936786925686092,
      "grad_norm": 0.16359218955039978,
      "learning_rate": 4.748577277822232e-05,
      "loss": 0.5528,
      "step": 7266
    },
    {
      "epoch": 1.4938842635419878,
      "grad_norm": 0.19799359142780304,
      "learning_rate": 4.747579986031747e-05,
      "loss": 0.5505,
      "step": 7267
    },
    {
      "epoch": 1.4940898345153664,
      "grad_norm": 0.1984180063009262,
      "learning_rate": 4.746582682044153e-05,
      "loss": 0.5721,
      "step": 7268
    },
    {
      "epoch": 1.494295405488745,
      "grad_norm": 0.196151003241539,
      "learning_rate": 4.745585365908582e-05,
      "loss": 0.5405,
      "step": 7269
    },
    {
      "epoch": 1.4945009764621235,
      "grad_norm": 0.16846486926078796,
      "learning_rate": 4.744588037674169e-05,
      "loss": 0.5246,
      "step": 7270
    },
    {
      "epoch": 1.4947065474355021,
      "grad_norm": 0.16317616403102875,
      "learning_rate": 4.743590697390045e-05,
      "loss": 0.5584,
      "step": 7271
    },
    {
      "epoch": 1.4949121184088807,
      "grad_norm": 0.18906491994857788,
      "learning_rate": 4.7425933451053474e-05,
      "loss": 0.5638,
      "step": 7272
    },
    {
      "epoch": 1.4951176893822593,
      "grad_norm": 0.16003085672855377,
      "learning_rate": 4.7415959808692085e-05,
      "loss": 0.5194,
      "step": 7273
    },
    {
      "epoch": 1.495323260355638,
      "grad_norm": 0.12602053582668304,
      "learning_rate": 4.740598604730766e-05,
      "loss": 0.5273,
      "step": 7274
    },
    {
      "epoch": 1.4955288313290163,
      "grad_norm": 0.15797413885593414,
      "learning_rate": 4.7396012167391536e-05,
      "loss": 0.537,
      "step": 7275
    },
    {
      "epoch": 1.4957344023023948,
      "grad_norm": 0.19751828908920288,
      "learning_rate": 4.73860381694351e-05,
      "loss": 0.5497,
      "step": 7276
    },
    {
      "epoch": 1.4959399732757734,
      "grad_norm": 0.1944907009601593,
      "learning_rate": 4.7376064053929724e-05,
      "loss": 0.5645,
      "step": 7277
    },
    {
      "epoch": 1.496145544249152,
      "grad_norm": 0.18657876551151276,
      "learning_rate": 4.736608982136676e-05,
      "loss": 0.5405,
      "step": 7278
    },
    {
      "epoch": 1.4963511152225306,
      "grad_norm": 0.19843631982803345,
      "learning_rate": 4.735611547223761e-05,
      "loss": 0.5417,
      "step": 7279
    },
    {
      "epoch": 1.4965566861959092,
      "grad_norm": 0.19256174564361572,
      "learning_rate": 4.7346141007033676e-05,
      "loss": 0.541,
      "step": 7280
    },
    {
      "epoch": 1.4967622571692876,
      "grad_norm": 0.18709734082221985,
      "learning_rate": 4.733616642624634e-05,
      "loss": 0.539,
      "step": 7281
    },
    {
      "epoch": 1.4969678281426662,
      "grad_norm": 0.1940479278564453,
      "learning_rate": 4.732619173036699e-05,
      "loss": 0.5556,
      "step": 7282
    },
    {
      "epoch": 1.4971733991160447,
      "grad_norm": 0.2690550684928894,
      "learning_rate": 4.731621691988705e-05,
      "loss": 0.5554,
      "step": 7283
    },
    {
      "epoch": 1.4973789700894233,
      "grad_norm": 0.19769832491874695,
      "learning_rate": 4.730624199529793e-05,
      "loss": 0.5355,
      "step": 7284
    },
    {
      "epoch": 1.497584541062802,
      "grad_norm": 0.23557159304618835,
      "learning_rate": 4.729626695709105e-05,
      "loss": 0.5675,
      "step": 7285
    },
    {
      "epoch": 1.4977901120361805,
      "grad_norm": 0.19808165729045868,
      "learning_rate": 4.728629180575783e-05,
      "loss": 0.5494,
      "step": 7286
    },
    {
      "epoch": 1.497995683009559,
      "grad_norm": 0.19333380460739136,
      "learning_rate": 4.7276316541789694e-05,
      "loss": 0.5534,
      "step": 7287
    },
    {
      "epoch": 1.4982012539829377,
      "grad_norm": 0.185968816280365,
      "learning_rate": 4.726634116567809e-05,
      "loss": 0.5273,
      "step": 7288
    },
    {
      "epoch": 1.4984068249563163,
      "grad_norm": 0.19186194241046906,
      "learning_rate": 4.725636567791443e-05,
      "loss": 0.5485,
      "step": 7289
    },
    {
      "epoch": 1.4986123959296949,
      "grad_norm": 0.16538295149803162,
      "learning_rate": 4.7246390078990195e-05,
      "loss": 0.5292,
      "step": 7290
    },
    {
      "epoch": 1.4988179669030732,
      "grad_norm": 0.16167549788951874,
      "learning_rate": 4.723641436939683e-05,
      "loss": 0.5493,
      "step": 7291
    },
    {
      "epoch": 1.4990235378764518,
      "grad_norm": 0.19126403331756592,
      "learning_rate": 4.722643854962577e-05,
      "loss": 0.531,
      "step": 7292
    },
    {
      "epoch": 1.4992291088498304,
      "grad_norm": 0.19075235724449158,
      "learning_rate": 4.721646262016849e-05,
      "loss": 0.5507,
      "step": 7293
    },
    {
      "epoch": 1.499434679823209,
      "grad_norm": 0.19539231061935425,
      "learning_rate": 4.720648658151645e-05,
      "loss": 0.5525,
      "step": 7294
    },
    {
      "epoch": 1.4996402507965876,
      "grad_norm": 0.19356007874011993,
      "learning_rate": 4.719651043416114e-05,
      "loss": 0.5398,
      "step": 7295
    },
    {
      "epoch": 1.499845821769966,
      "grad_norm": 0.19021181762218475,
      "learning_rate": 4.7186534178594016e-05,
      "loss": 0.5507,
      "step": 7296
    },
    {
      "epoch": 1.5000513927433445,
      "grad_norm": 0.1926860511302948,
      "learning_rate": 4.717655781530658e-05,
      "loss": 0.5716,
      "step": 7297
    },
    {
      "epoch": 1.5002569637167231,
      "grad_norm": 0.16523759067058563,
      "learning_rate": 4.716658134479031e-05,
      "loss": 0.4999,
      "step": 7298
    },
    {
      "epoch": 1.5004625346901017,
      "grad_norm": 0.15917497873306274,
      "learning_rate": 4.7156604767536716e-05,
      "loss": 0.5651,
      "step": 7299
    },
    {
      "epoch": 1.5006681056634803,
      "grad_norm": 0.16436144709587097,
      "learning_rate": 4.714662808403727e-05,
      "loss": 0.5352,
      "step": 7300
    },
    {
      "epoch": 1.5008736766368589,
      "grad_norm": 0.15653958916664124,
      "learning_rate": 4.71366512947835e-05,
      "loss": 0.5314,
      "step": 7301
    },
    {
      "epoch": 1.5010792476102375,
      "grad_norm": 0.1997946798801422,
      "learning_rate": 4.71266744002669e-05,
      "loss": 0.5585,
      "step": 7302
    },
    {
      "epoch": 1.501284818583616,
      "grad_norm": 0.1864425539970398,
      "learning_rate": 4.7116697400979e-05,
      "loss": 0.5312,
      "step": 7303
    },
    {
      "epoch": 1.5014903895569947,
      "grad_norm": 0.19595369696617126,
      "learning_rate": 4.710672029741131e-05,
      "loss": 0.5518,
      "step": 7304
    },
    {
      "epoch": 1.5016959605303732,
      "grad_norm": 0.16003580391407013,
      "learning_rate": 4.7096743090055354e-05,
      "loss": 0.5241,
      "step": 7305
    },
    {
      "epoch": 1.5019015315037518,
      "grad_norm": 0.16186951100826263,
      "learning_rate": 4.708676577940266e-05,
      "loss": 0.5391,
      "step": 7306
    },
    {
      "epoch": 1.5021071024771302,
      "grad_norm": 0.16420379281044006,
      "learning_rate": 4.707678836594478e-05,
      "loss": 0.4949,
      "step": 7307
    },
    {
      "epoch": 1.5023126734505088,
      "grad_norm": 0.15808852016925812,
      "learning_rate": 4.706681085017325e-05,
      "loss": 0.5566,
      "step": 7308
    },
    {
      "epoch": 1.5025182444238874,
      "grad_norm": 0.18840067088603973,
      "learning_rate": 4.7056833232579604e-05,
      "loss": 0.5491,
      "step": 7309
    },
    {
      "epoch": 1.502723815397266,
      "grad_norm": 0.16313523054122925,
      "learning_rate": 4.70468555136554e-05,
      "loss": 0.5258,
      "step": 7310
    },
    {
      "epoch": 1.5029293863706443,
      "grad_norm": 0.15801596641540527,
      "learning_rate": 4.703687769389219e-05,
      "loss": 0.5443,
      "step": 7311
    },
    {
      "epoch": 1.503134957344023,
      "grad_norm": 0.19635756313800812,
      "learning_rate": 4.702689977378154e-05,
      "loss": 0.5529,
      "step": 7312
    },
    {
      "epoch": 1.5033405283174015,
      "grad_norm": 0.1938237100839615,
      "learning_rate": 4.7016921753815e-05,
      "loss": 0.564,
      "step": 7313
    },
    {
      "epoch": 1.50354609929078,
      "grad_norm": 0.22758108377456665,
      "learning_rate": 4.7006943634484154e-05,
      "loss": 0.5604,
      "step": 7314
    },
    {
      "epoch": 1.5037516702641587,
      "grad_norm": 0.2014021873474121,
      "learning_rate": 4.699696541628058e-05,
      "loss": 0.5574,
      "step": 7315
    },
    {
      "epoch": 1.5039572412375373,
      "grad_norm": 0.1863914430141449,
      "learning_rate": 4.698698709969585e-05,
      "loss": 0.5268,
      "step": 7316
    },
    {
      "epoch": 1.5041628122109159,
      "grad_norm": 0.19100484251976013,
      "learning_rate": 4.6977008685221556e-05,
      "loss": 0.5515,
      "step": 7317
    },
    {
      "epoch": 1.5043683831842944,
      "grad_norm": 0.1965937614440918,
      "learning_rate": 4.6967030173349285e-05,
      "loss": 0.557,
      "step": 7318
    },
    {
      "epoch": 1.504573954157673,
      "grad_norm": 0.16544751822948456,
      "learning_rate": 4.695705156457064e-05,
      "loss": 0.5139,
      "step": 7319
    },
    {
      "epoch": 1.5047795251310516,
      "grad_norm": 0.1309744417667389,
      "learning_rate": 4.69470728593772e-05,
      "loss": 0.5451,
      "step": 7320
    },
    {
      "epoch": 1.5049850961044302,
      "grad_norm": 0.16225290298461914,
      "learning_rate": 4.6937094058260585e-05,
      "loss": 0.5624,
      "step": 7321
    },
    {
      "epoch": 1.5051906670778086,
      "grad_norm": 0.19539402425289154,
      "learning_rate": 4.69271151617124e-05,
      "loss": 0.5485,
      "step": 7322
    },
    {
      "epoch": 1.5053962380511872,
      "grad_norm": 0.19238321483135223,
      "learning_rate": 4.691713617022427e-05,
      "loss": 0.537,
      "step": 7323
    },
    {
      "epoch": 1.5056018090245658,
      "grad_norm": 0.19159597158432007,
      "learning_rate": 4.6907157084287774e-05,
      "loss": 0.5662,
      "step": 7324
    },
    {
      "epoch": 1.5058073799979443,
      "grad_norm": 0.16289053857326508,
      "learning_rate": 4.689717790439459e-05,
      "loss": 0.5125,
      "step": 7325
    },
    {
      "epoch": 1.5060129509713227,
      "grad_norm": 0.16851918399333954,
      "learning_rate": 4.6887198631036295e-05,
      "loss": 0.5272,
      "step": 7326
    },
    {
      "epoch": 1.5062185219447013,
      "grad_norm": 0.1957252323627472,
      "learning_rate": 4.687721926470455e-05,
      "loss": 0.5669,
      "step": 7327
    },
    {
      "epoch": 1.5064240929180799,
      "grad_norm": 0.1756441295146942,
      "learning_rate": 4.686723980589099e-05,
      "loss": 0.5055,
      "step": 7328
    },
    {
      "epoch": 1.5066296638914585,
      "grad_norm": 0.16394411027431488,
      "learning_rate": 4.685726025508726e-05,
      "loss": 0.5624,
      "step": 7329
    },
    {
      "epoch": 1.506835234864837,
      "grad_norm": 0.16520611941814423,
      "learning_rate": 4.684728061278499e-05,
      "loss": 0.5223,
      "step": 7330
    },
    {
      "epoch": 1.5070408058382156,
      "grad_norm": 0.12648457288742065,
      "learning_rate": 4.683730087947584e-05,
      "loss": 0.5101,
      "step": 7331
    },
    {
      "epoch": 1.5072463768115942,
      "grad_norm": 0.11723072826862335,
      "learning_rate": 4.682732105565146e-05,
      "loss": 0.5237,
      "step": 7332
    },
    {
      "epoch": 1.5074519477849728,
      "grad_norm": 0.12541693449020386,
      "learning_rate": 4.681734114180352e-05,
      "loss": 0.5185,
      "step": 7333
    },
    {
      "epoch": 1.5076575187583514,
      "grad_norm": 0.15850338339805603,
      "learning_rate": 4.6807361138423664e-05,
      "loss": 0.5335,
      "step": 7334
    },
    {
      "epoch": 1.50786308973173,
      "grad_norm": 0.1691320687532425,
      "learning_rate": 4.679738104600359e-05,
      "loss": 0.5226,
      "step": 7335
    },
    {
      "epoch": 1.5080686607051086,
      "grad_norm": 0.16223326325416565,
      "learning_rate": 4.678740086503494e-05,
      "loss": 0.5376,
      "step": 7336
    },
    {
      "epoch": 1.508274231678487,
      "grad_norm": 0.18564042448997498,
      "learning_rate": 4.6777420596009406e-05,
      "loss": 0.5129,
      "step": 7337
    },
    {
      "epoch": 1.5084798026518655,
      "grad_norm": 0.1631714105606079,
      "learning_rate": 4.676744023941866e-05,
      "loss": 0.5274,
      "step": 7338
    },
    {
      "epoch": 1.5086853736252441,
      "grad_norm": 0.15576131641864777,
      "learning_rate": 4.67574597957544e-05,
      "loss": 0.5431,
      "step": 7339
    },
    {
      "epoch": 1.5088909445986227,
      "grad_norm": 0.19135643541812897,
      "learning_rate": 4.6747479265508314e-05,
      "loss": 0.5605,
      "step": 7340
    },
    {
      "epoch": 1.509096515572001,
      "grad_norm": 0.2023853212594986,
      "learning_rate": 4.673749864917209e-05,
      "loss": 0.5562,
      "step": 7341
    },
    {
      "epoch": 1.5093020865453797,
      "grad_norm": 0.1936071515083313,
      "learning_rate": 4.672751794723743e-05,
      "loss": 0.5556,
      "step": 7342
    },
    {
      "epoch": 1.5095076575187583,
      "grad_norm": 0.18370911478996277,
      "learning_rate": 4.671753716019604e-05,
      "loss": 0.5524,
      "step": 7343
    },
    {
      "epoch": 1.5097132284921368,
      "grad_norm": 0.15776073932647705,
      "learning_rate": 4.6707556288539605e-05,
      "loss": 0.4955,
      "step": 7344
    },
    {
      "epoch": 1.5099187994655154,
      "grad_norm": 0.15899749100208282,
      "learning_rate": 4.6697575332759865e-05,
      "loss": 0.5609,
      "step": 7345
    },
    {
      "epoch": 1.510124370438894,
      "grad_norm": 0.19149565696716309,
      "learning_rate": 4.668759429334852e-05,
      "loss": 0.5453,
      "step": 7346
    },
    {
      "epoch": 1.5103299414122726,
      "grad_norm": 0.20891959965229034,
      "learning_rate": 4.667761317079729e-05,
      "loss": 0.5634,
      "step": 7347
    },
    {
      "epoch": 1.5105355123856512,
      "grad_norm": 0.18865418434143066,
      "learning_rate": 4.666763196559791e-05,
      "loss": 0.5462,
      "step": 7348
    },
    {
      "epoch": 1.5107410833590298,
      "grad_norm": 0.18833813071250916,
      "learning_rate": 4.6657650678242085e-05,
      "loss": 0.5334,
      "step": 7349
    },
    {
      "epoch": 1.5109466543324084,
      "grad_norm": 0.18930873274803162,
      "learning_rate": 4.664766930922157e-05,
      "loss": 0.5332,
      "step": 7350
    },
    {
      "epoch": 1.511152225305787,
      "grad_norm": 0.19529637694358826,
      "learning_rate": 4.663768785902807e-05,
      "loss": 0.5644,
      "step": 7351
    },
    {
      "epoch": 1.5113577962791653,
      "grad_norm": 0.1973542720079422,
      "learning_rate": 4.662770632815337e-05,
      "loss": 0.5617,
      "step": 7352
    },
    {
      "epoch": 1.511563367252544,
      "grad_norm": 0.18992508947849274,
      "learning_rate": 4.6617724717089174e-05,
      "loss": 0.5536,
      "step": 7353
    },
    {
      "epoch": 1.5117689382259225,
      "grad_norm": 0.16945968568325043,
      "learning_rate": 4.660774302632724e-05,
      "loss": 0.5303,
      "step": 7354
    },
    {
      "epoch": 1.511974509199301,
      "grad_norm": 0.15689992904663086,
      "learning_rate": 4.659776125635932e-05,
      "loss": 0.5519,
      "step": 7355
    },
    {
      "epoch": 1.5121800801726795,
      "grad_norm": 0.8934375643730164,
      "learning_rate": 4.6587779407677185e-05,
      "loss": 0.579,
      "step": 7356
    },
    {
      "epoch": 1.512385651146058,
      "grad_norm": 0.1862555593252182,
      "learning_rate": 4.657779748077257e-05,
      "loss": 0.5403,
      "step": 7357
    },
    {
      "epoch": 1.5125912221194366,
      "grad_norm": 0.19881917536258698,
      "learning_rate": 4.656781547613724e-05,
      "loss": 0.5623,
      "step": 7358
    },
    {
      "epoch": 1.5127967930928152,
      "grad_norm": 0.16885186731815338,
      "learning_rate": 4.655783339426297e-05,
      "loss": 0.5123,
      "step": 7359
    },
    {
      "epoch": 1.5130023640661938,
      "grad_norm": 0.1638081818819046,
      "learning_rate": 4.654785123564155e-05,
      "loss": 0.5536,
      "step": 7360
    },
    {
      "epoch": 1.5132079350395724,
      "grad_norm": 0.19882342219352722,
      "learning_rate": 4.653786900076472e-05,
      "loss": 0.5512,
      "step": 7361
    },
    {
      "epoch": 1.513413506012951,
      "grad_norm": 0.20189371705055237,
      "learning_rate": 4.652788669012427e-05,
      "loss": 0.5612,
      "step": 7362
    },
    {
      "epoch": 1.5136190769863296,
      "grad_norm": 0.1760426163673401,
      "learning_rate": 4.651790430421199e-05,
      "loss": 0.5255,
      "step": 7363
    },
    {
      "epoch": 1.5138246479597082,
      "grad_norm": 0.16108988225460052,
      "learning_rate": 4.6507921843519664e-05,
      "loss": 0.5382,
      "step": 7364
    },
    {
      "epoch": 1.5140302189330868,
      "grad_norm": 0.19698002934455872,
      "learning_rate": 4.649793930853907e-05,
      "loss": 0.5369,
      "step": 7365
    },
    {
      "epoch": 1.5142357899064653,
      "grad_norm": 0.20208927989006042,
      "learning_rate": 4.6487956699762004e-05,
      "loss": 0.5455,
      "step": 7366
    },
    {
      "epoch": 1.5144413608798437,
      "grad_norm": 0.1949499249458313,
      "learning_rate": 4.6477974017680275e-05,
      "loss": 0.5547,
      "step": 7367
    },
    {
      "epoch": 1.5146469318532223,
      "grad_norm": 0.19195735454559326,
      "learning_rate": 4.646799126278567e-05,
      "loss": 0.5309,
      "step": 7368
    },
    {
      "epoch": 1.5148525028266009,
      "grad_norm": 0.16471721231937408,
      "learning_rate": 4.645800843556999e-05,
      "loss": 0.5248,
      "step": 7369
    },
    {
      "epoch": 1.5150580737999795,
      "grad_norm": 0.16040369868278503,
      "learning_rate": 4.644802553652505e-05,
      "loss": 0.5192,
      "step": 7370
    },
    {
      "epoch": 1.5152636447733578,
      "grad_norm": 0.16164757311344147,
      "learning_rate": 4.643804256614267e-05,
      "loss": 0.5253,
      "step": 7371
    },
    {
      "epoch": 1.5154692157467364,
      "grad_norm": 0.15787971019744873,
      "learning_rate": 4.6428059524914643e-05,
      "loss": 0.5589,
      "step": 7372
    },
    {
      "epoch": 1.515674786720115,
      "grad_norm": 0.19109466671943665,
      "learning_rate": 4.641807641333281e-05,
      "loss": 0.5557,
      "step": 7373
    },
    {
      "epoch": 1.5158803576934936,
      "grad_norm": 0.19500547647476196,
      "learning_rate": 4.640809323188897e-05,
      "loss": 0.5407,
      "step": 7374
    },
    {
      "epoch": 1.5160859286668722,
      "grad_norm": 0.1970156580209732,
      "learning_rate": 4.639810998107497e-05,
      "loss": 0.5453,
      "step": 7375
    },
    {
      "epoch": 1.5162914996402508,
      "grad_norm": 0.20001158118247986,
      "learning_rate": 4.638812666138261e-05,
      "loss": 0.5552,
      "step": 7376
    },
    {
      "epoch": 1.5164970706136294,
      "grad_norm": 0.16510051488876343,
      "learning_rate": 4.637814327330376e-05,
      "loss": 0.5262,
      "step": 7377
    },
    {
      "epoch": 1.516702641587008,
      "grad_norm": 0.161884605884552,
      "learning_rate": 4.636815981733022e-05,
      "loss": 0.5346,
      "step": 7378
    },
    {
      "epoch": 1.5169082125603865,
      "grad_norm": 0.17277652025222778,
      "learning_rate": 4.635817629395383e-05,
      "loss": 0.5142,
      "step": 7379
    },
    {
      "epoch": 1.5171137835337651,
      "grad_norm": 0.15767474472522736,
      "learning_rate": 4.6348192703666444e-05,
      "loss": 0.529,
      "step": 7380
    },
    {
      "epoch": 1.5173193545071437,
      "grad_norm": 0.19689583778381348,
      "learning_rate": 4.633820904695992e-05,
      "loss": 0.5467,
      "step": 7381
    },
    {
      "epoch": 1.5175249254805223,
      "grad_norm": 0.19332459568977356,
      "learning_rate": 4.6328225324326066e-05,
      "loss": 0.5505,
      "step": 7382
    },
    {
      "epoch": 1.5177304964539007,
      "grad_norm": 0.19339875876903534,
      "learning_rate": 4.631824153625679e-05,
      "loss": 0.5504,
      "step": 7383
    },
    {
      "epoch": 1.5179360674272793,
      "grad_norm": 0.19665616750717163,
      "learning_rate": 4.63082576832439e-05,
      "loss": 0.5474,
      "step": 7384
    },
    {
      "epoch": 1.5181416384006579,
      "grad_norm": 0.19962632656097412,
      "learning_rate": 4.629827376577927e-05,
      "loss": 0.5514,
      "step": 7385
    },
    {
      "epoch": 1.5183472093740362,
      "grad_norm": 0.19536101818084717,
      "learning_rate": 4.628828978435475e-05,
      "loss": 0.55,
      "step": 7386
    },
    {
      "epoch": 1.5185527803474148,
      "grad_norm": 0.19217143952846527,
      "learning_rate": 4.627830573946223e-05,
      "loss": 0.5404,
      "step": 7387
    },
    {
      "epoch": 1.5187583513207934,
      "grad_norm": 0.20492962002754211,
      "learning_rate": 4.6268321631593556e-05,
      "loss": 0.5701,
      "step": 7388
    },
    {
      "epoch": 1.518963922294172,
      "grad_norm": 0.16076092422008514,
      "learning_rate": 4.6258337461240595e-05,
      "loss": 0.5199,
      "step": 7389
    },
    {
      "epoch": 1.5191694932675506,
      "grad_norm": 0.16766008734703064,
      "learning_rate": 4.624835322889524e-05,
      "loss": 0.544,
      "step": 7390
    },
    {
      "epoch": 1.5193750642409292,
      "grad_norm": 0.19400693476200104,
      "learning_rate": 4.623836893504934e-05,
      "loss": 0.526,
      "step": 7391
    },
    {
      "epoch": 1.5195806352143078,
      "grad_norm": 0.19015835225582123,
      "learning_rate": 4.62283845801948e-05,
      "loss": 0.5383,
      "step": 7392
    },
    {
      "epoch": 1.5197862061876863,
      "grad_norm": 0.19058318436145782,
      "learning_rate": 4.6218400164823495e-05,
      "loss": 0.5406,
      "step": 7393
    },
    {
      "epoch": 1.519991777161065,
      "grad_norm": 0.1955268830060959,
      "learning_rate": 4.620841568942731e-05,
      "loss": 0.5357,
      "step": 7394
    },
    {
      "epoch": 1.5201973481344435,
      "grad_norm": 0.16312715411186218,
      "learning_rate": 4.619843115449814e-05,
      "loss": 0.5241,
      "step": 7395
    },
    {
      "epoch": 1.520402919107822,
      "grad_norm": 0.16432897746562958,
      "learning_rate": 4.6188446560527846e-05,
      "loss": 0.5364,
      "step": 7396
    },
    {
      "epoch": 1.5206084900812007,
      "grad_norm": 0.1991865038871765,
      "learning_rate": 4.617846190800837e-05,
      "loss": 0.5332,
      "step": 7397
    },
    {
      "epoch": 1.520814061054579,
      "grad_norm": 0.19771799445152283,
      "learning_rate": 4.616847719743157e-05,
      "loss": 0.5473,
      "step": 7398
    },
    {
      "epoch": 1.5210196320279576,
      "grad_norm": 0.21633638441562653,
      "learning_rate": 4.615849242928936e-05,
      "loss": 0.582,
      "step": 7399
    },
    {
      "epoch": 1.5212252030013362,
      "grad_norm": 0.19637715816497803,
      "learning_rate": 4.614850760407364e-05,
      "loss": 0.5619,
      "step": 7400
    },
    {
      "epoch": 1.5214307739747148,
      "grad_norm": 0.1928258240222931,
      "learning_rate": 4.613852272227633e-05,
      "loss": 0.5578,
      "step": 7401
    },
    {
      "epoch": 1.5216363449480932,
      "grad_norm": 0.19066447019577026,
      "learning_rate": 4.612853778438931e-05,
      "loss": 0.5507,
      "step": 7402
    },
    {
      "epoch": 1.5218419159214718,
      "grad_norm": 0.19168606400489807,
      "learning_rate": 4.611855279090452e-05,
      "loss": 0.5625,
      "step": 7403
    },
    {
      "epoch": 1.5220474868948504,
      "grad_norm": 0.18386611342430115,
      "learning_rate": 4.610856774231386e-05,
      "loss": 0.5484,
      "step": 7404
    },
    {
      "epoch": 1.522253057868229,
      "grad_norm": 0.1938936412334442,
      "learning_rate": 4.609858263910925e-05,
      "loss": 0.5629,
      "step": 7405
    },
    {
      "epoch": 1.5224586288416075,
      "grad_norm": 0.1900719851255417,
      "learning_rate": 4.6088597481782606e-05,
      "loss": 0.5491,
      "step": 7406
    },
    {
      "epoch": 1.5226641998149861,
      "grad_norm": 0.18934617936611176,
      "learning_rate": 4.607861227082585e-05,
      "loss": 0.5377,
      "step": 7407
    },
    {
      "epoch": 1.5228697707883647,
      "grad_norm": 0.20040073990821838,
      "learning_rate": 4.606862700673091e-05,
      "loss": 0.5384,
      "step": 7408
    },
    {
      "epoch": 1.5230753417617433,
      "grad_norm": 0.19345182180404663,
      "learning_rate": 4.6058641689989724e-05,
      "loss": 0.5519,
      "step": 7409
    },
    {
      "epoch": 1.523280912735122,
      "grad_norm": 0.19998955726623535,
      "learning_rate": 4.6048656321094196e-05,
      "loss": 0.5499,
      "step": 7410
    },
    {
      "epoch": 1.5234864837085005,
      "grad_norm": 0.2003701776266098,
      "learning_rate": 4.603867090053627e-05,
      "loss": 0.5471,
      "step": 7411
    },
    {
      "epoch": 1.523692054681879,
      "grad_norm": 0.1997435837984085,
      "learning_rate": 4.6028685428807896e-05,
      "loss": 0.5349,
      "step": 7412
    },
    {
      "epoch": 1.5238976256552574,
      "grad_norm": 0.19210022687911987,
      "learning_rate": 4.6018699906400996e-05,
      "loss": 0.5452,
      "step": 7413
    },
    {
      "epoch": 1.524103196628636,
      "grad_norm": 0.19292627274990082,
      "learning_rate": 4.6008714333807496e-05,
      "loss": 0.5605,
      "step": 7414
    },
    {
      "epoch": 1.5243087676020146,
      "grad_norm": 0.18850092589855194,
      "learning_rate": 4.599872871151937e-05,
      "loss": 0.5521,
      "step": 7415
    },
    {
      "epoch": 1.5245143385753932,
      "grad_norm": 0.19602644443511963,
      "learning_rate": 4.5988743040028554e-05,
      "loss": 0.55,
      "step": 7416
    },
    {
      "epoch": 1.5247199095487716,
      "grad_norm": 0.19302399456501007,
      "learning_rate": 4.597875731982697e-05,
      "loss": 0.5361,
      "step": 7417
    },
    {
      "epoch": 1.5249254805221502,
      "grad_norm": 0.16675427556037903,
      "learning_rate": 4.596877155140661e-05,
      "loss": 0.5136,
      "step": 7418
    },
    {
      "epoch": 1.5251310514955287,
      "grad_norm": 0.15877321362495422,
      "learning_rate": 4.59587857352594e-05,
      "loss": 0.5591,
      "step": 7419
    },
    {
      "epoch": 1.5253366224689073,
      "grad_norm": 0.16738201677799225,
      "learning_rate": 4.594879987187729e-05,
      "loss": 0.5191,
      "step": 7420
    },
    {
      "epoch": 1.525542193442286,
      "grad_norm": 0.16919690370559692,
      "learning_rate": 4.5938813961752254e-05,
      "loss": 0.5439,
      "step": 7421
    },
    {
      "epoch": 1.5257477644156645,
      "grad_norm": 0.15980926156044006,
      "learning_rate": 4.592882800537624e-05,
      "loss": 0.5099,
      "step": 7422
    },
    {
      "epoch": 1.525953335389043,
      "grad_norm": 0.1241704598069191,
      "learning_rate": 4.5918842003241195e-05,
      "loss": 0.5069,
      "step": 7423
    },
    {
      "epoch": 1.5261589063624217,
      "grad_norm": 0.1193804070353508,
      "learning_rate": 4.59088559558391e-05,
      "loss": 0.5091,
      "step": 7424
    },
    {
      "epoch": 1.5263644773358003,
      "grad_norm": 0.12635476887226105,
      "learning_rate": 4.589886986366194e-05,
      "loss": 0.5111,
      "step": 7425
    },
    {
      "epoch": 1.5265700483091789,
      "grad_norm": 0.11729497462511063,
      "learning_rate": 4.5888883727201665e-05,
      "loss": 0.5215,
      "step": 7426
    },
    {
      "epoch": 1.5267756192825575,
      "grad_norm": 0.16425076127052307,
      "learning_rate": 4.5878897546950225e-05,
      "loss": 0.5357,
      "step": 7427
    },
    {
      "epoch": 1.5269811902559358,
      "grad_norm": 0.20362845063209534,
      "learning_rate": 4.586891132339962e-05,
      "loss": 0.5392,
      "step": 7428
    },
    {
      "epoch": 1.5271867612293144,
      "grad_norm": 0.1934981644153595,
      "learning_rate": 4.585892505704182e-05,
      "loss": 0.5484,
      "step": 7429
    },
    {
      "epoch": 1.527392332202693,
      "grad_norm": 0.19643427431583405,
      "learning_rate": 4.584893874836879e-05,
      "loss": 0.5564,
      "step": 7430
    },
    {
      "epoch": 1.5275979031760716,
      "grad_norm": 0.1882271021604538,
      "learning_rate": 4.583895239787251e-05,
      "loss": 0.5667,
      "step": 7431
    },
    {
      "epoch": 1.52780347414945,
      "grad_norm": 0.15838836133480072,
      "learning_rate": 4.5828966006044974e-05,
      "loss": 0.5059,
      "step": 7432
    },
    {
      "epoch": 1.5280090451228285,
      "grad_norm": 0.16002227365970612,
      "learning_rate": 4.581897957337817e-05,
      "loss": 0.5405,
      "step": 7433
    },
    {
      "epoch": 1.5282146160962071,
      "grad_norm": 0.19433261454105377,
      "learning_rate": 4.5808993100364055e-05,
      "loss": 0.5678,
      "step": 7434
    },
    {
      "epoch": 1.5284201870695857,
      "grad_norm": 0.16582860052585602,
      "learning_rate": 4.579900658749462e-05,
      "loss": 0.5538,
      "step": 7435
    },
    {
      "epoch": 1.5286257580429643,
      "grad_norm": 0.1574729084968567,
      "learning_rate": 4.5789020035261886e-05,
      "loss": 0.5472,
      "step": 7436
    },
    {
      "epoch": 1.5288313290163429,
      "grad_norm": 0.20113399624824524,
      "learning_rate": 4.577903344415781e-05,
      "loss": 0.568,
      "step": 7437
    },
    {
      "epoch": 1.5290368999897215,
      "grad_norm": 0.19250795245170593,
      "learning_rate": 4.57690468146744e-05,
      "loss": 0.548,
      "step": 7438
    },
    {
      "epoch": 1.5292424709631,
      "grad_norm": 0.1601334810256958,
      "learning_rate": 4.5759060147303655e-05,
      "loss": 0.4955,
      "step": 7439
    },
    {
      "epoch": 1.5294480419364787,
      "grad_norm": 0.16352780163288116,
      "learning_rate": 4.5749073442537566e-05,
      "loss": 0.5445,
      "step": 7440
    },
    {
      "epoch": 1.5296536129098572,
      "grad_norm": 0.1970401108264923,
      "learning_rate": 4.573908670086812e-05,
      "loss": 0.5818,
      "step": 7441
    },
    {
      "epoch": 1.5298591838832358,
      "grad_norm": 0.19766905903816223,
      "learning_rate": 4.572909992278734e-05,
      "loss": 0.5515,
      "step": 7442
    },
    {
      "epoch": 1.5300647548566142,
      "grad_norm": 0.19481036067008972,
      "learning_rate": 4.57191131087872e-05,
      "loss": 0.5512,
      "step": 7443
    },
    {
      "epoch": 1.5302703258299928,
      "grad_norm": 0.20617318153381348,
      "learning_rate": 4.570912625935972e-05,
      "loss": 0.5534,
      "step": 7444
    },
    {
      "epoch": 1.5304758968033714,
      "grad_norm": 0.20254306495189667,
      "learning_rate": 4.5699139374996906e-05,
      "loss": 0.5534,
      "step": 7445
    },
    {
      "epoch": 1.53068146777675,
      "grad_norm": 0.1929122805595398,
      "learning_rate": 4.568915245619076e-05,
      "loss": 0.5436,
      "step": 7446
    },
    {
      "epoch": 1.5308870387501283,
      "grad_norm": 0.19024674594402313,
      "learning_rate": 4.5679165503433306e-05,
      "loss": 0.5508,
      "step": 7447
    },
    {
      "epoch": 1.531092609723507,
      "grad_norm": 0.19227847456932068,
      "learning_rate": 4.5669178517216525e-05,
      "loss": 0.5456,
      "step": 7448
    },
    {
      "epoch": 1.5312981806968855,
      "grad_norm": 0.1958528608083725,
      "learning_rate": 4.5659191498032456e-05,
      "loss": 0.5482,
      "step": 7449
    },
    {
      "epoch": 1.531503751670264,
      "grad_norm": 0.19175393879413605,
      "learning_rate": 4.564920444637311e-05,
      "loss": 0.5557,
      "step": 7450
    },
    {
      "epoch": 1.5317093226436427,
      "grad_norm": 0.19114267826080322,
      "learning_rate": 4.5639217362730484e-05,
      "loss": 0.5439,
      "step": 7451
    },
    {
      "epoch": 1.5319148936170213,
      "grad_norm": 0.16341425478458405,
      "learning_rate": 4.56292302475966e-05,
      "loss": 0.507,
      "step": 7452
    },
    {
      "epoch": 1.5321204645903999,
      "grad_norm": 0.15693975985050201,
      "learning_rate": 4.56192431014635e-05,
      "loss": 0.5558,
      "step": 7453
    },
    {
      "epoch": 1.5323260355637784,
      "grad_norm": 0.21227800846099854,
      "learning_rate": 4.560925592482319e-05,
      "loss": 0.5398,
      "step": 7454
    },
    {
      "epoch": 1.532531606537157,
      "grad_norm": 0.19406823813915253,
      "learning_rate": 4.559926871816767e-05,
      "loss": 0.5334,
      "step": 7455
    },
    {
      "epoch": 1.5327371775105356,
      "grad_norm": 0.19032882153987885,
      "learning_rate": 4.558928148198898e-05,
      "loss": 0.5247,
      "step": 7456
    },
    {
      "epoch": 1.5329427484839142,
      "grad_norm": 0.19708728790283203,
      "learning_rate": 4.557929421677916e-05,
      "loss": 0.5549,
      "step": 7457
    },
    {
      "epoch": 1.5331483194572928,
      "grad_norm": 0.1929347962141037,
      "learning_rate": 4.556930692303021e-05,
      "loss": 0.5586,
      "step": 7458
    },
    {
      "epoch": 1.5333538904306712,
      "grad_norm": 0.19860495626926422,
      "learning_rate": 4.555931960123418e-05,
      "loss": 0.5539,
      "step": 7459
    },
    {
      "epoch": 1.5335594614040498,
      "grad_norm": 0.1928236037492752,
      "learning_rate": 4.554933225188308e-05,
      "loss": 0.5639,
      "step": 7460
    },
    {
      "epoch": 1.5337650323774283,
      "grad_norm": 0.19600355625152588,
      "learning_rate": 4.553934487546895e-05,
      "loss": 0.5587,
      "step": 7461
    },
    {
      "epoch": 1.5339706033508067,
      "grad_norm": 0.1872026026248932,
      "learning_rate": 4.5529357472483815e-05,
      "loss": 0.5292,
      "step": 7462
    },
    {
      "epoch": 1.5341761743241853,
      "grad_norm": 0.19457010924816132,
      "learning_rate": 4.551937004341971e-05,
      "loss": 0.5526,
      "step": 7463
    },
    {
      "epoch": 1.5343817452975639,
      "grad_norm": 0.19338703155517578,
      "learning_rate": 4.5509382588768684e-05,
      "loss": 0.5475,
      "step": 7464
    },
    {
      "epoch": 1.5345873162709425,
      "grad_norm": 0.16978971660137177,
      "learning_rate": 4.549939510902274e-05,
      "loss": 0.5315,
      "step": 7465
    },
    {
      "epoch": 1.534792887244321,
      "grad_norm": 0.16673077642917633,
      "learning_rate": 4.548940760467395e-05,
      "loss": 0.5475,
      "step": 7466
    },
    {
      "epoch": 1.5349984582176996,
      "grad_norm": 0.195562481880188,
      "learning_rate": 4.5479420076214315e-05,
      "loss": 0.5599,
      "step": 7467
    },
    {
      "epoch": 1.5352040291910782,
      "grad_norm": 0.1955966353416443,
      "learning_rate": 4.5469432524135913e-05,
      "loss": 0.5538,
      "step": 7468
    },
    {
      "epoch": 1.5354096001644568,
      "grad_norm": 0.20345093309879303,
      "learning_rate": 4.5459444948930754e-05,
      "loss": 0.5529,
      "step": 7469
    },
    {
      "epoch": 1.5356151711378354,
      "grad_norm": 0.16392046213150024,
      "learning_rate": 4.5449457351090896e-05,
      "loss": 0.53,
      "step": 7470
    },
    {
      "epoch": 1.535820742111214,
      "grad_norm": 0.1566355973482132,
      "learning_rate": 4.5439469731108383e-05,
      "loss": 0.5523,
      "step": 7471
    },
    {
      "epoch": 1.5360263130845926,
      "grad_norm": 0.1888071596622467,
      "learning_rate": 4.542948208947523e-05,
      "loss": 0.5527,
      "step": 7472
    },
    {
      "epoch": 1.5362318840579712,
      "grad_norm": 0.19896787405014038,
      "learning_rate": 4.5419494426683514e-05,
      "loss": 0.5568,
      "step": 7473
    },
    {
      "epoch": 1.5364374550313495,
      "grad_norm": 0.1599314957857132,
      "learning_rate": 4.5409506743225274e-05,
      "loss": 0.5418,
      "step": 7474
    },
    {
      "epoch": 1.5366430260047281,
      "grad_norm": 0.15871824324131012,
      "learning_rate": 4.5399519039592546e-05,
      "loss": 0.5393,
      "step": 7475
    },
    {
      "epoch": 1.5368485969781067,
      "grad_norm": 0.18515051901340485,
      "learning_rate": 4.538953131627737e-05,
      "loss": 0.5383,
      "step": 7476
    },
    {
      "epoch": 1.537054167951485,
      "grad_norm": 0.1832568496465683,
      "learning_rate": 4.5379543573771823e-05,
      "loss": 0.5393,
      "step": 7477
    },
    {
      "epoch": 1.5372597389248637,
      "grad_norm": 0.188548281788826,
      "learning_rate": 4.5369555812567926e-05,
      "loss": 0.5413,
      "step": 7478
    },
    {
      "epoch": 1.5374653098982423,
      "grad_norm": 0.16678757965564728,
      "learning_rate": 4.535956803315774e-05,
      "loss": 0.5216,
      "step": 7479
    },
    {
      "epoch": 1.5376708808716208,
      "grad_norm": 0.12842969596385956,
      "learning_rate": 4.534958023603333e-05,
      "loss": 0.5017,
      "step": 7480
    },
    {
      "epoch": 1.5378764518449994,
      "grad_norm": 0.16010682284832,
      "learning_rate": 4.5339592421686734e-05,
      "loss": 0.5213,
      "step": 7481
    },
    {
      "epoch": 1.538082022818378,
      "grad_norm": 0.20323491096496582,
      "learning_rate": 4.5329604590610004e-05,
      "loss": 0.5543,
      "step": 7482
    },
    {
      "epoch": 1.5382875937917566,
      "grad_norm": 0.19236190617084503,
      "learning_rate": 4.531961674329519e-05,
      "loss": 0.5641,
      "step": 7483
    },
    {
      "epoch": 1.5384931647651352,
      "grad_norm": 0.19376271963119507,
      "learning_rate": 4.5309628880234356e-05,
      "loss": 0.542,
      "step": 7484
    },
    {
      "epoch": 1.5386987357385138,
      "grad_norm": 0.18914787471294403,
      "learning_rate": 4.529964100191957e-05,
      "loss": 0.5481,
      "step": 7485
    },
    {
      "epoch": 1.5389043067118924,
      "grad_norm": 0.19532737135887146,
      "learning_rate": 4.5289653108842845e-05,
      "loss": 0.5634,
      "step": 7486
    },
    {
      "epoch": 1.539109877685271,
      "grad_norm": 0.1869991570711136,
      "learning_rate": 4.527966520149629e-05,
      "loss": 0.5536,
      "step": 7487
    },
    {
      "epoch": 1.5393154486586496,
      "grad_norm": 0.18661408126354218,
      "learning_rate": 4.526967728037191e-05,
      "loss": 0.5466,
      "step": 7488
    },
    {
      "epoch": 1.539521019632028,
      "grad_norm": 0.2640432119369507,
      "learning_rate": 4.525968934596181e-05,
      "loss": 0.5553,
      "step": 7489
    },
    {
      "epoch": 1.5397265906054065,
      "grad_norm": 0.20137301087379456,
      "learning_rate": 4.524970139875803e-05,
      "loss": 0.5563,
      "step": 7490
    },
    {
      "epoch": 1.539932161578785,
      "grad_norm": 0.17082248628139496,
      "learning_rate": 4.523971343925263e-05,
      "loss": 0.5198,
      "step": 7491
    },
    {
      "epoch": 1.5401377325521637,
      "grad_norm": 0.13131971657276154,
      "learning_rate": 4.5229725467937666e-05,
      "loss": 0.5375,
      "step": 7492
    },
    {
      "epoch": 1.540343303525542,
      "grad_norm": 0.16236910223960876,
      "learning_rate": 4.5219737485305194e-05,
      "loss": 0.5435,
      "step": 7493
    },
    {
      "epoch": 1.5405488744989206,
      "grad_norm": 0.19899526238441467,
      "learning_rate": 4.5209749491847295e-05,
      "loss": 0.5685,
      "step": 7494
    },
    {
      "epoch": 1.5407544454722992,
      "grad_norm": 0.19995881617069244,
      "learning_rate": 4.519976148805602e-05,
      "loss": 0.5646,
      "step": 7495
    },
    {
      "epoch": 1.5409600164456778,
      "grad_norm": 0.20216208696365356,
      "learning_rate": 4.518977347442341e-05,
      "loss": 0.5596,
      "step": 7496
    },
    {
      "epoch": 1.5411655874190564,
      "grad_norm": 0.17260567843914032,
      "learning_rate": 4.5179785451441574e-05,
      "loss": 0.5084,
      "step": 7497
    },
    {
      "epoch": 1.541371158392435,
      "grad_norm": 0.15725255012512207,
      "learning_rate": 4.516979741960254e-05,
      "loss": 0.5399,
      "step": 7498
    },
    {
      "epoch": 1.5415767293658136,
      "grad_norm": 0.1909477263689041,
      "learning_rate": 4.515980937939837e-05,
      "loss": 0.5416,
      "step": 7499
    },
    {
      "epoch": 1.5417823003391922,
      "grad_norm": 0.1896287351846695,
      "learning_rate": 4.514982133132114e-05,
      "loss": 0.5395,
      "step": 7500
    },
    {
      "epoch": 1.5419878713125708,
      "grad_norm": 0.188772514462471,
      "learning_rate": 4.5139833275862925e-05,
      "loss": 0.5456,
      "step": 7501
    },
    {
      "epoch": 1.5421934422859493,
      "grad_norm": 0.18162913620471954,
      "learning_rate": 4.5129845213515775e-05,
      "loss": 0.543,
      "step": 7502
    },
    {
      "epoch": 1.542399013259328,
      "grad_norm": 0.19076716899871826,
      "learning_rate": 4.511985714477175e-05,
      "loss": 0.5502,
      "step": 7503
    },
    {
      "epoch": 1.5426045842327063,
      "grad_norm": 0.20053020119667053,
      "learning_rate": 4.5109869070122946e-05,
      "loss": 0.5675,
      "step": 7504
    },
    {
      "epoch": 1.5428101552060849,
      "grad_norm": 0.19717735052108765,
      "learning_rate": 4.509988099006138e-05,
      "loss": 0.5525,
      "step": 7505
    },
    {
      "epoch": 1.5430157261794635,
      "grad_norm": 0.1972462683916092,
      "learning_rate": 4.5089892905079175e-05,
      "loss": 0.561,
      "step": 7506
    },
    {
      "epoch": 1.543221297152842,
      "grad_norm": 0.1987045705318451,
      "learning_rate": 4.507990481566833e-05,
      "loss": 0.5333,
      "step": 7507
    },
    {
      "epoch": 1.5434268681262204,
      "grad_norm": 0.18806061148643494,
      "learning_rate": 4.506991672232097e-05,
      "loss": 0.5213,
      "step": 7508
    },
    {
      "epoch": 1.543632439099599,
      "grad_norm": 0.19716767966747284,
      "learning_rate": 4.505992862552913e-05,
      "loss": 0.5605,
      "step": 7509
    },
    {
      "epoch": 1.5438380100729776,
      "grad_norm": 0.18911804258823395,
      "learning_rate": 4.50499405257849e-05,
      "loss": 0.559,
      "step": 7510
    },
    {
      "epoch": 1.5440435810463562,
      "grad_norm": 0.18609070777893066,
      "learning_rate": 4.5039952423580324e-05,
      "loss": 0.5176,
      "step": 7511
    },
    {
      "epoch": 1.5442491520197348,
      "grad_norm": 0.19210830330848694,
      "learning_rate": 4.502996431940748e-05,
      "loss": 0.5397,
      "step": 7512
    },
    {
      "epoch": 1.5444547229931134,
      "grad_norm": 0.1905742585659027,
      "learning_rate": 4.5019976213758434e-05,
      "loss": 0.5585,
      "step": 7513
    },
    {
      "epoch": 1.544660293966492,
      "grad_norm": 0.16525664925575256,
      "learning_rate": 4.500998810712525e-05,
      "loss": 0.5138,
      "step": 7514
    },
    {
      "epoch": 1.5448658649398705,
      "grad_norm": 0.16021090745925903,
      "learning_rate": 4.5e-05,
      "loss": 0.5536,
      "step": 7515
    },
    {
      "epoch": 1.5450714359132491,
      "grad_norm": 0.1621478945016861,
      "learning_rate": 4.499001189287476e-05,
      "loss": 0.5065,
      "step": 7516
    },
    {
      "epoch": 1.5452770068866277,
      "grad_norm": 0.19542866945266724,
      "learning_rate": 4.4980023786241585e-05,
      "loss": 0.5389,
      "step": 7517
    },
    {
      "epoch": 1.5454825778600063,
      "grad_norm": 0.18569281697273254,
      "learning_rate": 4.497003568059254e-05,
      "loss": 0.5289,
      "step": 7518
    },
    {
      "epoch": 1.5456881488333847,
      "grad_norm": 0.19323447346687317,
      "learning_rate": 4.496004757641968e-05,
      "loss": 0.5605,
      "step": 7519
    },
    {
      "epoch": 1.5458937198067633,
      "grad_norm": 0.18728816509246826,
      "learning_rate": 4.495005947421511e-05,
      "loss": 0.5522,
      "step": 7520
    },
    {
      "epoch": 1.5460992907801419,
      "grad_norm": 0.19524379074573517,
      "learning_rate": 4.4940071374470875e-05,
      "loss": 0.5501,
      "step": 7521
    },
    {
      "epoch": 1.5463048617535204,
      "grad_norm": 0.19686923921108246,
      "learning_rate": 4.4930083277679036e-05,
      "loss": 0.5574,
      "step": 7522
    },
    {
      "epoch": 1.5465104327268988,
      "grad_norm": 0.19316346943378448,
      "learning_rate": 4.492009518433167e-05,
      "loss": 0.5493,
      "step": 7523
    },
    {
      "epoch": 1.5467160037002774,
      "grad_norm": 0.19701054692268372,
      "learning_rate": 4.491010709492085e-05,
      "loss": 0.5269,
      "step": 7524
    },
    {
      "epoch": 1.546921574673656,
      "grad_norm": 0.1707211434841156,
      "learning_rate": 4.490011900993863e-05,
      "loss": 0.5326,
      "step": 7525
    },
    {
      "epoch": 1.5471271456470346,
      "grad_norm": 0.16687439382076263,
      "learning_rate": 4.489013092987706e-05,
      "loss": 0.5514,
      "step": 7526
    },
    {
      "epoch": 1.5473327166204132,
      "grad_norm": 0.1970919817686081,
      "learning_rate": 4.488014285522825e-05,
      "loss": 0.5512,
      "step": 7527
    },
    {
      "epoch": 1.5475382875937918,
      "grad_norm": 0.20226997137069702,
      "learning_rate": 4.487015478648423e-05,
      "loss": 0.5549,
      "step": 7528
    },
    {
      "epoch": 1.5477438585671703,
      "grad_norm": 0.1875869780778885,
      "learning_rate": 4.486016672413708e-05,
      "loss": 0.5532,
      "step": 7529
    },
    {
      "epoch": 1.547949429540549,
      "grad_norm": 0.19215047359466553,
      "learning_rate": 4.4850178668678864e-05,
      "loss": 0.5533,
      "step": 7530
    },
    {
      "epoch": 1.5481550005139275,
      "grad_norm": 0.18497878313064575,
      "learning_rate": 4.484019062060164e-05,
      "loss": 0.5389,
      "step": 7531
    },
    {
      "epoch": 1.548360571487306,
      "grad_norm": 0.18966837227344513,
      "learning_rate": 4.483020258039748e-05,
      "loss": 0.5352,
      "step": 7532
    },
    {
      "epoch": 1.5485661424606847,
      "grad_norm": 0.19131658971309662,
      "learning_rate": 4.482021454855844e-05,
      "loss": 0.5429,
      "step": 7533
    },
    {
      "epoch": 1.548771713434063,
      "grad_norm": 0.18846401572227478,
      "learning_rate": 4.481022652557658e-05,
      "loss": 0.5442,
      "step": 7534
    },
    {
      "epoch": 1.5489772844074416,
      "grad_norm": 0.16239413619041443,
      "learning_rate": 4.480023851194399e-05,
      "loss": 0.5047,
      "step": 7535
    },
    {
      "epoch": 1.5491828553808202,
      "grad_norm": 0.13217657804489136,
      "learning_rate": 4.479025050815272e-05,
      "loss": 0.4997,
      "step": 7536
    },
    {
      "epoch": 1.5493884263541988,
      "grad_norm": 0.12488622963428497,
      "learning_rate": 4.478026251469482e-05,
      "loss": 0.5081,
      "step": 7537
    },
    {
      "epoch": 1.5495939973275772,
      "grad_norm": 0.1763962060213089,
      "learning_rate": 4.477027453206236e-05,
      "loss": 0.5517,
      "step": 7538
    },
    {
      "epoch": 1.5497995683009558,
      "grad_norm": 0.20494931936264038,
      "learning_rate": 4.476028656074739e-05,
      "loss": 0.5535,
      "step": 7539
    },
    {
      "epoch": 1.5500051392743344,
      "grad_norm": 0.2072146087884903,
      "learning_rate": 4.4750298601241976e-05,
      "loss": 0.5409,
      "step": 7540
    },
    {
      "epoch": 1.550210710247713,
      "grad_norm": 0.1965474635362625,
      "learning_rate": 4.4740310654038194e-05,
      "loss": 0.5307,
      "step": 7541
    },
    {
      "epoch": 1.5504162812210915,
      "grad_norm": 0.16837544739246368,
      "learning_rate": 4.47303227196281e-05,
      "loss": 0.5289,
      "step": 7542
    },
    {
      "epoch": 1.5506218521944701,
      "grad_norm": 0.16805261373519897,
      "learning_rate": 4.4720334798503725e-05,
      "loss": 0.5413,
      "step": 7543
    },
    {
      "epoch": 1.5508274231678487,
      "grad_norm": 0.203588604927063,
      "learning_rate": 4.471034689115717e-05,
      "loss": 0.5474,
      "step": 7544
    },
    {
      "epoch": 1.5510329941412273,
      "grad_norm": 0.20456770062446594,
      "learning_rate": 4.470035899808046e-05,
      "loss": 0.5409,
      "step": 7545
    },
    {
      "epoch": 1.551238565114606,
      "grad_norm": 0.18718034029006958,
      "learning_rate": 4.469037111976566e-05,
      "loss": 0.537,
      "step": 7546
    },
    {
      "epoch": 1.5514441360879845,
      "grad_norm": 0.19375449419021606,
      "learning_rate": 4.4680383256704814e-05,
      "loss": 0.5322,
      "step": 7547
    },
    {
      "epoch": 1.551649707061363,
      "grad_norm": 0.23705141246318817,
      "learning_rate": 4.467039540939001e-05,
      "loss": 0.5616,
      "step": 7548
    },
    {
      "epoch": 1.5518552780347417,
      "grad_norm": 0.16841238737106323,
      "learning_rate": 4.466040757831328e-05,
      "loss": 0.4964,
      "step": 7549
    },
    {
      "epoch": 1.55206084900812,
      "grad_norm": 0.16423995792865753,
      "learning_rate": 4.465041976396668e-05,
      "loss": 0.5511,
      "step": 7550
    },
    {
      "epoch": 1.5522664199814986,
      "grad_norm": 0.1915719360113144,
      "learning_rate": 4.464043196684227e-05,
      "loss": 0.5412,
      "step": 7551
    },
    {
      "epoch": 1.5524719909548772,
      "grad_norm": 0.19022904336452484,
      "learning_rate": 4.463044418743209e-05,
      "loss": 0.5372,
      "step": 7552
    },
    {
      "epoch": 1.5526775619282556,
      "grad_norm": 0.19907855987548828,
      "learning_rate": 4.4620456426228196e-05,
      "loss": 0.5657,
      "step": 7553
    },
    {
      "epoch": 1.5528831329016342,
      "grad_norm": 0.1949799507856369,
      "learning_rate": 4.461046868372264e-05,
      "loss": 0.5452,
      "step": 7554
    },
    {
      "epoch": 1.5530887038750127,
      "grad_norm": 0.1677858829498291,
      "learning_rate": 4.4600480960407467e-05,
      "loss": 0.5087,
      "step": 7555
    },
    {
      "epoch": 1.5532942748483913,
      "grad_norm": 0.1660327911376953,
      "learning_rate": 4.459049325677474e-05,
      "loss": 0.5361,
      "step": 7556
    },
    {
      "epoch": 1.55349984582177,
      "grad_norm": 0.16196422278881073,
      "learning_rate": 4.45805055733165e-05,
      "loss": 0.5322,
      "step": 7557
    },
    {
      "epoch": 1.5537054167951485,
      "grad_norm": 0.1612974852323532,
      "learning_rate": 4.457051791052478e-05,
      "loss": 0.5549,
      "step": 7558
    },
    {
      "epoch": 1.553910987768527,
      "grad_norm": 0.19015921652317047,
      "learning_rate": 4.456053026889164e-05,
      "loss": 0.5375,
      "step": 7559
    },
    {
      "epoch": 1.5541165587419057,
      "grad_norm": 0.19856490194797516,
      "learning_rate": 4.45505426489091e-05,
      "loss": 0.5626,
      "step": 7560
    },
    {
      "epoch": 1.5543221297152843,
      "grad_norm": 0.18954843282699585,
      "learning_rate": 4.454055505106925e-05,
      "loss": 0.5461,
      "step": 7561
    },
    {
      "epoch": 1.5545277006886629,
      "grad_norm": 0.16355063021183014,
      "learning_rate": 4.45305674758641e-05,
      "loss": 0.5308,
      "step": 7562
    },
    {
      "epoch": 1.5547332716620414,
      "grad_norm": 0.16068147122859955,
      "learning_rate": 4.452057992378569e-05,
      "loss": 0.5596,
      "step": 7563
    },
    {
      "epoch": 1.55493884263542,
      "grad_norm": 0.18733803927898407,
      "learning_rate": 4.4510592395326064e-05,
      "loss": 0.5618,
      "step": 7564
    },
    {
      "epoch": 1.5551444136087984,
      "grad_norm": 0.16565637290477753,
      "learning_rate": 4.4500604890977264e-05,
      "loss": 0.533,
      "step": 7565
    },
    {
      "epoch": 1.555349984582177,
      "grad_norm": 0.1654541790485382,
      "learning_rate": 4.449061741123134e-05,
      "loss": 0.5562,
      "step": 7566
    },
    {
      "epoch": 1.5555555555555556,
      "grad_norm": 0.20242147147655487,
      "learning_rate": 4.448062995658028e-05,
      "loss": 0.5494,
      "step": 7567
    },
    {
      "epoch": 1.5557611265289342,
      "grad_norm": 0.19619537889957428,
      "learning_rate": 4.447064252751619e-05,
      "loss": 0.5455,
      "step": 7568
    },
    {
      "epoch": 1.5559666975023125,
      "grad_norm": 0.16296258568763733,
      "learning_rate": 4.446065512453106e-05,
      "loss": 0.5202,
      "step": 7569
    },
    {
      "epoch": 1.5561722684756911,
      "grad_norm": 0.15891185402870178,
      "learning_rate": 4.4450667748116935e-05,
      "loss": 0.5455,
      "step": 7570
    },
    {
      "epoch": 1.5563778394490697,
      "grad_norm": 0.19792260229587555,
      "learning_rate": 4.444068039876584e-05,
      "loss": 0.5495,
      "step": 7571
    },
    {
      "epoch": 1.5565834104224483,
      "grad_norm": 0.19216637313365936,
      "learning_rate": 4.4430693076969805e-05,
      "loss": 0.5576,
      "step": 7572
    },
    {
      "epoch": 1.5567889813958269,
      "grad_norm": 0.18915432691574097,
      "learning_rate": 4.442070578322086e-05,
      "loss": 0.5269,
      "step": 7573
    },
    {
      "epoch": 1.5569945523692055,
      "grad_norm": 0.19710315763950348,
      "learning_rate": 4.441071851801102e-05,
      "loss": 0.589,
      "step": 7574
    },
    {
      "epoch": 1.557200123342584,
      "grad_norm": 0.19663040339946747,
      "learning_rate": 4.4400731281832346e-05,
      "loss": 0.5445,
      "step": 7575
    },
    {
      "epoch": 1.5574056943159627,
      "grad_norm": 0.16456833481788635,
      "learning_rate": 4.4390744075176826e-05,
      "loss": 0.5084,
      "step": 7576
    },
    {
      "epoch": 1.5576112652893412,
      "grad_norm": 0.16168387234210968,
      "learning_rate": 4.438075689853651e-05,
      "loss": 0.5335,
      "step": 7577
    },
    {
      "epoch": 1.5578168362627198,
      "grad_norm": 0.19194790720939636,
      "learning_rate": 4.43707697524034e-05,
      "loss": 0.5517,
      "step": 7578
    },
    {
      "epoch": 1.5580224072360984,
      "grad_norm": 0.19601012766361237,
      "learning_rate": 4.4360782637269535e-05,
      "loss": 0.5568,
      "step": 7579
    },
    {
      "epoch": 1.5582279782094768,
      "grad_norm": 0.18594755232334137,
      "learning_rate": 4.435079555362691e-05,
      "loss": 0.5313,
      "step": 7580
    },
    {
      "epoch": 1.5584335491828554,
      "grad_norm": 0.16498349606990814,
      "learning_rate": 4.434080850196754e-05,
      "loss": 0.5261,
      "step": 7581
    },
    {
      "epoch": 1.558639120156234,
      "grad_norm": 0.15921123325824738,
      "learning_rate": 4.433082148278348e-05,
      "loss": 0.5481,
      "step": 7582
    },
    {
      "epoch": 1.5588446911296125,
      "grad_norm": 0.19702661037445068,
      "learning_rate": 4.4320834496566706e-05,
      "loss": 0.565,
      "step": 7583
    },
    {
      "epoch": 1.559050262102991,
      "grad_norm": 0.19030775129795074,
      "learning_rate": 4.431084754380925e-05,
      "loss": 0.5561,
      "step": 7584
    },
    {
      "epoch": 1.5592558330763695,
      "grad_norm": 0.19048479199409485,
      "learning_rate": 4.43008606250031e-05,
      "loss": 0.5367,
      "step": 7585
    },
    {
      "epoch": 1.559461404049748,
      "grad_norm": 0.189329594373703,
      "learning_rate": 4.429087374064029e-05,
      "loss": 0.5271,
      "step": 7586
    },
    {
      "epoch": 1.5596669750231267,
      "grad_norm": 0.1947106570005417,
      "learning_rate": 4.428088689121282e-05,
      "loss": 0.5415,
      "step": 7587
    },
    {
      "epoch": 1.5598725459965053,
      "grad_norm": 0.19340308010578156,
      "learning_rate": 4.427090007721267e-05,
      "loss": 0.5465,
      "step": 7588
    },
    {
      "epoch": 1.5600781169698839,
      "grad_norm": 0.19165843725204468,
      "learning_rate": 4.4260913299131885e-05,
      "loss": 0.5478,
      "step": 7589
    },
    {
      "epoch": 1.5602836879432624,
      "grad_norm": 0.20227845013141632,
      "learning_rate": 4.425092655746244e-05,
      "loss": 0.5432,
      "step": 7590
    },
    {
      "epoch": 1.560489258916641,
      "grad_norm": 0.20343764126300812,
      "learning_rate": 4.424093985269635e-05,
      "loss": 0.5508,
      "step": 7591
    },
    {
      "epoch": 1.5606948298900196,
      "grad_norm": 0.19420337677001953,
      "learning_rate": 4.423095318532561e-05,
      "loss": 0.5483,
      "step": 7592
    },
    {
      "epoch": 1.5609004008633982,
      "grad_norm": 0.19176806509494781,
      "learning_rate": 4.42209665558422e-05,
      "loss": 0.5431,
      "step": 7593
    },
    {
      "epoch": 1.5611059718367768,
      "grad_norm": 0.1622324138879776,
      "learning_rate": 4.421097996473813e-05,
      "loss": 0.5213,
      "step": 7594
    },
    {
      "epoch": 1.5613115428101552,
      "grad_norm": 0.1601867824792862,
      "learning_rate": 4.420099341250538e-05,
      "loss": 0.5538,
      "step": 7595
    },
    {
      "epoch": 1.5615171137835338,
      "grad_norm": 0.1894841194152832,
      "learning_rate": 4.4191006899635964e-05,
      "loss": 0.5515,
      "step": 7596
    },
    {
      "epoch": 1.5617226847569123,
      "grad_norm": 0.15804892778396606,
      "learning_rate": 4.418102042662184e-05,
      "loss": 0.493,
      "step": 7597
    },
    {
      "epoch": 1.561928255730291,
      "grad_norm": 0.15905854105949402,
      "learning_rate": 4.417103399395503e-05,
      "loss": 0.5405,
      "step": 7598
    },
    {
      "epoch": 1.5621338267036693,
      "grad_norm": 0.19244399666786194,
      "learning_rate": 4.4161047602127494e-05,
      "loss": 0.5372,
      "step": 7599
    },
    {
      "epoch": 1.5623393976770479,
      "grad_norm": 0.18696913123130798,
      "learning_rate": 4.415106125163123e-05,
      "loss": 0.534,
      "step": 7600
    },
    {
      "epoch": 1.5625449686504265,
      "grad_norm": 0.19538486003875732,
      "learning_rate": 4.41410749429582e-05,
      "loss": 0.5348,
      "step": 7601
    },
    {
      "epoch": 1.562750539623805,
      "grad_norm": 0.19690623879432678,
      "learning_rate": 4.4131088676600386e-05,
      "loss": 0.5461,
      "step": 7602
    },
    {
      "epoch": 1.5629561105971836,
      "grad_norm": 0.19831502437591553,
      "learning_rate": 4.412110245304978e-05,
      "loss": 0.5541,
      "step": 7603
    },
    {
      "epoch": 1.5631616815705622,
      "grad_norm": 0.20122960209846497,
      "learning_rate": 4.411111627279835e-05,
      "loss": 0.5473,
      "step": 7604
    },
    {
      "epoch": 1.5633672525439408,
      "grad_norm": 0.1640729159116745,
      "learning_rate": 4.410113013633807e-05,
      "loss": 0.5054,
      "step": 7605
    },
    {
      "epoch": 1.5635728235173194,
      "grad_norm": 0.16052688658237457,
      "learning_rate": 4.4091144044160905e-05,
      "loss": 0.5322,
      "step": 7606
    },
    {
      "epoch": 1.563778394490698,
      "grad_norm": 0.19739840924739838,
      "learning_rate": 4.408115799675881e-05,
      "loss": 0.5606,
      "step": 7607
    },
    {
      "epoch": 1.5639839654640766,
      "grad_norm": 0.19876334071159363,
      "learning_rate": 4.407117199462378e-05,
      "loss": 0.5147,
      "step": 7608
    },
    {
      "epoch": 1.5641895364374552,
      "grad_norm": 0.19272910058498383,
      "learning_rate": 4.406118603824775e-05,
      "loss": 0.5433,
      "step": 7609
    },
    {
      "epoch": 1.5643951074108335,
      "grad_norm": 0.1927374005317688,
      "learning_rate": 4.4051200128122715e-05,
      "loss": 0.5351,
      "step": 7610
    },
    {
      "epoch": 1.5646006783842121,
      "grad_norm": 0.19942370057106018,
      "learning_rate": 4.404121426474061e-05,
      "loss": 0.543,
      "step": 7611
    },
    {
      "epoch": 1.5648062493575907,
      "grad_norm": 0.15870188176631927,
      "learning_rate": 4.4031228448593395e-05,
      "loss": 0.5113,
      "step": 7612
    },
    {
      "epoch": 1.5650118203309693,
      "grad_norm": 0.1612454354763031,
      "learning_rate": 4.402124268017303e-05,
      "loss": 0.54,
      "step": 7613
    },
    {
      "epoch": 1.5652173913043477,
      "grad_norm": 0.19843849539756775,
      "learning_rate": 4.4011256959971465e-05,
      "loss": 0.5468,
      "step": 7614
    },
    {
      "epoch": 1.5654229622777263,
      "grad_norm": 0.1602935492992401,
      "learning_rate": 4.400127128848065e-05,
      "loss": 0.5168,
      "step": 7615
    },
    {
      "epoch": 1.5656285332511048,
      "grad_norm": 0.18167522549629211,
      "learning_rate": 4.39912856661925e-05,
      "loss": 0.5568,
      "step": 7616
    },
    {
      "epoch": 1.5658341042244834,
      "grad_norm": 0.16602426767349243,
      "learning_rate": 4.398130009359902e-05,
      "loss": 0.5254,
      "step": 7617
    },
    {
      "epoch": 1.566039675197862,
      "grad_norm": 0.16260112822055817,
      "learning_rate": 4.397131457119212e-05,
      "loss": 0.5646,
      "step": 7618
    },
    {
      "epoch": 1.5662452461712406,
      "grad_norm": 0.19944046437740326,
      "learning_rate": 4.396132909946373e-05,
      "loss": 0.5459,
      "step": 7619
    },
    {
      "epoch": 1.5664508171446192,
      "grad_norm": 0.19292668998241425,
      "learning_rate": 4.3951343678905816e-05,
      "loss": 0.5421,
      "step": 7620
    },
    {
      "epoch": 1.5666563881179978,
      "grad_norm": 0.19421285390853882,
      "learning_rate": 4.3941358310010295e-05,
      "loss": 0.5649,
      "step": 7621
    },
    {
      "epoch": 1.5668619590913764,
      "grad_norm": 0.1894664317369461,
      "learning_rate": 4.393137299326911e-05,
      "loss": 0.5683,
      "step": 7622
    },
    {
      "epoch": 1.567067530064755,
      "grad_norm": 0.18972072005271912,
      "learning_rate": 4.392138772917415e-05,
      "loss": 0.5459,
      "step": 7623
    },
    {
      "epoch": 1.5672731010381336,
      "grad_norm": 0.16586807370185852,
      "learning_rate": 4.39114025182174e-05,
      "loss": 0.5409,
      "step": 7624
    },
    {
      "epoch": 1.567478672011512,
      "grad_norm": 0.13293050229549408,
      "learning_rate": 4.390141736089076e-05,
      "loss": 0.5069,
      "step": 7625
    },
    {
      "epoch": 1.5676842429848905,
      "grad_norm": 0.15764681994915009,
      "learning_rate": 4.389143225768616e-05,
      "loss": 0.5475,
      "step": 7626
    },
    {
      "epoch": 1.567889813958269,
      "grad_norm": 0.1995992809534073,
      "learning_rate": 4.3881447209095495e-05,
      "loss": 0.5426,
      "step": 7627
    },
    {
      "epoch": 1.5680953849316477,
      "grad_norm": 0.1619638353586197,
      "learning_rate": 4.3871462215610696e-05,
      "loss": 0.5103,
      "step": 7628
    },
    {
      "epoch": 1.568300955905026,
      "grad_norm": 0.16626045107841492,
      "learning_rate": 4.386147727772369e-05,
      "loss": 0.5347,
      "step": 7629
    },
    {
      "epoch": 1.5685065268784046,
      "grad_norm": 0.20278498530387878,
      "learning_rate": 4.3851492395926364e-05,
      "loss": 0.5572,
      "step": 7630
    },
    {
      "epoch": 1.5687120978517832,
      "grad_norm": 0.2107708603143692,
      "learning_rate": 4.384150757071064e-05,
      "loss": 0.5623,
      "step": 7631
    },
    {
      "epoch": 1.5689176688251618,
      "grad_norm": 0.19431017339229584,
      "learning_rate": 4.383152280256844e-05,
      "loss": 0.5589,
      "step": 7632
    },
    {
      "epoch": 1.5691232397985404,
      "grad_norm": 0.1882307529449463,
      "learning_rate": 4.3821538091991645e-05,
      "loss": 0.5481,
      "step": 7633
    },
    {
      "epoch": 1.569328810771919,
      "grad_norm": 0.19112688302993774,
      "learning_rate": 4.3811553439472166e-05,
      "loss": 0.5419,
      "step": 7634
    },
    {
      "epoch": 1.5695343817452976,
      "grad_norm": 0.19997398555278778,
      "learning_rate": 4.380156884550188e-05,
      "loss": 0.5692,
      "step": 7635
    },
    {
      "epoch": 1.5697399527186762,
      "grad_norm": 0.19339673221111298,
      "learning_rate": 4.3791584310572686e-05,
      "loss": 0.5366,
      "step": 7636
    },
    {
      "epoch": 1.5699455236920548,
      "grad_norm": 0.18707948923110962,
      "learning_rate": 4.3781599835176504e-05,
      "loss": 0.5303,
      "step": 7637
    },
    {
      "epoch": 1.5701510946654333,
      "grad_norm": 0.1914735585451126,
      "learning_rate": 4.37716154198052e-05,
      "loss": 0.5569,
      "step": 7638
    },
    {
      "epoch": 1.570356665638812,
      "grad_norm": 0.19773781299591064,
      "learning_rate": 4.376163106495067e-05,
      "loss": 0.5482,
      "step": 7639
    },
    {
      "epoch": 1.5705622366121905,
      "grad_norm": 0.17177283763885498,
      "learning_rate": 4.3751646771104774e-05,
      "loss": 0.5203,
      "step": 7640
    },
    {
      "epoch": 1.5707678075855689,
      "grad_norm": 0.16656096279621124,
      "learning_rate": 4.374166253875942e-05,
      "loss": 0.5528,
      "step": 7641
    },
    {
      "epoch": 1.5709733785589475,
      "grad_norm": 0.19667677581310272,
      "learning_rate": 4.3731678368406464e-05,
      "loss": 0.5588,
      "step": 7642
    },
    {
      "epoch": 1.571178949532326,
      "grad_norm": 0.15893961489200592,
      "learning_rate": 4.372169426053777e-05,
      "loss": 0.5165,
      "step": 7643
    },
    {
      "epoch": 1.5713845205057044,
      "grad_norm": 0.15546555817127228,
      "learning_rate": 4.371171021564525e-05,
      "loss": 0.5631,
      "step": 7644
    },
    {
      "epoch": 1.571590091479083,
      "grad_norm": 0.16072389483451843,
      "learning_rate": 4.3701726234220744e-05,
      "loss": 0.5273,
      "step": 7645
    },
    {
      "epoch": 1.5717956624524616,
      "grad_norm": 0.15544024109840393,
      "learning_rate": 4.369174231675611e-05,
      "loss": 0.5508,
      "step": 7646
    },
    {
      "epoch": 1.5720012334258402,
      "grad_norm": 0.15451103448867798,
      "learning_rate": 4.3681758463743225e-05,
      "loss": 0.5066,
      "step": 7647
    },
    {
      "epoch": 1.5722068043992188,
      "grad_norm": 0.15433375537395477,
      "learning_rate": 4.367177467567394e-05,
      "loss": 0.5444,
      "step": 7648
    },
    {
      "epoch": 1.5724123753725974,
      "grad_norm": 0.16077595949172974,
      "learning_rate": 4.36617909530401e-05,
      "loss": 0.5234,
      "step": 7649
    },
    {
      "epoch": 1.572617946345976,
      "grad_norm": 0.15683984756469727,
      "learning_rate": 4.3651807296333555e-05,
      "loss": 0.5316,
      "step": 7650
    },
    {
      "epoch": 1.5728235173193545,
      "grad_norm": 0.1868003010749817,
      "learning_rate": 4.3641823706046186e-05,
      "loss": 0.5313,
      "step": 7651
    },
    {
      "epoch": 1.5730290882927331,
      "grad_norm": 0.1609300971031189,
      "learning_rate": 4.363184018266979e-05,
      "loss": 0.5225,
      "step": 7652
    },
    {
      "epoch": 1.5732346592661117,
      "grad_norm": 0.15994325280189514,
      "learning_rate": 4.362185672669626e-05,
      "loss": 0.5298,
      "step": 7653
    },
    {
      "epoch": 1.5734402302394903,
      "grad_norm": 0.1932908594608307,
      "learning_rate": 4.3611873338617393e-05,
      "loss": 0.5419,
      "step": 7654
    },
    {
      "epoch": 1.573645801212869,
      "grad_norm": 0.1590869426727295,
      "learning_rate": 4.3601890018925046e-05,
      "loss": 0.5014,
      "step": 7655
    },
    {
      "epoch": 1.5738513721862473,
      "grad_norm": 0.16261689364910126,
      "learning_rate": 4.359190676811104e-05,
      "loss": 0.5592,
      "step": 7656
    },
    {
      "epoch": 1.5740569431596259,
      "grad_norm": 0.20458675920963287,
      "learning_rate": 4.3581923586667196e-05,
      "loss": 0.5704,
      "step": 7657
    },
    {
      "epoch": 1.5742625141330044,
      "grad_norm": 0.189193993806839,
      "learning_rate": 4.3571940475085355e-05,
      "loss": 0.5508,
      "step": 7658
    },
    {
      "epoch": 1.574468085106383,
      "grad_norm": 0.18907295167446136,
      "learning_rate": 4.356195743385734e-05,
      "loss": 0.5312,
      "step": 7659
    },
    {
      "epoch": 1.5746736560797614,
      "grad_norm": 0.18863658607006073,
      "learning_rate": 4.3551974463474956e-05,
      "loss": 0.5668,
      "step": 7660
    },
    {
      "epoch": 1.57487922705314,
      "grad_norm": 0.1917717009782791,
      "learning_rate": 4.354199156443002e-05,
      "loss": 0.5327,
      "step": 7661
    },
    {
      "epoch": 1.5750847980265186,
      "grad_norm": 0.19521358609199524,
      "learning_rate": 4.353200873721435e-05,
      "loss": 0.5242,
      "step": 7662
    },
    {
      "epoch": 1.5752903689998972,
      "grad_norm": 0.19762447476387024,
      "learning_rate": 4.352202598231975e-05,
      "loss": 0.5609,
      "step": 7663
    },
    {
      "epoch": 1.5754959399732757,
      "grad_norm": 0.19108183681964874,
      "learning_rate": 4.3512043300237994e-05,
      "loss": 0.5453,
      "step": 7664
    },
    {
      "epoch": 1.5757015109466543,
      "grad_norm": 0.19864460825920105,
      "learning_rate": 4.3502060691460935e-05,
      "loss": 0.5685,
      "step": 7665
    },
    {
      "epoch": 1.575907081920033,
      "grad_norm": 0.1909678727388382,
      "learning_rate": 4.349207815648035e-05,
      "loss": 0.5466,
      "step": 7666
    },
    {
      "epoch": 1.5761126528934115,
      "grad_norm": 0.19452133774757385,
      "learning_rate": 4.348209569578802e-05,
      "loss": 0.5522,
      "step": 7667
    },
    {
      "epoch": 1.57631822386679,
      "grad_norm": 0.1838688850402832,
      "learning_rate": 4.3472113309875744e-05,
      "loss": 0.5366,
      "step": 7668
    },
    {
      "epoch": 1.5765237948401687,
      "grad_norm": 0.18900097906589508,
      "learning_rate": 4.3462130999235295e-05,
      "loss": 0.5301,
      "step": 7669
    },
    {
      "epoch": 1.5767293658135473,
      "grad_norm": 0.19407951831817627,
      "learning_rate": 4.345214876435847e-05,
      "loss": 0.544,
      "step": 7670
    },
    {
      "epoch": 1.5769349367869256,
      "grad_norm": 0.19032980501651764,
      "learning_rate": 4.344216660573703e-05,
      "loss": 0.5525,
      "step": 7671
    },
    {
      "epoch": 1.5771405077603042,
      "grad_norm": 0.19637268781661987,
      "learning_rate": 4.343218452386277e-05,
      "loss": 0.5492,
      "step": 7672
    },
    {
      "epoch": 1.5773460787336828,
      "grad_norm": 0.18958862125873566,
      "learning_rate": 4.342220251922744e-05,
      "loss": 0.5393,
      "step": 7673
    },
    {
      "epoch": 1.5775516497070614,
      "grad_norm": 0.1648726463317871,
      "learning_rate": 4.341222059232283e-05,
      "loss": 0.4955,
      "step": 7674
    },
    {
      "epoch": 1.5777572206804398,
      "grad_norm": 0.16251088678836823,
      "learning_rate": 4.340223874364069e-05,
      "loss": 0.5312,
      "step": 7675
    },
    {
      "epoch": 1.5779627916538184,
      "grad_norm": 0.19399689137935638,
      "learning_rate": 4.3392256973672776e-05,
      "loss": 0.5527,
      "step": 7676
    },
    {
      "epoch": 1.578168362627197,
      "grad_norm": 0.1864946484565735,
      "learning_rate": 4.338227528291085e-05,
      "loss": 0.5352,
      "step": 7677
    },
    {
      "epoch": 1.5783739336005755,
      "grad_norm": 0.19393518567085266,
      "learning_rate": 4.337229367184664e-05,
      "loss": 0.5451,
      "step": 7678
    },
    {
      "epoch": 1.5785795045739541,
      "grad_norm": 0.19147159159183502,
      "learning_rate": 4.3362312140971927e-05,
      "loss": 0.5515,
      "step": 7679
    },
    {
      "epoch": 1.5787850755473327,
      "grad_norm": 0.19576434791088104,
      "learning_rate": 4.3352330690778445e-05,
      "loss": 0.5504,
      "step": 7680
    },
    {
      "epoch": 1.5789906465207113,
      "grad_norm": 0.19198796153068542,
      "learning_rate": 4.3342349321757934e-05,
      "loss": 0.5452,
      "step": 7681
    },
    {
      "epoch": 1.57919621749409,
      "grad_norm": 0.19014614820480347,
      "learning_rate": 4.3332368034402105e-05,
      "loss": 0.5615,
      "step": 7682
    },
    {
      "epoch": 1.5794017884674685,
      "grad_norm": 0.1940838247537613,
      "learning_rate": 4.332238682920272e-05,
      "loss": 0.5369,
      "step": 7683
    },
    {
      "epoch": 1.579607359440847,
      "grad_norm": 0.1929844617843628,
      "learning_rate": 4.3312405706651496e-05,
      "loss": 0.5502,
      "step": 7684
    },
    {
      "epoch": 1.5798129304142257,
      "grad_norm": 0.1682363599538803,
      "learning_rate": 4.330242466724014e-05,
      "loss": 0.5245,
      "step": 7685
    },
    {
      "epoch": 1.580018501387604,
      "grad_norm": 0.14466369152069092,
      "learning_rate": 4.32924437114604e-05,
      "loss": 0.5162,
      "step": 7686
    },
    {
      "epoch": 1.5802240723609826,
      "grad_norm": 0.16307014226913452,
      "learning_rate": 4.3282462839803976e-05,
      "loss": 0.5432,
      "step": 7687
    },
    {
      "epoch": 1.5804296433343612,
      "grad_norm": 0.1943131685256958,
      "learning_rate": 4.3272482052762584e-05,
      "loss": 0.5377,
      "step": 7688
    },
    {
      "epoch": 1.5806352143077398,
      "grad_norm": 0.1945241242647171,
      "learning_rate": 4.3262501350827925e-05,
      "loss": 0.5425,
      "step": 7689
    },
    {
      "epoch": 1.5808407852811182,
      "grad_norm": 0.1876905858516693,
      "learning_rate": 4.3252520734491706e-05,
      "loss": 0.5435,
      "step": 7690
    },
    {
      "epoch": 1.5810463562544967,
      "grad_norm": 0.18484771251678467,
      "learning_rate": 4.3242540204245625e-05,
      "loss": 0.5292,
      "step": 7691
    },
    {
      "epoch": 1.5812519272278753,
      "grad_norm": 0.19600705802440643,
      "learning_rate": 4.323255976058135e-05,
      "loss": 0.5593,
      "step": 7692
    },
    {
      "epoch": 1.581457498201254,
      "grad_norm": 0.18599645793437958,
      "learning_rate": 4.3222579403990614e-05,
      "loss": 0.5226,
      "step": 7693
    },
    {
      "epoch": 1.5816630691746325,
      "grad_norm": 0.18677088618278503,
      "learning_rate": 4.321259913496508e-05,
      "loss": 0.517,
      "step": 7694
    },
    {
      "epoch": 1.581868640148011,
      "grad_norm": 0.19142663478851318,
      "learning_rate": 4.3202618953996425e-05,
      "loss": 0.5486,
      "step": 7695
    },
    {
      "epoch": 1.5820742111213897,
      "grad_norm": 0.19013050198554993,
      "learning_rate": 4.319263886157634e-05,
      "loss": 0.5584,
      "step": 7696
    },
    {
      "epoch": 1.5822797820947683,
      "grad_norm": 0.1859898418188095,
      "learning_rate": 4.31826588581965e-05,
      "loss": 0.5319,
      "step": 7697
    },
    {
      "epoch": 1.5824853530681469,
      "grad_norm": 0.19170920550823212,
      "learning_rate": 4.3172678944348556e-05,
      "loss": 0.5519,
      "step": 7698
    },
    {
      "epoch": 1.5826909240415254,
      "grad_norm": 0.21785251796245575,
      "learning_rate": 4.3162699120524165e-05,
      "loss": 0.5545,
      "step": 7699
    },
    {
      "epoch": 1.582896495014904,
      "grad_norm": 0.19362372159957886,
      "learning_rate": 4.3152719387215016e-05,
      "loss": 0.5692,
      "step": 7700
    },
    {
      "epoch": 1.5831020659882824,
      "grad_norm": 0.19510303437709808,
      "learning_rate": 4.3142739744912754e-05,
      "loss": 0.53,
      "step": 7701
    },
    {
      "epoch": 1.583307636961661,
      "grad_norm": 0.18352247774600983,
      "learning_rate": 4.3132760194109017e-05,
      "loss": 0.5042,
      "step": 7702
    },
    {
      "epoch": 1.5835132079350396,
      "grad_norm": 0.17170487344264984,
      "learning_rate": 4.312278073529546e-05,
      "loss": 0.5543,
      "step": 7703
    },
    {
      "epoch": 1.5837187789084182,
      "grad_norm": 0.19856008887290955,
      "learning_rate": 4.311280136896372e-05,
      "loss": 0.5696,
      "step": 7704
    },
    {
      "epoch": 1.5839243498817965,
      "grad_norm": 0.19567757844924927,
      "learning_rate": 4.310282209560543e-05,
      "loss": 0.5493,
      "step": 7705
    },
    {
      "epoch": 1.5841299208551751,
      "grad_norm": 0.20032745599746704,
      "learning_rate": 4.309284291571223e-05,
      "loss": 0.5603,
      "step": 7706
    },
    {
      "epoch": 1.5843354918285537,
      "grad_norm": 0.19758538901805878,
      "learning_rate": 4.308286382977575e-05,
      "loss": 0.5574,
      "step": 7707
    },
    {
      "epoch": 1.5845410628019323,
      "grad_norm": 0.1984431892633438,
      "learning_rate": 4.3072884838287605e-05,
      "loss": 0.5502,
      "step": 7708
    },
    {
      "epoch": 1.5847466337753109,
      "grad_norm": 0.18602418899536133,
      "learning_rate": 4.306290594173942e-05,
      "loss": 0.5592,
      "step": 7709
    },
    {
      "epoch": 1.5849522047486895,
      "grad_norm": 0.19030845165252686,
      "learning_rate": 4.3052927140622814e-05,
      "loss": 0.5444,
      "step": 7710
    },
    {
      "epoch": 1.585157775722068,
      "grad_norm": 0.1725304126739502,
      "learning_rate": 4.304294843542938e-05,
      "loss": 0.5359,
      "step": 7711
    },
    {
      "epoch": 1.5853633466954467,
      "grad_norm": 0.16047422587871552,
      "learning_rate": 4.3032969826650714e-05,
      "loss": 0.5433,
      "step": 7712
    },
    {
      "epoch": 1.5855689176688252,
      "grad_norm": 0.19161836802959442,
      "learning_rate": 4.302299131477844e-05,
      "loss": 0.5271,
      "step": 7713
    },
    {
      "epoch": 1.5857744886422038,
      "grad_norm": 0.15936709940433502,
      "learning_rate": 4.301301290030415e-05,
      "loss": 0.542,
      "step": 7714
    },
    {
      "epoch": 1.5859800596155824,
      "grad_norm": 0.16099698841571808,
      "learning_rate": 4.3003034583719435e-05,
      "loss": 0.5483,
      "step": 7715
    },
    {
      "epoch": 1.586185630588961,
      "grad_norm": 0.19221562147140503,
      "learning_rate": 4.299305636551585e-05,
      "loss": 0.5501,
      "step": 7716
    },
    {
      "epoch": 1.5863912015623394,
      "grad_norm": 0.15940634906291962,
      "learning_rate": 4.2983078246185015e-05,
      "loss": 0.5228,
      "step": 7717
    },
    {
      "epoch": 1.586596772535718,
      "grad_norm": 0.15413826704025269,
      "learning_rate": 4.297310022621849e-05,
      "loss": 0.5451,
      "step": 7718
    },
    {
      "epoch": 1.5868023435090965,
      "grad_norm": 0.18997204303741455,
      "learning_rate": 4.2963122306107816e-05,
      "loss": 0.5558,
      "step": 7719
    },
    {
      "epoch": 1.587007914482475,
      "grad_norm": 0.18921561539173126,
      "learning_rate": 4.295314448634461e-05,
      "loss": 0.5325,
      "step": 7720
    },
    {
      "epoch": 1.5872134854558535,
      "grad_norm": 0.19362856447696686,
      "learning_rate": 4.29431667674204e-05,
      "loss": 0.5475,
      "step": 7721
    },
    {
      "epoch": 1.587419056429232,
      "grad_norm": 0.1657908409833908,
      "learning_rate": 4.293318914982676e-05,
      "loss": 0.4907,
      "step": 7722
    },
    {
      "epoch": 1.5876246274026107,
      "grad_norm": 0.16281838715076447,
      "learning_rate": 4.2923211634055226e-05,
      "loss": 0.5385,
      "step": 7723
    },
    {
      "epoch": 1.5878301983759893,
      "grad_norm": 0.19449788331985474,
      "learning_rate": 4.291323422059735e-05,
      "loss": 0.5253,
      "step": 7724
    },
    {
      "epoch": 1.5880357693493679,
      "grad_norm": 0.19563239812850952,
      "learning_rate": 4.2903256909944665e-05,
      "loss": 0.5349,
      "step": 7725
    },
    {
      "epoch": 1.5882413403227464,
      "grad_norm": 0.19291435182094574,
      "learning_rate": 4.28932797025887e-05,
      "loss": 0.5294,
      "step": 7726
    },
    {
      "epoch": 1.588446911296125,
      "grad_norm": 0.21474219858646393,
      "learning_rate": 4.288330259902101e-05,
      "loss": 0.5389,
      "step": 7727
    },
    {
      "epoch": 1.5886524822695036,
      "grad_norm": 0.19437165558338165,
      "learning_rate": 4.28733255997331e-05,
      "loss": 0.5459,
      "step": 7728
    },
    {
      "epoch": 1.5888580532428822,
      "grad_norm": 0.18734323978424072,
      "learning_rate": 4.2863348705216516e-05,
      "loss": 0.5381,
      "step": 7729
    },
    {
      "epoch": 1.5890636242162608,
      "grad_norm": 0.19250360131263733,
      "learning_rate": 4.285337191596274e-05,
      "loss": 0.5357,
      "step": 7730
    },
    {
      "epoch": 1.5892691951896394,
      "grad_norm": 0.19198143482208252,
      "learning_rate": 4.284339523246331e-05,
      "loss": 0.5375,
      "step": 7731
    },
    {
      "epoch": 1.5894747661630177,
      "grad_norm": 0.18675874173641205,
      "learning_rate": 4.2833418655209703e-05,
      "loss": 0.5385,
      "step": 7732
    },
    {
      "epoch": 1.5896803371363963,
      "grad_norm": 0.19431853294372559,
      "learning_rate": 4.282344218469342e-05,
      "loss": 0.5468,
      "step": 7733
    },
    {
      "epoch": 1.589885908109775,
      "grad_norm": 0.16220088303089142,
      "learning_rate": 4.281346582140599e-05,
      "loss": 0.5035,
      "step": 7734
    },
    {
      "epoch": 1.5900914790831535,
      "grad_norm": 0.1646573841571808,
      "learning_rate": 4.2803489565838874e-05,
      "loss": 0.5567,
      "step": 7735
    },
    {
      "epoch": 1.5902970500565319,
      "grad_norm": 0.16230642795562744,
      "learning_rate": 4.2793513418483565e-05,
      "loss": 0.5234,
      "step": 7736
    },
    {
      "epoch": 1.5905026210299105,
      "grad_norm": 0.15772514045238495,
      "learning_rate": 4.2783537379831524e-05,
      "loss": 0.5393,
      "step": 7737
    },
    {
      "epoch": 1.590708192003289,
      "grad_norm": 0.18681733310222626,
      "learning_rate": 4.277356145037425e-05,
      "loss": 0.5332,
      "step": 7738
    },
    {
      "epoch": 1.5909137629766676,
      "grad_norm": 0.19121171534061432,
      "learning_rate": 4.276358563060319e-05,
      "loss": 0.5351,
      "step": 7739
    },
    {
      "epoch": 1.5911193339500462,
      "grad_norm": 0.2163754552602768,
      "learning_rate": 4.27536099210098e-05,
      "loss": 0.5121,
      "step": 7740
    },
    {
      "epoch": 1.5913249049234248,
      "grad_norm": 0.17165131866931915,
      "learning_rate": 4.274363432208556e-05,
      "loss": 0.5342,
      "step": 7741
    },
    {
      "epoch": 1.5915304758968034,
      "grad_norm": 0.17426596581935883,
      "learning_rate": 4.273365883432192e-05,
      "loss": 0.5432,
      "step": 7742
    },
    {
      "epoch": 1.591736046870182,
      "grad_norm": 0.16686050593852997,
      "learning_rate": 4.272368345821031e-05,
      "loss": 0.5046,
      "step": 7743
    },
    {
      "epoch": 1.5919416178435606,
      "grad_norm": 0.1610487550497055,
      "learning_rate": 4.2713708194242184e-05,
      "loss": 0.5472,
      "step": 7744
    },
    {
      "epoch": 1.5921471888169392,
      "grad_norm": 0.1983231157064438,
      "learning_rate": 4.270373304290897e-05,
      "loss": 0.5526,
      "step": 7745
    },
    {
      "epoch": 1.5923527597903178,
      "grad_norm": 0.19409148395061493,
      "learning_rate": 4.2693758004702076e-05,
      "loss": 0.5521,
      "step": 7746
    },
    {
      "epoch": 1.5925583307636961,
      "grad_norm": 0.18833288550376892,
      "learning_rate": 4.268378308011296e-05,
      "loss": 0.5263,
      "step": 7747
    },
    {
      "epoch": 1.5927639017370747,
      "grad_norm": 0.18587639927864075,
      "learning_rate": 4.2673808269633016e-05,
      "loss": 0.5297,
      "step": 7748
    },
    {
      "epoch": 1.5929694727104533,
      "grad_norm": 0.18532033264636993,
      "learning_rate": 4.266383357375367e-05,
      "loss": 0.5309,
      "step": 7749
    },
    {
      "epoch": 1.593175043683832,
      "grad_norm": 0.1910453587770462,
      "learning_rate": 4.2653858992966336e-05,
      "loss": 0.5683,
      "step": 7750
    },
    {
      "epoch": 1.5933806146572103,
      "grad_norm": 0.19505764544010162,
      "learning_rate": 4.26438845277624e-05,
      "loss": 0.5421,
      "step": 7751
    },
    {
      "epoch": 1.5935861856305888,
      "grad_norm": 0.19671325385570526,
      "learning_rate": 4.263391017863326e-05,
      "loss": 0.5408,
      "step": 7752
    },
    {
      "epoch": 1.5937917566039674,
      "grad_norm": 0.1978052705526352,
      "learning_rate": 4.26239359460703e-05,
      "loss": 0.5512,
      "step": 7753
    },
    {
      "epoch": 1.593997327577346,
      "grad_norm": 0.1925462931394577,
      "learning_rate": 4.26139618305649e-05,
      "loss": 0.5285,
      "step": 7754
    },
    {
      "epoch": 1.5942028985507246,
      "grad_norm": 0.1875825971364975,
      "learning_rate": 4.260398783260846e-05,
      "loss": 0.5481,
      "step": 7755
    },
    {
      "epoch": 1.5944084695241032,
      "grad_norm": 0.1970067173242569,
      "learning_rate": 4.2594013952692353e-05,
      "loss": 0.528,
      "step": 7756
    },
    {
      "epoch": 1.5946140404974818,
      "grad_norm": 0.19316576421260834,
      "learning_rate": 4.258404019130792e-05,
      "loss": 0.5348,
      "step": 7757
    },
    {
      "epoch": 1.5948196114708604,
      "grad_norm": 0.19398510456085205,
      "learning_rate": 4.257406654894653e-05,
      "loss": 0.5404,
      "step": 7758
    },
    {
      "epoch": 1.595025182444239,
      "grad_norm": 0.19227631390094757,
      "learning_rate": 4.256409302609956e-05,
      "loss": 0.5298,
      "step": 7759
    },
    {
      "epoch": 1.5952307534176176,
      "grad_norm": 0.16509932279586792,
      "learning_rate": 4.255411962325833e-05,
      "loss": 0.5097,
      "step": 7760
    },
    {
      "epoch": 1.5954363243909961,
      "grad_norm": 0.16759321093559265,
      "learning_rate": 4.254414634091418e-05,
      "loss": 0.5725,
      "step": 7761
    },
    {
      "epoch": 1.5956418953643745,
      "grad_norm": 0.19898711144924164,
      "learning_rate": 4.253417317955848e-05,
      "loss": 0.5409,
      "step": 7762
    },
    {
      "epoch": 1.595847466337753,
      "grad_norm": 0.19673512876033783,
      "learning_rate": 4.252420013968254e-05,
      "loss": 0.5403,
      "step": 7763
    },
    {
      "epoch": 1.5960530373111317,
      "grad_norm": 0.19727066159248352,
      "learning_rate": 4.251422722177769e-05,
      "loss": 0.5597,
      "step": 7764
    },
    {
      "epoch": 1.5962586082845103,
      "grad_norm": 0.16265854239463806,
      "learning_rate": 4.250425442633524e-05,
      "loss": 0.5227,
      "step": 7765
    },
    {
      "epoch": 1.5964641792578886,
      "grad_norm": 0.15699994564056396,
      "learning_rate": 4.2494281753846515e-05,
      "loss": 0.5637,
      "step": 7766
    },
    {
      "epoch": 1.5966697502312672,
      "grad_norm": 0.1968710571527481,
      "learning_rate": 4.2484309204802816e-05,
      "loss": 0.5566,
      "step": 7767
    },
    {
      "epoch": 1.5968753212046458,
      "grad_norm": 0.19877804815769196,
      "learning_rate": 4.2474336779695427e-05,
      "loss": 0.5463,
      "step": 7768
    },
    {
      "epoch": 1.5970808921780244,
      "grad_norm": 0.18838095664978027,
      "learning_rate": 4.246436447901567e-05,
      "loss": 0.5483,
      "step": 7769
    },
    {
      "epoch": 1.597286463151403,
      "grad_norm": 0.18883812427520752,
      "learning_rate": 4.245439230325483e-05,
      "loss": 0.5465,
      "step": 7770
    },
    {
      "epoch": 1.5974920341247816,
      "grad_norm": 0.20267321169376373,
      "learning_rate": 4.244442025290418e-05,
      "loss": 0.5651,
      "step": 7771
    },
    {
      "epoch": 1.5976976050981602,
      "grad_norm": 0.19783546030521393,
      "learning_rate": 4.2434448328455e-05,
      "loss": 0.5623,
      "step": 7772
    },
    {
      "epoch": 1.5979031760715388,
      "grad_norm": 0.20209753513336182,
      "learning_rate": 4.242447653039856e-05,
      "loss": 0.5378,
      "step": 7773
    },
    {
      "epoch": 1.5981087470449173,
      "grad_norm": 0.16521279513835907,
      "learning_rate": 4.2414504859226125e-05,
      "loss": 0.4948,
      "step": 7774
    },
    {
      "epoch": 1.598314318018296,
      "grad_norm": 0.15857960283756256,
      "learning_rate": 4.240453331542894e-05,
      "loss": 0.5269,
      "step": 7775
    },
    {
      "epoch": 1.5985198889916745,
      "grad_norm": 0.16982486844062805,
      "learning_rate": 4.239456189949828e-05,
      "loss": 0.5311,
      "step": 7776
    },
    {
      "epoch": 1.5987254599650529,
      "grad_norm": 0.15772342681884766,
      "learning_rate": 4.238459061192537e-05,
      "loss": 0.5586,
      "step": 7777
    },
    {
      "epoch": 1.5989310309384315,
      "grad_norm": 0.18286247551441193,
      "learning_rate": 4.2374619453201466e-05,
      "loss": 0.527,
      "step": 7778
    },
    {
      "epoch": 1.59913660191181,
      "grad_norm": 0.19069987535476685,
      "learning_rate": 4.236464842381778e-05,
      "loss": 0.5576,
      "step": 7779
    },
    {
      "epoch": 1.5993421728851887,
      "grad_norm": 0.19216850399971008,
      "learning_rate": 4.235467752426555e-05,
      "loss": 0.5289,
      "step": 7780
    },
    {
      "epoch": 1.599547743858567,
      "grad_norm": 0.1922430843114853,
      "learning_rate": 4.2344706755036e-05,
      "loss": 0.568,
      "step": 7781
    },
    {
      "epoch": 1.5997533148319456,
      "grad_norm": 0.18228840827941895,
      "learning_rate": 4.2334736116620314e-05,
      "loss": 0.531,
      "step": 7782
    },
    {
      "epoch": 1.5999588858053242,
      "grad_norm": 0.18847499787807465,
      "learning_rate": 4.2324765609509746e-05,
      "loss": 0.5421,
      "step": 7783
    },
    {
      "epoch": 1.6001644567787028,
      "grad_norm": 0.1916157454252243,
      "learning_rate": 4.231479523419547e-05,
      "loss": 0.5423,
      "step": 7784
    },
    {
      "epoch": 1.6003700277520814,
      "grad_norm": 0.19695116579532623,
      "learning_rate": 4.230482499116869e-05,
      "loss": 0.5403,
      "step": 7785
    },
    {
      "epoch": 1.60057559872546,
      "grad_norm": 0.20415250957012177,
      "learning_rate": 4.2294854880920575e-05,
      "loss": 0.5381,
      "step": 7786
    },
    {
      "epoch": 1.6007811696988385,
      "grad_norm": 0.20049957931041718,
      "learning_rate": 4.228488490394232e-05,
      "loss": 0.5711,
      "step": 7787
    },
    {
      "epoch": 1.6009867406722171,
      "grad_norm": 0.16379691660404205,
      "learning_rate": 4.227491506072508e-05,
      "loss": 0.5004,
      "step": 7788
    },
    {
      "epoch": 1.6011923116455957,
      "grad_norm": 0.16042593121528625,
      "learning_rate": 4.226494535176005e-05,
      "loss": 0.5595,
      "step": 7789
    },
    {
      "epoch": 1.6013978826189743,
      "grad_norm": 0.18765395879745483,
      "learning_rate": 4.2254975777538386e-05,
      "loss": 0.5608,
      "step": 7790
    },
    {
      "epoch": 1.601603453592353,
      "grad_norm": 0.16303540766239166,
      "learning_rate": 4.224500633855123e-05,
      "loss": 0.5161,
      "step": 7791
    },
    {
      "epoch": 1.6018090245657313,
      "grad_norm": 0.16182848811149597,
      "learning_rate": 4.223503703528973e-05,
      "loss": 0.5586,
      "step": 7792
    },
    {
      "epoch": 1.6020145955391099,
      "grad_norm": 0.1916949301958084,
      "learning_rate": 4.222506786824504e-05,
      "loss": 0.5563,
      "step": 7793
    },
    {
      "epoch": 1.6022201665124884,
      "grad_norm": 0.18221786618232727,
      "learning_rate": 4.221509883790828e-05,
      "loss": 0.5474,
      "step": 7794
    },
    {
      "epoch": 1.602425737485867,
      "grad_norm": 0.1872803419828415,
      "learning_rate": 4.2205129944770574e-05,
      "loss": 0.5405,
      "step": 7795
    },
    {
      "epoch": 1.6026313084592454,
      "grad_norm": 0.1901916116476059,
      "learning_rate": 4.2195161189323064e-05,
      "loss": 0.5595,
      "step": 7796
    },
    {
      "epoch": 1.602836879432624,
      "grad_norm": 0.48569947481155396,
      "learning_rate": 4.2185192572056856e-05,
      "loss": 0.5194,
      "step": 7797
    },
    {
      "epoch": 1.6030424504060026,
      "grad_norm": 0.1648416817188263,
      "learning_rate": 4.217522409346305e-05,
      "loss": 0.5493,
      "step": 7798
    },
    {
      "epoch": 1.6032480213793812,
      "grad_norm": 0.2003573477268219,
      "learning_rate": 4.216525575403275e-05,
      "loss": 0.5516,
      "step": 7799
    },
    {
      "epoch": 1.6034535923527597,
      "grad_norm": 0.16360460221767426,
      "learning_rate": 4.2155287554257056e-05,
      "loss": 0.5167,
      "step": 7800
    },
    {
      "epoch": 1.6036591633261383,
      "grad_norm": 0.12889930605888367,
      "learning_rate": 4.2145319494627034e-05,
      "loss": 0.4986,
      "step": 7801
    },
    {
      "epoch": 1.603864734299517,
      "grad_norm": 0.16201470792293549,
      "learning_rate": 4.213535157563378e-05,
      "loss": 0.5439,
      "step": 7802
    },
    {
      "epoch": 1.6040703052728955,
      "grad_norm": 0.19083839654922485,
      "learning_rate": 4.212538379776837e-05,
      "loss": 0.5315,
      "step": 7803
    },
    {
      "epoch": 1.604275876246274,
      "grad_norm": 0.19757793843746185,
      "learning_rate": 4.211541616152186e-05,
      "loss": 0.5288,
      "step": 7804
    },
    {
      "epoch": 1.6044814472196527,
      "grad_norm": 0.21021337807178497,
      "learning_rate": 4.210544866738532e-05,
      "loss": 0.5579,
      "step": 7805
    },
    {
      "epoch": 1.6046870181930313,
      "grad_norm": 0.18950164318084717,
      "learning_rate": 4.2095481315849796e-05,
      "loss": 0.5447,
      "step": 7806
    },
    {
      "epoch": 1.6048925891664099,
      "grad_norm": 0.18903128802776337,
      "learning_rate": 4.2085514107406326e-05,
      "loss": 0.5478,
      "step": 7807
    },
    {
      "epoch": 1.6050981601397882,
      "grad_norm": 0.1963806450366974,
      "learning_rate": 4.207554704254596e-05,
      "loss": 0.5411,
      "step": 7808
    },
    {
      "epoch": 1.6053037311131668,
      "grad_norm": 0.19509243965148926,
      "learning_rate": 4.20655801217597e-05,
      "loss": 0.5447,
      "step": 7809
    },
    {
      "epoch": 1.6055093020865454,
      "grad_norm": 0.18859466910362244,
      "learning_rate": 4.205561334553862e-05,
      "loss": 0.5434,
      "step": 7810
    },
    {
      "epoch": 1.6057148730599238,
      "grad_norm": 0.1625402718782425,
      "learning_rate": 4.20456467143737e-05,
      "loss": 0.5226,
      "step": 7811
    },
    {
      "epoch": 1.6059204440333024,
      "grad_norm": 0.16071906685829163,
      "learning_rate": 4.203568022875596e-05,
      "loss": 0.5362,
      "step": 7812
    },
    {
      "epoch": 1.606126015006681,
      "grad_norm": 0.19820047914981842,
      "learning_rate": 4.202571388917638e-05,
      "loss": 0.5452,
      "step": 7813
    },
    {
      "epoch": 1.6063315859800595,
      "grad_norm": 0.1983959972858429,
      "learning_rate": 4.2015747696126e-05,
      "loss": 0.526,
      "step": 7814
    },
    {
      "epoch": 1.6065371569534381,
      "grad_norm": 0.19241683185100555,
      "learning_rate": 4.200578165009578e-05,
      "loss": 0.5378,
      "step": 7815
    },
    {
      "epoch": 1.6067427279268167,
      "grad_norm": 0.19365909695625305,
      "learning_rate": 4.199581575157668e-05,
      "loss": 0.5589,
      "step": 7816
    },
    {
      "epoch": 1.6069482989001953,
      "grad_norm": 0.1934269517660141,
      "learning_rate": 4.198585000105971e-05,
      "loss": 0.5438,
      "step": 7817
    },
    {
      "epoch": 1.607153869873574,
      "grad_norm": 0.19813553988933563,
      "learning_rate": 4.1975884399035834e-05,
      "loss": 0.5569,
      "step": 7818
    },
    {
      "epoch": 1.6073594408469525,
      "grad_norm": 0.1831195056438446,
      "learning_rate": 4.1965918945995994e-05,
      "loss": 0.5217,
      "step": 7819
    },
    {
      "epoch": 1.607565011820331,
      "grad_norm": 0.16004779934883118,
      "learning_rate": 4.1955953642431144e-05,
      "loss": 0.5526,
      "step": 7820
    },
    {
      "epoch": 1.6077705827937097,
      "grad_norm": 0.19440321624279022,
      "learning_rate": 4.1945988488832236e-05,
      "loss": 0.5287,
      "step": 7821
    },
    {
      "epoch": 1.6079761537670882,
      "grad_norm": 0.18852464854717255,
      "learning_rate": 4.1936023485690185e-05,
      "loss": 0.526,
      "step": 7822
    },
    {
      "epoch": 1.6081817247404666,
      "grad_norm": 0.18994298577308655,
      "learning_rate": 4.192605863349594e-05,
      "loss": 0.5729,
      "step": 7823
    },
    {
      "epoch": 1.6083872957138452,
      "grad_norm": 0.18983709812164307,
      "learning_rate": 4.191609393274042e-05,
      "loss": 0.5418,
      "step": 7824
    },
    {
      "epoch": 1.6085928666872238,
      "grad_norm": 0.19144746661186218,
      "learning_rate": 4.190612938391454e-05,
      "loss": 0.5502,
      "step": 7825
    },
    {
      "epoch": 1.6087984376606024,
      "grad_norm": 0.18976972997188568,
      "learning_rate": 4.18961649875092e-05,
      "loss": 0.5363,
      "step": 7826
    },
    {
      "epoch": 1.6090040086339807,
      "grad_norm": 0.19141483306884766,
      "learning_rate": 4.188620074401532e-05,
      "loss": 0.5285,
      "step": 7827
    },
    {
      "epoch": 1.6092095796073593,
      "grad_norm": 0.19065243005752563,
      "learning_rate": 4.187623665392377e-05,
      "loss": 0.5374,
      "step": 7828
    },
    {
      "epoch": 1.609415150580738,
      "grad_norm": 0.19287769496440887,
      "learning_rate": 4.186627271772544e-05,
      "loss": 0.5363,
      "step": 7829
    },
    {
      "epoch": 1.6096207215541165,
      "grad_norm": 0.19527852535247803,
      "learning_rate": 4.1856308935911175e-05,
      "loss": 0.562,
      "step": 7830
    },
    {
      "epoch": 1.609826292527495,
      "grad_norm": 0.16113971173763275,
      "learning_rate": 4.184634530897191e-05,
      "loss": 0.5236,
      "step": 7831
    },
    {
      "epoch": 1.6100318635008737,
      "grad_norm": 0.16417936980724335,
      "learning_rate": 4.183638183739846e-05,
      "loss": 0.5406,
      "step": 7832
    },
    {
      "epoch": 1.6102374344742523,
      "grad_norm": 0.19486621022224426,
      "learning_rate": 4.1826418521681696e-05,
      "loss": 0.5471,
      "step": 7833
    },
    {
      "epoch": 1.6104430054476309,
      "grad_norm": 0.19623447954654694,
      "learning_rate": 4.181645536231245e-05,
      "loss": 0.5402,
      "step": 7834
    },
    {
      "epoch": 1.6106485764210094,
      "grad_norm": 0.19563686847686768,
      "learning_rate": 4.180649235978158e-05,
      "loss": 0.559,
      "step": 7835
    },
    {
      "epoch": 1.610854147394388,
      "grad_norm": 0.19006025791168213,
      "learning_rate": 4.17965295145799e-05,
      "loss": 0.5595,
      "step": 7836
    },
    {
      "epoch": 1.6110597183677666,
      "grad_norm": 0.1941699981689453,
      "learning_rate": 4.178656682719822e-05,
      "loss": 0.5391,
      "step": 7837
    },
    {
      "epoch": 1.611265289341145,
      "grad_norm": 0.20085136592388153,
      "learning_rate": 4.177660429812739e-05,
      "loss": 0.5546,
      "step": 7838
    },
    {
      "epoch": 1.6114708603145236,
      "grad_norm": 0.19179563224315643,
      "learning_rate": 4.1766641927858206e-05,
      "loss": 0.5336,
      "step": 7839
    },
    {
      "epoch": 1.6116764312879022,
      "grad_norm": 0.19993935525417328,
      "learning_rate": 4.175667971688145e-05,
      "loss": 0.5577,
      "step": 7840
    },
    {
      "epoch": 1.6118820022612808,
      "grad_norm": 0.19750361144542694,
      "learning_rate": 4.1746717665687934e-05,
      "loss": 0.5378,
      "step": 7841
    },
    {
      "epoch": 1.6120875732346591,
      "grad_norm": 0.1938353031873703,
      "learning_rate": 4.173675577476843e-05,
      "loss": 0.5184,
      "step": 7842
    },
    {
      "epoch": 1.6122931442080377,
      "grad_norm": 0.16070544719696045,
      "learning_rate": 4.172679404461371e-05,
      "loss": 0.521,
      "step": 7843
    },
    {
      "epoch": 1.6124987151814163,
      "grad_norm": 0.17982181906700134,
      "learning_rate": 4.171683247571455e-05,
      "loss": 0.546,
      "step": 7844
    },
    {
      "epoch": 1.6127042861547949,
      "grad_norm": 0.19503210484981537,
      "learning_rate": 4.170687106856171e-05,
      "loss": 0.557,
      "step": 7845
    },
    {
      "epoch": 1.6129098571281735,
      "grad_norm": 0.19316428899765015,
      "learning_rate": 4.1696909823645936e-05,
      "loss": 0.5496,
      "step": 7846
    },
    {
      "epoch": 1.613115428101552,
      "grad_norm": 0.19165056943893433,
      "learning_rate": 4.168694874145799e-05,
      "loss": 0.5376,
      "step": 7847
    },
    {
      "epoch": 1.6133209990749307,
      "grad_norm": 0.1679886281490326,
      "learning_rate": 4.167698782248859e-05,
      "loss": 0.5215,
      "step": 7848
    },
    {
      "epoch": 1.6135265700483092,
      "grad_norm": 0.16243119537830353,
      "learning_rate": 4.166702706722847e-05,
      "loss": 0.5333,
      "step": 7849
    },
    {
      "epoch": 1.6137321410216878,
      "grad_norm": 0.19812798500061035,
      "learning_rate": 4.1657066476168345e-05,
      "loss": 0.542,
      "step": 7850
    },
    {
      "epoch": 1.6139377119950664,
      "grad_norm": 0.20092356204986572,
      "learning_rate": 4.164710604979891e-05,
      "loss": 0.5599,
      "step": 7851
    },
    {
      "epoch": 1.614143282968445,
      "grad_norm": 0.1670868694782257,
      "learning_rate": 4.1637145788610914e-05,
      "loss": 0.5134,
      "step": 7852
    },
    {
      "epoch": 1.6143488539418234,
      "grad_norm": 0.1713995337486267,
      "learning_rate": 4.162718569309502e-05,
      "loss": 0.5345,
      "step": 7853
    },
    {
      "epoch": 1.614554424915202,
      "grad_norm": 0.1977371871471405,
      "learning_rate": 4.161722576374192e-05,
      "loss": 0.5363,
      "step": 7854
    },
    {
      "epoch": 1.6147599958885805,
      "grad_norm": 0.16193437576293945,
      "learning_rate": 4.1607266001042295e-05,
      "loss": 0.4881,
      "step": 7855
    },
    {
      "epoch": 1.6149655668619591,
      "grad_norm": 0.16229775547981262,
      "learning_rate": 4.159730640548683e-05,
      "loss": 0.5395,
      "step": 7856
    },
    {
      "epoch": 1.6151711378353375,
      "grad_norm": 0.1984437257051468,
      "learning_rate": 4.158734697756616e-05,
      "loss": 0.5634,
      "step": 7857
    },
    {
      "epoch": 1.615376708808716,
      "grad_norm": 0.1987016797065735,
      "learning_rate": 4.157738771777094e-05,
      "loss": 0.5676,
      "step": 7858
    },
    {
      "epoch": 1.6155822797820947,
      "grad_norm": 0.15975748002529144,
      "learning_rate": 4.156742862659185e-05,
      "loss": 0.5129,
      "step": 7859
    },
    {
      "epoch": 1.6157878507554733,
      "grad_norm": 0.15861687064170837,
      "learning_rate": 4.155746970451951e-05,
      "loss": 0.5272,
      "step": 7860
    },
    {
      "epoch": 1.6159934217288519,
      "grad_norm": 0.18841111660003662,
      "learning_rate": 4.154751095204455e-05,
      "loss": 0.54,
      "step": 7861
    },
    {
      "epoch": 1.6161989927022304,
      "grad_norm": 0.19690489768981934,
      "learning_rate": 4.153755236965758e-05,
      "loss": 0.5461,
      "step": 7862
    },
    {
      "epoch": 1.616404563675609,
      "grad_norm": 0.1665157824754715,
      "learning_rate": 4.1527593957849224e-05,
      "loss": 0.5081,
      "step": 7863
    },
    {
      "epoch": 1.6166101346489876,
      "grad_norm": 0.15810109674930573,
      "learning_rate": 4.1517635717110087e-05,
      "loss": 0.5394,
      "step": 7864
    },
    {
      "epoch": 1.6168157056223662,
      "grad_norm": 0.1974000781774521,
      "learning_rate": 4.150767764793074e-05,
      "loss": 0.5227,
      "step": 7865
    },
    {
      "epoch": 1.6170212765957448,
      "grad_norm": 0.19814777374267578,
      "learning_rate": 4.149771975080181e-05,
      "loss": 0.5464,
      "step": 7866
    },
    {
      "epoch": 1.6172268475691234,
      "grad_norm": 0.1915402114391327,
      "learning_rate": 4.148776202621386e-05,
      "loss": 0.541,
      "step": 7867
    },
    {
      "epoch": 1.6174324185425017,
      "grad_norm": 0.19537873566150665,
      "learning_rate": 4.147780447465745e-05,
      "loss": 0.5508,
      "step": 7868
    },
    {
      "epoch": 1.6176379895158803,
      "grad_norm": 0.1904834657907486,
      "learning_rate": 4.146784709662316e-05,
      "loss": 0.4971,
      "step": 7869
    },
    {
      "epoch": 1.617843560489259,
      "grad_norm": 0.19342583417892456,
      "learning_rate": 4.1457889892601536e-05,
      "loss": 0.5577,
      "step": 7870
    },
    {
      "epoch": 1.6180491314626375,
      "grad_norm": 0.19713959097862244,
      "learning_rate": 4.14479328630831e-05,
      "loss": 0.5568,
      "step": 7871
    },
    {
      "epoch": 1.6182547024360159,
      "grad_norm": 0.19126051664352417,
      "learning_rate": 4.143797600855843e-05,
      "loss": 0.5551,
      "step": 7872
    },
    {
      "epoch": 1.6184602734093945,
      "grad_norm": 0.16810829937458038,
      "learning_rate": 4.142801932951803e-05,
      "loss": 0.5213,
      "step": 7873
    },
    {
      "epoch": 1.618665844382773,
      "grad_norm": 0.15974818170070648,
      "learning_rate": 4.1418062826452424e-05,
      "loss": 0.5456,
      "step": 7874
    },
    {
      "epoch": 1.6188714153561516,
      "grad_norm": 0.19179581105709076,
      "learning_rate": 4.140810649985212e-05,
      "loss": 0.5298,
      "step": 7875
    },
    {
      "epoch": 1.6190769863295302,
      "grad_norm": 0.19233964383602142,
      "learning_rate": 4.139815035020762e-05,
      "loss": 0.5471,
      "step": 7876
    },
    {
      "epoch": 1.6192825573029088,
      "grad_norm": 0.18875513970851898,
      "learning_rate": 4.1388194378009406e-05,
      "loss": 0.5382,
      "step": 7877
    },
    {
      "epoch": 1.6194881282762874,
      "grad_norm": 0.18729184567928314,
      "learning_rate": 4.1378238583747975e-05,
      "loss": 0.5342,
      "step": 7878
    },
    {
      "epoch": 1.619693699249666,
      "grad_norm": 0.19150425493717194,
      "learning_rate": 4.136828296791382e-05,
      "loss": 0.565,
      "step": 7879
    },
    {
      "epoch": 1.6198992702230446,
      "grad_norm": 0.18844369053840637,
      "learning_rate": 4.1358327530997366e-05,
      "loss": 0.5405,
      "step": 7880
    },
    {
      "epoch": 1.6201048411964232,
      "grad_norm": 0.19033032655715942,
      "learning_rate": 4.1348372273489106e-05,
      "loss": 0.5274,
      "step": 7881
    },
    {
      "epoch": 1.6203104121698018,
      "grad_norm": 0.16202832758426666,
      "learning_rate": 4.133841719587948e-05,
      "loss": 0.4988,
      "step": 7882
    },
    {
      "epoch": 1.6205159831431801,
      "grad_norm": 0.16193822026252747,
      "learning_rate": 4.132846229865892e-05,
      "loss": 0.542,
      "step": 7883
    },
    {
      "epoch": 1.6207215541165587,
      "grad_norm": 0.1977519690990448,
      "learning_rate": 4.131850758231787e-05,
      "loss": 0.5588,
      "step": 7884
    },
    {
      "epoch": 1.6209271250899373,
      "grad_norm": 0.20576632022857666,
      "learning_rate": 4.1308553047346713e-05,
      "loss": 0.5583,
      "step": 7885
    },
    {
      "epoch": 1.621132696063316,
      "grad_norm": 0.1919194608926773,
      "learning_rate": 4.129859869423592e-05,
      "loss": 0.5519,
      "step": 7886
    },
    {
      "epoch": 1.6213382670366943,
      "grad_norm": 0.19272786378860474,
      "learning_rate": 4.128864452347587e-05,
      "loss": 0.5368,
      "step": 7887
    },
    {
      "epoch": 1.6215438380100728,
      "grad_norm": 0.19439461827278137,
      "learning_rate": 4.127869053555696e-05,
      "loss": 0.5374,
      "step": 7888
    },
    {
      "epoch": 1.6217494089834514,
      "grad_norm": 0.19123432040214539,
      "learning_rate": 4.126873673096956e-05,
      "loss": 0.5392,
      "step": 7889
    },
    {
      "epoch": 1.62195497995683,
      "grad_norm": 0.18603573739528656,
      "learning_rate": 4.1258783110204074e-05,
      "loss": 0.5217,
      "step": 7890
    },
    {
      "epoch": 1.6221605509302086,
      "grad_norm": 0.1992233395576477,
      "learning_rate": 4.1248829673750846e-05,
      "loss": 0.5625,
      "step": 7891
    },
    {
      "epoch": 1.6223661219035872,
      "grad_norm": 0.18787723779678345,
      "learning_rate": 4.123887642210024e-05,
      "loss": 0.554,
      "step": 7892
    },
    {
      "epoch": 1.6225716928769658,
      "grad_norm": 0.18760953843593597,
      "learning_rate": 4.122892335574263e-05,
      "loss": 0.5411,
      "step": 7893
    },
    {
      "epoch": 1.6227772638503444,
      "grad_norm": 0.19207806885242462,
      "learning_rate": 4.121897047516834e-05,
      "loss": 0.5274,
      "step": 7894
    },
    {
      "epoch": 1.622982834823723,
      "grad_norm": 0.1640760600566864,
      "learning_rate": 4.12090177808677e-05,
      "loss": 0.5044,
      "step": 7895
    },
    {
      "epoch": 1.6231884057971016,
      "grad_norm": 0.1595536321401596,
      "learning_rate": 4.1199065273331035e-05,
      "loss": 0.5382,
      "step": 7896
    },
    {
      "epoch": 1.6233939767704801,
      "grad_norm": 0.7902474403381348,
      "learning_rate": 4.118911295304866e-05,
      "loss": 0.5773,
      "step": 7897
    },
    {
      "epoch": 1.6235995477438587,
      "grad_norm": 0.15795102715492249,
      "learning_rate": 4.1179160820510866e-05,
      "loss": 0.5388,
      "step": 7898
    },
    {
      "epoch": 1.623805118717237,
      "grad_norm": 0.2210693508386612,
      "learning_rate": 4.116920887620797e-05,
      "loss": 0.5724,
      "step": 7899
    },
    {
      "epoch": 1.6240106896906157,
      "grad_norm": 0.16837280988693237,
      "learning_rate": 4.1159257120630244e-05,
      "loss": 0.5361,
      "step": 7900
    },
    {
      "epoch": 1.6242162606639943,
      "grad_norm": 0.16610947251319885,
      "learning_rate": 4.1149305554267965e-05,
      "loss": 0.5441,
      "step": 7901
    },
    {
      "epoch": 1.6244218316373726,
      "grad_norm": 0.19282789528369904,
      "learning_rate": 4.1139354177611413e-05,
      "loss": 0.5416,
      "step": 7902
    },
    {
      "epoch": 1.6246274026107512,
      "grad_norm": 0.19123776257038116,
      "learning_rate": 4.112940299115083e-05,
      "loss": 0.5602,
      "step": 7903
    },
    {
      "epoch": 1.6248329735841298,
      "grad_norm": 0.19537465274333954,
      "learning_rate": 4.111945199537648e-05,
      "loss": 0.5568,
      "step": 7904
    },
    {
      "epoch": 1.6250385445575084,
      "grad_norm": 0.1960020512342453,
      "learning_rate": 4.1109501190778585e-05,
      "loss": 0.5366,
      "step": 7905
    },
    {
      "epoch": 1.625244115530887,
      "grad_norm": 0.16584603488445282,
      "learning_rate": 4.109955057784737e-05,
      "loss": 0.5022,
      "step": 7906
    },
    {
      "epoch": 1.6254496865042656,
      "grad_norm": 0.14005246758460999,
      "learning_rate": 4.108960015707308e-05,
      "loss": 0.5147,
      "step": 7907
    },
    {
      "epoch": 1.6256552574776442,
      "grad_norm": 0.16588489711284637,
      "learning_rate": 4.107964992894592e-05,
      "loss": 0.5522,
      "step": 7908
    },
    {
      "epoch": 1.6258608284510228,
      "grad_norm": 0.19331607222557068,
      "learning_rate": 4.1069699893956074e-05,
      "loss": 0.5099,
      "step": 7909
    },
    {
      "epoch": 1.6260663994244013,
      "grad_norm": 0.1920442134141922,
      "learning_rate": 4.105975005259374e-05,
      "loss": 0.543,
      "step": 7910
    },
    {
      "epoch": 1.62627197039778,
      "grad_norm": 0.19395653903484344,
      "learning_rate": 4.1049800405349116e-05,
      "loss": 0.5589,
      "step": 7911
    },
    {
      "epoch": 1.6264775413711585,
      "grad_norm": 0.19541949033737183,
      "learning_rate": 4.103985095271236e-05,
      "loss": 0.5376,
      "step": 7912
    },
    {
      "epoch": 1.626683112344537,
      "grad_norm": 0.16967599093914032,
      "learning_rate": 4.102990169517362e-05,
      "loss": 0.5135,
      "step": 7913
    },
    {
      "epoch": 1.6268886833179155,
      "grad_norm": 0.16106168925762177,
      "learning_rate": 4.101995263322308e-05,
      "loss": 0.5548,
      "step": 7914
    },
    {
      "epoch": 1.627094254291294,
      "grad_norm": 0.20895619690418243,
      "learning_rate": 4.101000376735088e-05,
      "loss": 0.536,
      "step": 7915
    },
    {
      "epoch": 1.6272998252646727,
      "grad_norm": 0.1945531964302063,
      "learning_rate": 4.1000055098047144e-05,
      "loss": 0.5196,
      "step": 7916
    },
    {
      "epoch": 1.6275053962380512,
      "grad_norm": 0.19166290760040283,
      "learning_rate": 4.099010662580199e-05,
      "loss": 0.5329,
      "step": 7917
    },
    {
      "epoch": 1.6277109672114296,
      "grad_norm": 0.1970268040895462,
      "learning_rate": 4.0980158351105554e-05,
      "loss": 0.5334,
      "step": 7918
    },
    {
      "epoch": 1.6279165381848082,
      "grad_norm": 0.19781675934791565,
      "learning_rate": 4.097021027444791e-05,
      "loss": 0.557,
      "step": 7919
    },
    {
      "epoch": 1.6281221091581868,
      "grad_norm": 0.2081199437379837,
      "learning_rate": 4.0960262396319165e-05,
      "loss": 0.5316,
      "step": 7920
    },
    {
      "epoch": 1.6283276801315654,
      "grad_norm": 0.19772003591060638,
      "learning_rate": 4.0950314717209425e-05,
      "loss": 0.5286,
      "step": 7921
    },
    {
      "epoch": 1.628533251104944,
      "grad_norm": 0.1967727690935135,
      "learning_rate": 4.094036723760875e-05,
      "loss": 0.55,
      "step": 7922
    },
    {
      "epoch": 1.6287388220783225,
      "grad_norm": 0.1625976264476776,
      "learning_rate": 4.09304199580072e-05,
      "loss": 0.5024,
      "step": 7923
    },
    {
      "epoch": 1.6289443930517011,
      "grad_norm": 0.16001035273075104,
      "learning_rate": 4.092047287889484e-05,
      "loss": 0.5347,
      "step": 7924
    },
    {
      "epoch": 1.6291499640250797,
      "grad_norm": 0.20354917645454407,
      "learning_rate": 4.0910526000761725e-05,
      "loss": 0.5271,
      "step": 7925
    },
    {
      "epoch": 1.6293555349984583,
      "grad_norm": 0.20167338848114014,
      "learning_rate": 4.0900579324097874e-05,
      "loss": 0.547,
      "step": 7926
    },
    {
      "epoch": 1.629561105971837,
      "grad_norm": 0.1940862089395523,
      "learning_rate": 4.08906328493933e-05,
      "loss": 0.5388,
      "step": 7927
    },
    {
      "epoch": 1.6297666769452155,
      "grad_norm": 0.20124763250350952,
      "learning_rate": 4.088068657713805e-05,
      "loss": 0.5325,
      "step": 7928
    },
    {
      "epoch": 1.6299722479185939,
      "grad_norm": 0.1647825688123703,
      "learning_rate": 4.087074050782213e-05,
      "loss": 0.5181,
      "step": 7929
    },
    {
      "epoch": 1.6301778188919724,
      "grad_norm": 0.13776123523712158,
      "learning_rate": 4.0860794641935524e-05,
      "loss": 0.5102,
      "step": 7930
    },
    {
      "epoch": 1.630383389865351,
      "grad_norm": 0.1585695892572403,
      "learning_rate": 4.0850848979968205e-05,
      "loss": 0.5194,
      "step": 7931
    },
    {
      "epoch": 1.6305889608387296,
      "grad_norm": 0.19522860646247864,
      "learning_rate": 4.084090352241017e-05,
      "loss": 0.5335,
      "step": 7932
    },
    {
      "epoch": 1.630794531812108,
      "grad_norm": 0.200296089053154,
      "learning_rate": 4.0830958269751385e-05,
      "loss": 0.5442,
      "step": 7933
    },
    {
      "epoch": 1.6310001027854866,
      "grad_norm": 0.19578911364078522,
      "learning_rate": 4.0821013222481786e-05,
      "loss": 0.5368,
      "step": 7934
    },
    {
      "epoch": 1.6312056737588652,
      "grad_norm": 0.19223348796367645,
      "learning_rate": 4.0811068381091336e-05,
      "loss": 0.5389,
      "step": 7935
    },
    {
      "epoch": 1.6314112447322437,
      "grad_norm": 0.19662773609161377,
      "learning_rate": 4.080112374606998e-05,
      "loss": 0.5493,
      "step": 7936
    },
    {
      "epoch": 1.6316168157056223,
      "grad_norm": 0.1632963865995407,
      "learning_rate": 4.0791179317907626e-05,
      "loss": 0.4949,
      "step": 7937
    },
    {
      "epoch": 1.631822386679001,
      "grad_norm": 0.16675293445587158,
      "learning_rate": 4.0781235097094205e-05,
      "loss": 0.5681,
      "step": 7938
    },
    {
      "epoch": 1.6320279576523795,
      "grad_norm": 0.2007942795753479,
      "learning_rate": 4.0771291084119603e-05,
      "loss": 0.551,
      "step": 7939
    },
    {
      "epoch": 1.632233528625758,
      "grad_norm": 0.1977294385433197,
      "learning_rate": 4.076134727947373e-05,
      "loss": 0.5417,
      "step": 7940
    },
    {
      "epoch": 1.6324390995991367,
      "grad_norm": 0.20973463356494904,
      "learning_rate": 4.075140368364644e-05,
      "loss": 0.5623,
      "step": 7941
    },
    {
      "epoch": 1.6326446705725153,
      "grad_norm": 0.2024088203907013,
      "learning_rate": 4.074146029712765e-05,
      "loss": 0.5461,
      "step": 7942
    },
    {
      "epoch": 1.6328502415458939,
      "grad_norm": 0.18343862891197205,
      "learning_rate": 4.0731517120407205e-05,
      "loss": 0.5329,
      "step": 7943
    },
    {
      "epoch": 1.6330558125192722,
      "grad_norm": 0.19498711824417114,
      "learning_rate": 4.0721574153974966e-05,
      "loss": 0.5226,
      "step": 7944
    },
    {
      "epoch": 1.6332613834926508,
      "grad_norm": 0.1982509046792984,
      "learning_rate": 4.071163139832077e-05,
      "loss": 0.5745,
      "step": 7945
    },
    {
      "epoch": 1.6334669544660294,
      "grad_norm": 0.19435621798038483,
      "learning_rate": 4.0701688853934454e-05,
      "loss": 0.5448,
      "step": 7946
    },
    {
      "epoch": 1.633672525439408,
      "grad_norm": 0.19986435770988464,
      "learning_rate": 4.069174652130582e-05,
      "loss": 0.535,
      "step": 7947
    },
    {
      "epoch": 1.6338780964127864,
      "grad_norm": 0.2016473263502121,
      "learning_rate": 4.068180440092471e-05,
      "loss": 0.5354,
      "step": 7948
    },
    {
      "epoch": 1.634083667386165,
      "grad_norm": 0.1967112123966217,
      "learning_rate": 4.067186249328092e-05,
      "loss": 0.5405,
      "step": 7949
    },
    {
      "epoch": 1.6342892383595435,
      "grad_norm": 0.1958150565624237,
      "learning_rate": 4.0661920798864236e-05,
      "loss": 0.5235,
      "step": 7950
    },
    {
      "epoch": 1.6344948093329221,
      "grad_norm": 0.19553299248218536,
      "learning_rate": 4.065197931816444e-05,
      "loss": 0.5356,
      "step": 7951
    },
    {
      "epoch": 1.6347003803063007,
      "grad_norm": 0.19405850768089294,
      "learning_rate": 4.064203805167129e-05,
      "loss": 0.536,
      "step": 7952
    },
    {
      "epoch": 1.6349059512796793,
      "grad_norm": 0.20262351632118225,
      "learning_rate": 4.0632096999874556e-05,
      "loss": 0.546,
      "step": 7953
    },
    {
      "epoch": 1.635111522253058,
      "grad_norm": 0.1994638741016388,
      "learning_rate": 4.0622156163263986e-05,
      "loss": 0.5446,
      "step": 7954
    },
    {
      "epoch": 1.6353170932264365,
      "grad_norm": 0.19563588500022888,
      "learning_rate": 4.0612215542329316e-05,
      "loss": 0.5533,
      "step": 7955
    },
    {
      "epoch": 1.635522664199815,
      "grad_norm": 0.19695055484771729,
      "learning_rate": 4.060227513756029e-05,
      "loss": 0.522,
      "step": 7956
    },
    {
      "epoch": 1.6357282351731937,
      "grad_norm": 0.1933106780052185,
      "learning_rate": 4.059233494944662e-05,
      "loss": 0.5295,
      "step": 7957
    },
    {
      "epoch": 1.6359338061465722,
      "grad_norm": 0.1970299780368805,
      "learning_rate": 4.0582394978477997e-05,
      "loss": 0.5533,
      "step": 7958
    },
    {
      "epoch": 1.6361393771199506,
      "grad_norm": 0.19385181367397308,
      "learning_rate": 4.0572455225144124e-05,
      "loss": 0.5575,
      "step": 7959
    },
    {
      "epoch": 1.6363449480933292,
      "grad_norm": 0.19552960991859436,
      "learning_rate": 4.056251568993469e-05,
      "loss": 0.5427,
      "step": 7960
    },
    {
      "epoch": 1.6365505190667078,
      "grad_norm": 0.20119963586330414,
      "learning_rate": 4.055257637333935e-05,
      "loss": 0.5481,
      "step": 7961
    },
    {
      "epoch": 1.6367560900400864,
      "grad_norm": 0.19214770197868347,
      "learning_rate": 4.05426372758478e-05,
      "loss": 0.5258,
      "step": 7962
    },
    {
      "epoch": 1.6369616610134647,
      "grad_norm": 0.19121824204921722,
      "learning_rate": 4.0532698397949686e-05,
      "loss": 0.5093,
      "step": 7963
    },
    {
      "epoch": 1.6371672319868433,
      "grad_norm": 0.18680913746356964,
      "learning_rate": 4.052275974013464e-05,
      "loss": 0.54,
      "step": 7964
    },
    {
      "epoch": 1.637372802960222,
      "grad_norm": 0.1893320232629776,
      "learning_rate": 4.051282130289228e-05,
      "loss": 0.5448,
      "step": 7965
    },
    {
      "epoch": 1.6375783739336005,
      "grad_norm": 0.1885337233543396,
      "learning_rate": 4.050288308671225e-05,
      "loss": 0.5424,
      "step": 7966
    },
    {
      "epoch": 1.637783944906979,
      "grad_norm": 0.1980556845664978,
      "learning_rate": 4.049294509208415e-05,
      "loss": 0.5693,
      "step": 7967
    },
    {
      "epoch": 1.6379895158803577,
      "grad_norm": 0.194559246301651,
      "learning_rate": 4.0483007319497566e-05,
      "loss": 0.5378,
      "step": 7968
    },
    {
      "epoch": 1.6381950868537363,
      "grad_norm": 0.1900004744529724,
      "learning_rate": 4.047306976944211e-05,
      "loss": 0.5471,
      "step": 7969
    },
    {
      "epoch": 1.6384006578271149,
      "grad_norm": 0.19014038145542145,
      "learning_rate": 4.0463132442407365e-05,
      "loss": 0.5482,
      "step": 7970
    },
    {
      "epoch": 1.6386062288004934,
      "grad_norm": 0.17057844996452332,
      "learning_rate": 4.0453195338882867e-05,
      "loss": 0.5261,
      "step": 7971
    },
    {
      "epoch": 1.638811799773872,
      "grad_norm": 0.1279505342245102,
      "learning_rate": 4.044325845935818e-05,
      "loss": 0.5028,
      "step": 7972
    },
    {
      "epoch": 1.6390173707472506,
      "grad_norm": 0.16361773014068604,
      "learning_rate": 4.043332180432286e-05,
      "loss": 0.5524,
      "step": 7973
    },
    {
      "epoch": 1.6392229417206292,
      "grad_norm": 0.20223143696784973,
      "learning_rate": 4.042338537426641e-05,
      "loss": 0.5304,
      "step": 7974
    },
    {
      "epoch": 1.6394285126940076,
      "grad_norm": 0.20231173932552338,
      "learning_rate": 4.041344916967838e-05,
      "loss": 0.5483,
      "step": 7975
    },
    {
      "epoch": 1.6396340836673862,
      "grad_norm": 0.20245333015918732,
      "learning_rate": 4.040351319104828e-05,
      "loss": 0.5334,
      "step": 7976
    },
    {
      "epoch": 1.6398396546407648,
      "grad_norm": 0.15880252420902252,
      "learning_rate": 4.039357743886559e-05,
      "loss": 0.5144,
      "step": 7977
    },
    {
      "epoch": 1.6400452256141431,
      "grad_norm": 0.1283801794052124,
      "learning_rate": 4.0383641913619816e-05,
      "loss": 0.5137,
      "step": 7978
    },
    {
      "epoch": 1.6402507965875217,
      "grad_norm": 0.1789664328098297,
      "learning_rate": 4.0373706615800426e-05,
      "loss": 0.5355,
      "step": 7979
    },
    {
      "epoch": 1.6404563675609003,
      "grad_norm": 0.196334108710289,
      "learning_rate": 4.0363771545896894e-05,
      "loss": 0.5392,
      "step": 7980
    },
    {
      "epoch": 1.6406619385342789,
      "grad_norm": 0.19602881371974945,
      "learning_rate": 4.035383670439867e-05,
      "loss": 0.5369,
      "step": 7981
    },
    {
      "epoch": 1.6408675095076575,
      "grad_norm": 0.19509628415107727,
      "learning_rate": 4.0343902091795174e-05,
      "loss": 0.5494,
      "step": 7982
    },
    {
      "epoch": 1.641073080481036,
      "grad_norm": 0.19635361433029175,
      "learning_rate": 4.033396770857588e-05,
      "loss": 0.5527,
      "step": 7983
    },
    {
      "epoch": 1.6412786514544146,
      "grad_norm": 0.19803519546985626,
      "learning_rate": 4.0324033555230184e-05,
      "loss": 0.537,
      "step": 7984
    },
    {
      "epoch": 1.6414842224277932,
      "grad_norm": 0.20085453987121582,
      "learning_rate": 4.03140996322475e-05,
      "loss": 0.5394,
      "step": 7985
    },
    {
      "epoch": 1.6416897934011718,
      "grad_norm": 0.18997138738632202,
      "learning_rate": 4.030416594011722e-05,
      "loss": 0.5145,
      "step": 7986
    },
    {
      "epoch": 1.6418953643745504,
      "grad_norm": 0.16585613787174225,
      "learning_rate": 4.029423247932874e-05,
      "loss": 0.511,
      "step": 7987
    },
    {
      "epoch": 1.642100935347929,
      "grad_norm": 0.1544012725353241,
      "learning_rate": 4.028429925037143e-05,
      "loss": 0.5345,
      "step": 7988
    },
    {
      "epoch": 1.6423065063213076,
      "grad_norm": 0.2430618703365326,
      "learning_rate": 4.0274366253734644e-05,
      "loss": 0.5486,
      "step": 7989
    },
    {
      "epoch": 1.642512077294686,
      "grad_norm": 0.19470450282096863,
      "learning_rate": 4.0264433489907753e-05,
      "loss": 0.5318,
      "step": 7990
    },
    {
      "epoch": 1.6427176482680645,
      "grad_norm": 0.196413055062294,
      "learning_rate": 4.0254500959380096e-05,
      "loss": 0.5383,
      "step": 7991
    },
    {
      "epoch": 1.6429232192414431,
      "grad_norm": 0.19302628934383392,
      "learning_rate": 4.0244568662641e-05,
      "loss": 0.5368,
      "step": 7992
    },
    {
      "epoch": 1.6431287902148217,
      "grad_norm": 0.19250887632369995,
      "learning_rate": 4.023463660017978e-05,
      "loss": 0.5225,
      "step": 7993
    },
    {
      "epoch": 1.6433343611882,
      "grad_norm": 0.18870443105697632,
      "learning_rate": 4.022470477248573e-05,
      "loss": 0.5322,
      "step": 7994
    },
    {
      "epoch": 1.6435399321615787,
      "grad_norm": 0.19748498499393463,
      "learning_rate": 4.0214773180048155e-05,
      "loss": 0.5266,
      "step": 7995
    },
    {
      "epoch": 1.6437455031349573,
      "grad_norm": 0.19181567430496216,
      "learning_rate": 4.020484182335634e-05,
      "loss": 0.5553,
      "step": 7996
    },
    {
      "epoch": 1.6439510741083359,
      "grad_norm": 0.18883375823497772,
      "learning_rate": 4.019491070289956e-05,
      "loss": 0.536,
      "step": 7997
    },
    {
      "epoch": 1.6441566450817144,
      "grad_norm": 0.19764509797096252,
      "learning_rate": 4.0184979819167066e-05,
      "loss": 0.5231,
      "step": 7998
    },
    {
      "epoch": 1.644362216055093,
      "grad_norm": 0.1661233752965927,
      "learning_rate": 4.017504917264812e-05,
      "loss": 0.5223,
      "step": 7999
    },
    {
      "epoch": 1.6445677870284716,
      "grad_norm": 0.1361915022134781,
      "learning_rate": 4.016511876383195e-05,
      "loss": 0.5278,
      "step": 8000
    },
    {
      "epoch": 1.6447733580018502,
      "grad_norm": 0.16932383179664612,
      "learning_rate": 4.015518859320778e-05,
      "loss": 0.5341,
      "step": 8001
    },
    {
      "epoch": 1.6449789289752288,
      "grad_norm": 0.19935861229896545,
      "learning_rate": 4.014525866126482e-05,
      "loss": 0.5358,
      "step": 8002
    },
    {
      "epoch": 1.6451844999486074,
      "grad_norm": 0.20055261254310608,
      "learning_rate": 4.013532896849226e-05,
      "loss": 0.5611,
      "step": 8003
    },
    {
      "epoch": 1.645390070921986,
      "grad_norm": 0.1982363760471344,
      "learning_rate": 4.012539951537932e-05,
      "loss": 0.5271,
      "step": 8004
    },
    {
      "epoch": 1.6455956418953643,
      "grad_norm": 0.16576005518436432,
      "learning_rate": 4.011547030241516e-05,
      "loss": 0.5156,
      "step": 8005
    },
    {
      "epoch": 1.645801212868743,
      "grad_norm": 0.13087031245231628,
      "learning_rate": 4.010554133008895e-05,
      "loss": 0.5298,
      "step": 8006
    },
    {
      "epoch": 1.6460067838421215,
      "grad_norm": 0.16294503211975098,
      "learning_rate": 4.0095612598889837e-05,
      "loss": 0.5526,
      "step": 8007
    },
    {
      "epoch": 1.6462123548155,
      "grad_norm": 0.20266200602054596,
      "learning_rate": 4.008568410930698e-05,
      "loss": 0.5262,
      "step": 8008
    },
    {
      "epoch": 1.6464179257888785,
      "grad_norm": 0.16137059032917023,
      "learning_rate": 4.007575586182949e-05,
      "loss": 0.5215,
      "step": 8009
    },
    {
      "epoch": 1.646623496762257,
      "grad_norm": 0.16377897560596466,
      "learning_rate": 4.006582785694648e-05,
      "loss": 0.5292,
      "step": 8010
    },
    {
      "epoch": 1.6468290677356356,
      "grad_norm": 0.19530196487903595,
      "learning_rate": 4.005590009514708e-05,
      "loss": 0.5454,
      "step": 8011
    },
    {
      "epoch": 1.6470346387090142,
      "grad_norm": 0.19677075743675232,
      "learning_rate": 4.0045972576920374e-05,
      "loss": 0.5499,
      "step": 8012
    },
    {
      "epoch": 1.6472402096823928,
      "grad_norm": 0.19411884248256683,
      "learning_rate": 4.003604530275545e-05,
      "loss": 0.5623,
      "step": 8013
    },
    {
      "epoch": 1.6474457806557714,
      "grad_norm": 0.15870682895183563,
      "learning_rate": 4.002611827314137e-05,
      "loss": 0.5135,
      "step": 8014
    },
    {
      "epoch": 1.64765135162915,
      "grad_norm": 0.1609289050102234,
      "learning_rate": 4.0016191488567195e-05,
      "loss": 0.5723,
      "step": 8015
    },
    {
      "epoch": 1.6478569226025286,
      "grad_norm": 0.19486412405967712,
      "learning_rate": 4.000626494952196e-05,
      "loss": 0.5615,
      "step": 8016
    },
    {
      "epoch": 1.6480624935759072,
      "grad_norm": 0.20491555333137512,
      "learning_rate": 3.9996338656494715e-05,
      "loss": 0.5451,
      "step": 8017
    },
    {
      "epoch": 1.6482680645492858,
      "grad_norm": 0.19133470952510834,
      "learning_rate": 3.998641260997449e-05,
      "loss": 0.5251,
      "step": 8018
    },
    {
      "epoch": 1.6484736355226643,
      "grad_norm": 0.1599549949169159,
      "learning_rate": 3.997648681045026e-05,
      "loss": 0.5172,
      "step": 8019
    },
    {
      "epoch": 1.6486792064960427,
      "grad_norm": 0.1676701456308365,
      "learning_rate": 3.996656125841106e-05,
      "loss": 0.5404,
      "step": 8020
    },
    {
      "epoch": 1.6488847774694213,
      "grad_norm": 0.1984013020992279,
      "learning_rate": 3.995663595434587e-05,
      "loss": 0.5757,
      "step": 8021
    },
    {
      "epoch": 1.6490903484428,
      "grad_norm": 0.1664489060640335,
      "learning_rate": 3.994671089874364e-05,
      "loss": 0.5177,
      "step": 8022
    },
    {
      "epoch": 1.6492959194161785,
      "grad_norm": 0.15646716952323914,
      "learning_rate": 3.993678609209333e-05,
      "loss": 0.548,
      "step": 8023
    },
    {
      "epoch": 1.6495014903895568,
      "grad_norm": 0.1926644891500473,
      "learning_rate": 3.9926861534883924e-05,
      "loss": 0.5528,
      "step": 8024
    },
    {
      "epoch": 1.6497070613629354,
      "grad_norm": 0.20535780489444733,
      "learning_rate": 3.991693722760434e-05,
      "loss": 0.5611,
      "step": 8025
    },
    {
      "epoch": 1.649912632336314,
      "grad_norm": 0.19756321609020233,
      "learning_rate": 3.9907013170743504e-05,
      "loss": 0.5244,
      "step": 8026
    },
    {
      "epoch": 1.6501182033096926,
      "grad_norm": 0.16199225187301636,
      "learning_rate": 3.9897089364790315e-05,
      "loss": 0.5097,
      "step": 8027
    },
    {
      "epoch": 1.6503237742830712,
      "grad_norm": 0.1658937931060791,
      "learning_rate": 3.988716581023368e-05,
      "loss": 0.556,
      "step": 8028
    },
    {
      "epoch": 1.6505293452564498,
      "grad_norm": 0.1961878091096878,
      "learning_rate": 3.98772425075625e-05,
      "loss": 0.5337,
      "step": 8029
    },
    {
      "epoch": 1.6507349162298284,
      "grad_norm": 0.1957957148551941,
      "learning_rate": 3.9867319457265616e-05,
      "loss": 0.5322,
      "step": 8030
    },
    {
      "epoch": 1.650940487203207,
      "grad_norm": 0.19228583574295044,
      "learning_rate": 3.985739665983192e-05,
      "loss": 0.5356,
      "step": 8031
    },
    {
      "epoch": 1.6511460581765856,
      "grad_norm": 0.20034292340278625,
      "learning_rate": 3.984747411575027e-05,
      "loss": 0.557,
      "step": 8032
    },
    {
      "epoch": 1.6513516291499641,
      "grad_norm": 0.1647980958223343,
      "learning_rate": 3.983755182550948e-05,
      "loss": 0.5102,
      "step": 8033
    },
    {
      "epoch": 1.6515572001233427,
      "grad_norm": 0.16599765419960022,
      "learning_rate": 3.982762978959838e-05,
      "loss": 0.5501,
      "step": 8034
    },
    {
      "epoch": 1.651762771096721,
      "grad_norm": 0.193580761551857,
      "learning_rate": 3.981770800850579e-05,
      "loss": 0.5507,
      "step": 8035
    },
    {
      "epoch": 1.6519683420700997,
      "grad_norm": 0.1937427669763565,
      "learning_rate": 3.98077864827205e-05,
      "loss": 0.5389,
      "step": 8036
    },
    {
      "epoch": 1.6521739130434783,
      "grad_norm": 0.1987418383359909,
      "learning_rate": 3.9797865212731286e-05,
      "loss": 0.5353,
      "step": 8037
    },
    {
      "epoch": 1.6523794840168569,
      "grad_norm": 0.1963115632534027,
      "learning_rate": 3.978794419902696e-05,
      "loss": 0.5466,
      "step": 8038
    },
    {
      "epoch": 1.6525850549902352,
      "grad_norm": 0.16360154747962952,
      "learning_rate": 3.977802344209626e-05,
      "loss": 0.4988,
      "step": 8039
    },
    {
      "epoch": 1.6527906259636138,
      "grad_norm": 0.18510453402996063,
      "learning_rate": 3.976810294242792e-05,
      "loss": 0.5791,
      "step": 8040
    },
    {
      "epoch": 1.6529961969369924,
      "grad_norm": 0.2012956142425537,
      "learning_rate": 3.9758182700510714e-05,
      "loss": 0.5532,
      "step": 8041
    },
    {
      "epoch": 1.653201767910371,
      "grad_norm": 0.19226433336734772,
      "learning_rate": 3.974826271683334e-05,
      "loss": 0.5284,
      "step": 8042
    },
    {
      "epoch": 1.6534073388837496,
      "grad_norm": 0.19937126338481903,
      "learning_rate": 3.973834299188452e-05,
      "loss": 0.5377,
      "step": 8043
    },
    {
      "epoch": 1.6536129098571282,
      "grad_norm": 0.19442661106586456,
      "learning_rate": 3.9728423526152927e-05,
      "loss": 0.5465,
      "step": 8044
    },
    {
      "epoch": 1.6538184808305068,
      "grad_norm": 0.19324155151844025,
      "learning_rate": 3.971850432012729e-05,
      "loss": 0.5376,
      "step": 8045
    },
    {
      "epoch": 1.6540240518038853,
      "grad_norm": 0.16883355379104614,
      "learning_rate": 3.970858537429625e-05,
      "loss": 0.5119,
      "step": 8046
    },
    {
      "epoch": 1.654229622777264,
      "grad_norm": 0.16271071135997772,
      "learning_rate": 3.969866668914848e-05,
      "loss": 0.5545,
      "step": 8047
    },
    {
      "epoch": 1.6544351937506425,
      "grad_norm": 0.18823817372322083,
      "learning_rate": 3.9688748265172625e-05,
      "loss": 0.5345,
      "step": 8048
    },
    {
      "epoch": 1.654640764724021,
      "grad_norm": 0.19708384573459625,
      "learning_rate": 3.9678830102857324e-05,
      "loss": 0.5677,
      "step": 8049
    },
    {
      "epoch": 1.6548463356973995,
      "grad_norm": 0.2034367471933365,
      "learning_rate": 3.966891220269118e-05,
      "loss": 0.5178,
      "step": 8050
    },
    {
      "epoch": 1.655051906670778,
      "grad_norm": 0.1986580491065979,
      "learning_rate": 3.9658994565162816e-05,
      "loss": 0.5545,
      "step": 8051
    },
    {
      "epoch": 1.6552574776441566,
      "grad_norm": 0.20059730112552643,
      "learning_rate": 3.964907719076083e-05,
      "loss": 0.5635,
      "step": 8052
    },
    {
      "epoch": 1.6554630486175352,
      "grad_norm": 0.19054940342903137,
      "learning_rate": 3.963916007997379e-05,
      "loss": 0.5542,
      "step": 8053
    },
    {
      "epoch": 1.6556686195909136,
      "grad_norm": 0.19373731315135956,
      "learning_rate": 3.962924323329029e-05,
      "loss": 0.5499,
      "step": 8054
    },
    {
      "epoch": 1.6558741905642922,
      "grad_norm": 0.19648055732250214,
      "learning_rate": 3.9619326651198875e-05,
      "loss": 0.5399,
      "step": 8055
    },
    {
      "epoch": 1.6560797615376708,
      "grad_norm": 0.21236325800418854,
      "learning_rate": 3.960941033418808e-05,
      "loss": 0.5433,
      "step": 8056
    },
    {
      "epoch": 1.6562853325110494,
      "grad_norm": 0.18751861155033112,
      "learning_rate": 3.959949428274645e-05,
      "loss": 0.521,
      "step": 8057
    },
    {
      "epoch": 1.656490903484428,
      "grad_norm": 0.1983969807624817,
      "learning_rate": 3.958957849736247e-05,
      "loss": 0.5316,
      "step": 8058
    },
    {
      "epoch": 1.6566964744578065,
      "grad_norm": 0.6342448592185974,
      "learning_rate": 3.9579662978524695e-05,
      "loss": 0.5639,
      "step": 8059
    },
    {
      "epoch": 1.6569020454311851,
      "grad_norm": 0.19162461161613464,
      "learning_rate": 3.9569747726721584e-05,
      "loss": 0.5219,
      "step": 8060
    },
    {
      "epoch": 1.6571076164045637,
      "grad_norm": 0.152262344956398,
      "learning_rate": 3.9559832742441625e-05,
      "loss": 0.4952,
      "step": 8061
    },
    {
      "epoch": 1.6573131873779423,
      "grad_norm": 0.16122353076934814,
      "learning_rate": 3.9549918026173265e-05,
      "loss": 0.548,
      "step": 8062
    },
    {
      "epoch": 1.657518758351321,
      "grad_norm": 0.19267982244491577,
      "learning_rate": 3.9540003578404985e-05,
      "loss": 0.5452,
      "step": 8063
    },
    {
      "epoch": 1.6577243293246995,
      "grad_norm": 0.17134782671928406,
      "learning_rate": 3.953008939962521e-05,
      "loss": 0.5009,
      "step": 8064
    },
    {
      "epoch": 1.657929900298078,
      "grad_norm": 0.16193920373916626,
      "learning_rate": 3.952017549032234e-05,
      "loss": 0.5392,
      "step": 8065
    },
    {
      "epoch": 1.6581354712714564,
      "grad_norm": 0.1981363743543625,
      "learning_rate": 3.951026185098483e-05,
      "loss": 0.5403,
      "step": 8066
    },
    {
      "epoch": 1.658341042244835,
      "grad_norm": 0.19924452900886536,
      "learning_rate": 3.950034848210107e-05,
      "loss": 0.5773,
      "step": 8067
    },
    {
      "epoch": 1.6585466132182136,
      "grad_norm": 0.16161105036735535,
      "learning_rate": 3.949043538415942e-05,
      "loss": 0.5061,
      "step": 8068
    },
    {
      "epoch": 1.658752184191592,
      "grad_norm": 0.16612055897712708,
      "learning_rate": 3.948052255764828e-05,
      "loss": 0.5527,
      "step": 8069
    },
    {
      "epoch": 1.6589577551649706,
      "grad_norm": 0.20378176867961884,
      "learning_rate": 3.947061000305599e-05,
      "loss": 0.5445,
      "step": 8070
    },
    {
      "epoch": 1.6591633261383492,
      "grad_norm": 0.21650046110153198,
      "learning_rate": 3.946069772087089e-05,
      "loss": 0.5334,
      "step": 8071
    },
    {
      "epoch": 1.6593688971117277,
      "grad_norm": 0.1963663250207901,
      "learning_rate": 3.9450785711581324e-05,
      "loss": 0.5466,
      "step": 8072
    },
    {
      "epoch": 1.6595744680851063,
      "grad_norm": 0.19677862524986267,
      "learning_rate": 3.944087397567561e-05,
      "loss": 0.5542,
      "step": 8073
    },
    {
      "epoch": 1.659780039058485,
      "grad_norm": 0.19894835352897644,
      "learning_rate": 3.943096251364205e-05,
      "loss": 0.5259,
      "step": 8074
    },
    {
      "epoch": 1.6599856100318635,
      "grad_norm": 0.20943677425384521,
      "learning_rate": 3.942105132596895e-05,
      "loss": 0.5323,
      "step": 8075
    },
    {
      "epoch": 1.660191181005242,
      "grad_norm": 0.20376256108283997,
      "learning_rate": 3.941114041314458e-05,
      "loss": 0.5369,
      "step": 8076
    },
    {
      "epoch": 1.6603967519786207,
      "grad_norm": 0.1930057853460312,
      "learning_rate": 3.9401229775657185e-05,
      "loss": 0.5351,
      "step": 8077
    },
    {
      "epoch": 1.6606023229519993,
      "grad_norm": 0.19255690276622772,
      "learning_rate": 3.939131941399504e-05,
      "loss": 0.534,
      "step": 8078
    },
    {
      "epoch": 1.6608078939253779,
      "grad_norm": 0.18883344531059265,
      "learning_rate": 3.938140932864635e-05,
      "loss": 0.5353,
      "step": 8079
    },
    {
      "epoch": 1.6610134648987565,
      "grad_norm": 0.20242716372013092,
      "learning_rate": 3.937149952009938e-05,
      "loss": 0.5459,
      "step": 8080
    },
    {
      "epoch": 1.6612190358721348,
      "grad_norm": 0.19481943547725677,
      "learning_rate": 3.9361589988842325e-05,
      "loss": 0.5526,
      "step": 8081
    },
    {
      "epoch": 1.6614246068455134,
      "grad_norm": 0.19463589787483215,
      "learning_rate": 3.935168073536337e-05,
      "loss": 0.5376,
      "step": 8082
    },
    {
      "epoch": 1.661630177818892,
      "grad_norm": 0.1837586909532547,
      "learning_rate": 3.93417717601507e-05,
      "loss": 0.5,
      "step": 8083
    },
    {
      "epoch": 1.6618357487922706,
      "grad_norm": 0.19010527431964874,
      "learning_rate": 3.9331863063692494e-05,
      "loss": 0.5356,
      "step": 8084
    },
    {
      "epoch": 1.662041319765649,
      "grad_norm": 0.19221745431423187,
      "learning_rate": 3.932195464647691e-05,
      "loss": 0.5369,
      "step": 8085
    },
    {
      "epoch": 1.6622468907390275,
      "grad_norm": 0.19402176141738892,
      "learning_rate": 3.9312046508992064e-05,
      "loss": 0.5403,
      "step": 8086
    },
    {
      "epoch": 1.6624524617124061,
      "grad_norm": 0.19770248234272003,
      "learning_rate": 3.930213865172611e-05,
      "loss": 0.5719,
      "step": 8087
    },
    {
      "epoch": 1.6626580326857847,
      "grad_norm": 0.16622693836688995,
      "learning_rate": 3.929223107516716e-05,
      "loss": 0.5024,
      "step": 8088
    },
    {
      "epoch": 1.6628636036591633,
      "grad_norm": 0.15783652663230896,
      "learning_rate": 3.92823237798033e-05,
      "loss": 0.5163,
      "step": 8089
    },
    {
      "epoch": 1.663069174632542,
      "grad_norm": 0.19830361008644104,
      "learning_rate": 3.927241676612263e-05,
      "loss": 0.5191,
      "step": 8090
    },
    {
      "epoch": 1.6632747456059205,
      "grad_norm": 0.20356783270835876,
      "learning_rate": 3.9262510034613215e-05,
      "loss": 0.5717,
      "step": 8091
    },
    {
      "epoch": 1.663480316579299,
      "grad_norm": 0.17174309492111206,
      "learning_rate": 3.92526035857631e-05,
      "loss": 0.5261,
      "step": 8092
    },
    {
      "epoch": 1.6636858875526777,
      "grad_norm": 0.1699124574661255,
      "learning_rate": 3.924269742006035e-05,
      "loss": 0.5411,
      "step": 8093
    },
    {
      "epoch": 1.6638914585260562,
      "grad_norm": 0.19385066628456116,
      "learning_rate": 3.923279153799299e-05,
      "loss": 0.5143,
      "step": 8094
    },
    {
      "epoch": 1.6640970294994348,
      "grad_norm": 0.1945018619298935,
      "learning_rate": 3.922288594004903e-05,
      "loss": 0.5342,
      "step": 8095
    },
    {
      "epoch": 1.6643026004728132,
      "grad_norm": 0.19037404656410217,
      "learning_rate": 3.921298062671649e-05,
      "loss": 0.5635,
      "step": 8096
    },
    {
      "epoch": 1.6645081714461918,
      "grad_norm": 0.1975833922624588,
      "learning_rate": 3.9203075598483335e-05,
      "loss": 0.5451,
      "step": 8097
    },
    {
      "epoch": 1.6647137424195704,
      "grad_norm": 0.1882157325744629,
      "learning_rate": 3.9193170855837564e-05,
      "loss": 0.5283,
      "step": 8098
    },
    {
      "epoch": 1.664919313392949,
      "grad_norm": 0.19174973666667938,
      "learning_rate": 3.9183266399267094e-05,
      "loss": 0.5513,
      "step": 8099
    },
    {
      "epoch": 1.6651248843663273,
      "grad_norm": 0.19739782810211182,
      "learning_rate": 3.9173362229259926e-05,
      "loss": 0.5301,
      "step": 8100
    },
    {
      "epoch": 1.665330455339706,
      "grad_norm": 0.16633886098861694,
      "learning_rate": 3.916345834630396e-05,
      "loss": 0.541,
      "step": 8101
    },
    {
      "epoch": 1.6655360263130845,
      "grad_norm": 0.16310401260852814,
      "learning_rate": 3.915355475088714e-05,
      "loss": 0.5662,
      "step": 8102
    },
    {
      "epoch": 1.665741597286463,
      "grad_norm": 0.18664813041687012,
      "learning_rate": 3.914365144349733e-05,
      "loss": 0.5332,
      "step": 8103
    },
    {
      "epoch": 1.6659471682598417,
      "grad_norm": 0.19100825488567352,
      "learning_rate": 3.913374842462244e-05,
      "loss": 0.5315,
      "step": 8104
    },
    {
      "epoch": 1.6661527392332203,
      "grad_norm": 0.20404808223247528,
      "learning_rate": 3.912384569475036e-05,
      "loss": 0.5542,
      "step": 8105
    },
    {
      "epoch": 1.6663583102065989,
      "grad_norm": 0.1687227189540863,
      "learning_rate": 3.9113943254368916e-05,
      "loss": 0.5423,
      "step": 8106
    },
    {
      "epoch": 1.6665638811799774,
      "grad_norm": 0.1573527455329895,
      "learning_rate": 3.9104041103965985e-05,
      "loss": 0.5323,
      "step": 8107
    },
    {
      "epoch": 1.666769452153356,
      "grad_norm": 0.16023261845111847,
      "learning_rate": 3.90941392440294e-05,
      "loss": 0.5108,
      "step": 8108
    },
    {
      "epoch": 1.6669750231267346,
      "grad_norm": 0.15852369368076324,
      "learning_rate": 3.9084237675046975e-05,
      "loss": 0.5464,
      "step": 8109
    },
    {
      "epoch": 1.6671805941001132,
      "grad_norm": 0.19316738843917847,
      "learning_rate": 3.90743363975065e-05,
      "loss": 0.5283,
      "step": 8110
    },
    {
      "epoch": 1.6673861650734916,
      "grad_norm": 0.1973247081041336,
      "learning_rate": 3.906443541189578e-05,
      "loss": 0.5398,
      "step": 8111
    },
    {
      "epoch": 1.6675917360468702,
      "grad_norm": 0.1693935990333557,
      "learning_rate": 3.905453471870259e-05,
      "loss": 0.509,
      "step": 8112
    },
    {
      "epoch": 1.6677973070202488,
      "grad_norm": 0.1599174290895462,
      "learning_rate": 3.9044634318414656e-05,
      "loss": 0.5468,
      "step": 8113
    },
    {
      "epoch": 1.6680028779936273,
      "grad_norm": 0.18429811298847198,
      "learning_rate": 3.903473421151978e-05,
      "loss": 0.5272,
      "step": 8114
    },
    {
      "epoch": 1.6682084489670057,
      "grad_norm": 0.19133618474006653,
      "learning_rate": 3.902483439850566e-05,
      "loss": 0.538,
      "step": 8115
    },
    {
      "epoch": 1.6684140199403843,
      "grad_norm": 0.19194607436656952,
      "learning_rate": 3.901493487986002e-05,
      "loss": 0.5341,
      "step": 8116
    },
    {
      "epoch": 1.6686195909137629,
      "grad_norm": 0.16348059475421906,
      "learning_rate": 3.900503565607057e-05,
      "loss": 0.5021,
      "step": 8117
    },
    {
      "epoch": 1.6688251618871415,
      "grad_norm": 0.16237923502922058,
      "learning_rate": 3.899513672762499e-05,
      "loss": 0.5647,
      "step": 8118
    },
    {
      "epoch": 1.66903073286052,
      "grad_norm": 0.19955293834209442,
      "learning_rate": 3.8985238095010965e-05,
      "loss": 0.5687,
      "step": 8119
    },
    {
      "epoch": 1.6692363038338986,
      "grad_norm": 0.16089332103729248,
      "learning_rate": 3.897533975871612e-05,
      "loss": 0.51,
      "step": 8120
    },
    {
      "epoch": 1.6694418748072772,
      "grad_norm": 0.161229208111763,
      "learning_rate": 3.896544171922815e-05,
      "loss": 0.5291,
      "step": 8121
    },
    {
      "epoch": 1.6696474457806558,
      "grad_norm": 0.19278062880039215,
      "learning_rate": 3.895554397703466e-05,
      "loss": 0.5307,
      "step": 8122
    },
    {
      "epoch": 1.6698530167540344,
      "grad_norm": 0.16215354204177856,
      "learning_rate": 3.8945646532623256e-05,
      "loss": 0.5,
      "step": 8123
    },
    {
      "epoch": 1.670058587727413,
      "grad_norm": 0.16377978026866913,
      "learning_rate": 3.893574938648156e-05,
      "loss": 0.5362,
      "step": 8124
    },
    {
      "epoch": 1.6702641587007916,
      "grad_norm": 0.19552935659885406,
      "learning_rate": 3.892585253909714e-05,
      "loss": 0.5255,
      "step": 8125
    },
    {
      "epoch": 1.67046972967417,
      "grad_norm": 0.164475217461586,
      "learning_rate": 3.8915955990957575e-05,
      "loss": 0.4989,
      "step": 8126
    },
    {
      "epoch": 1.6706753006475485,
      "grad_norm": 0.15810781717300415,
      "learning_rate": 3.890605974255042e-05,
      "loss": 0.5276,
      "step": 8127
    },
    {
      "epoch": 1.6708808716209271,
      "grad_norm": 0.1982525885105133,
      "learning_rate": 3.889616379436321e-05,
      "loss": 0.5349,
      "step": 8128
    },
    {
      "epoch": 1.6710864425943057,
      "grad_norm": 0.15992006659507751,
      "learning_rate": 3.88862681468835e-05,
      "loss": 0.5116,
      "step": 8129
    },
    {
      "epoch": 1.671292013567684,
      "grad_norm": 0.15967024862766266,
      "learning_rate": 3.887637280059878e-05,
      "loss": 0.5657,
      "step": 8130
    },
    {
      "epoch": 1.6714975845410627,
      "grad_norm": 0.1937428116798401,
      "learning_rate": 3.886647775599655e-05,
      "loss": 0.5581,
      "step": 8131
    },
    {
      "epoch": 1.6717031555144413,
      "grad_norm": 0.18604367971420288,
      "learning_rate": 3.885658301356429e-05,
      "loss": 0.5246,
      "step": 8132
    },
    {
      "epoch": 1.6719087264878199,
      "grad_norm": 0.18401312828063965,
      "learning_rate": 3.884668857378947e-05,
      "loss": 0.5059,
      "step": 8133
    },
    {
      "epoch": 1.6721142974611984,
      "grad_norm": 0.20079663395881653,
      "learning_rate": 3.883679443715953e-05,
      "loss": 0.539,
      "step": 8134
    },
    {
      "epoch": 1.672319868434577,
      "grad_norm": 0.18646441400051117,
      "learning_rate": 3.882690060416194e-05,
      "loss": 0.548,
      "step": 8135
    },
    {
      "epoch": 1.6725254394079556,
      "grad_norm": 0.19753128290176392,
      "learning_rate": 3.88170070752841e-05,
      "loss": 0.5599,
      "step": 8136
    },
    {
      "epoch": 1.6727310103813342,
      "grad_norm": 0.19681565463542938,
      "learning_rate": 3.8807113851013425e-05,
      "loss": 0.543,
      "step": 8137
    },
    {
      "epoch": 1.6729365813547128,
      "grad_norm": 0.16268804669380188,
      "learning_rate": 3.879722093183729e-05,
      "loss": 0.5131,
      "step": 8138
    },
    {
      "epoch": 1.6731421523280914,
      "grad_norm": 0.16209396719932556,
      "learning_rate": 3.87873283182431e-05,
      "loss": 0.5467,
      "step": 8139
    },
    {
      "epoch": 1.67334772330147,
      "grad_norm": 0.19812874495983124,
      "learning_rate": 3.877743601071821e-05,
      "loss": 0.5457,
      "step": 8140
    },
    {
      "epoch": 1.6735532942748486,
      "grad_norm": 0.15637758374214172,
      "learning_rate": 3.8767544009749944e-05,
      "loss": 0.5099,
      "step": 8141
    },
    {
      "epoch": 1.673758865248227,
      "grad_norm": 0.15744930505752563,
      "learning_rate": 3.875765231582568e-05,
      "loss": 0.5549,
      "step": 8142
    },
    {
      "epoch": 1.6739644362216055,
      "grad_norm": 0.19686995446681976,
      "learning_rate": 3.874776092943269e-05,
      "loss": 0.5183,
      "step": 8143
    },
    {
      "epoch": 1.674170007194984,
      "grad_norm": 0.1597413569688797,
      "learning_rate": 3.8737869851058315e-05,
      "loss": 0.5043,
      "step": 8144
    },
    {
      "epoch": 1.6743755781683625,
      "grad_norm": 0.1251799464225769,
      "learning_rate": 3.872797908118982e-05,
      "loss": 0.5108,
      "step": 8145
    },
    {
      "epoch": 1.674581149141741,
      "grad_norm": 0.16012680530548096,
      "learning_rate": 3.8718088620314474e-05,
      "loss": 0.5168,
      "step": 8146
    },
    {
      "epoch": 1.6747867201151196,
      "grad_norm": 0.19369451701641083,
      "learning_rate": 3.870819846891953e-05,
      "loss": 0.5266,
      "step": 8147
    },
    {
      "epoch": 1.6749922910884982,
      "grad_norm": 0.19420257210731506,
      "learning_rate": 3.869830862749224e-05,
      "loss": 0.5269,
      "step": 8148
    },
    {
      "epoch": 1.6751978620618768,
      "grad_norm": 0.16983704268932343,
      "learning_rate": 3.8688419096519844e-05,
      "loss": 0.5027,
      "step": 8149
    },
    {
      "epoch": 1.6754034330352554,
      "grad_norm": 0.16026097536087036,
      "learning_rate": 3.8678529876489526e-05,
      "loss": 0.5368,
      "step": 8150
    },
    {
      "epoch": 1.675609004008634,
      "grad_norm": 0.1565508395433426,
      "learning_rate": 3.86686409678885e-05,
      "loss": 0.5045,
      "step": 8151
    },
    {
      "epoch": 1.6758145749820126,
      "grad_norm": 0.15121006965637207,
      "learning_rate": 3.865875237120395e-05,
      "loss": 0.5409,
      "step": 8152
    },
    {
      "epoch": 1.6760201459553912,
      "grad_norm": 0.19622927904129028,
      "learning_rate": 3.864886408692303e-05,
      "loss": 0.5297,
      "step": 8153
    },
    {
      "epoch": 1.6762257169287698,
      "grad_norm": 0.20082417130470276,
      "learning_rate": 3.863897611553289e-05,
      "loss": 0.5516,
      "step": 8154
    },
    {
      "epoch": 1.6764312879021483,
      "grad_norm": 0.19279861450195312,
      "learning_rate": 3.8629088457520645e-05,
      "loss": 0.5286,
      "step": 8155
    },
    {
      "epoch": 1.676636858875527,
      "grad_norm": 0.18971529603004456,
      "learning_rate": 3.861920111337345e-05,
      "loss": 0.5381,
      "step": 8156
    },
    {
      "epoch": 1.6768424298489053,
      "grad_norm": 0.18667519092559814,
      "learning_rate": 3.8609314083578396e-05,
      "loss": 0.529,
      "step": 8157
    },
    {
      "epoch": 1.677048000822284,
      "grad_norm": 0.18965506553649902,
      "learning_rate": 3.859942736862257e-05,
      "loss": 0.5504,
      "step": 8158
    },
    {
      "epoch": 1.6772535717956625,
      "grad_norm": 0.1879250854253769,
      "learning_rate": 3.858954096899303e-05,
      "loss": 0.521,
      "step": 8159
    },
    {
      "epoch": 1.6774591427690408,
      "grad_norm": 0.16116970777511597,
      "learning_rate": 3.8579654885176854e-05,
      "loss": 0.5171,
      "step": 8160
    },
    {
      "epoch": 1.6776647137424194,
      "grad_norm": 0.16163001954555511,
      "learning_rate": 3.856976911766107e-05,
      "loss": 0.5526,
      "step": 8161
    },
    {
      "epoch": 1.677870284715798,
      "grad_norm": 0.19858844578266144,
      "learning_rate": 3.855988366693269e-05,
      "loss": 0.5105,
      "step": 8162
    },
    {
      "epoch": 1.6780758556891766,
      "grad_norm": 0.19145843386650085,
      "learning_rate": 3.854999853347876e-05,
      "loss": 0.5701,
      "step": 8163
    },
    {
      "epoch": 1.6782814266625552,
      "grad_norm": 0.19304659962654114,
      "learning_rate": 3.854011371778625e-05,
      "loss": 0.5276,
      "step": 8164
    },
    {
      "epoch": 1.6784869976359338,
      "grad_norm": 0.19083738327026367,
      "learning_rate": 3.853022922034215e-05,
      "loss": 0.5204,
      "step": 8165
    },
    {
      "epoch": 1.6786925686093124,
      "grad_norm": 0.18819309771060944,
      "learning_rate": 3.852034504163341e-05,
      "loss": 0.5283,
      "step": 8166
    },
    {
      "epoch": 1.678898139582691,
      "grad_norm": 0.19191038608551025,
      "learning_rate": 3.851046118214699e-05,
      "loss": 0.5261,
      "step": 8167
    },
    {
      "epoch": 1.6791037105560696,
      "grad_norm": 0.19225665926933289,
      "learning_rate": 3.850057764236981e-05,
      "loss": 0.5282,
      "step": 8168
    },
    {
      "epoch": 1.6793092815294481,
      "grad_norm": 0.19503363966941833,
      "learning_rate": 3.849069442278878e-05,
      "loss": 0.5355,
      "step": 8169
    },
    {
      "epoch": 1.6795148525028267,
      "grad_norm": 0.19625093042850494,
      "learning_rate": 3.848081152389083e-05,
      "loss": 0.5557,
      "step": 8170
    },
    {
      "epoch": 1.6797204234762053,
      "grad_norm": 0.19365637004375458,
      "learning_rate": 3.8470928946162813e-05,
      "loss": 0.5369,
      "step": 8171
    },
    {
      "epoch": 1.6799259944495837,
      "grad_norm": 0.19885706901550293,
      "learning_rate": 3.8461046690091616e-05,
      "loss": 0.5276,
      "step": 8172
    },
    {
      "epoch": 1.6801315654229623,
      "grad_norm": 0.19316908717155457,
      "learning_rate": 3.845116475616409e-05,
      "loss": 0.5332,
      "step": 8173
    },
    {
      "epoch": 1.6803371363963409,
      "grad_norm": 0.1912158727645874,
      "learning_rate": 3.844128314486706e-05,
      "loss": 0.542,
      "step": 8174
    },
    {
      "epoch": 1.6805427073697194,
      "grad_norm": 0.1975051760673523,
      "learning_rate": 3.843140185668737e-05,
      "loss": 0.5467,
      "step": 8175
    },
    {
      "epoch": 1.6807482783430978,
      "grad_norm": 0.16475236415863037,
      "learning_rate": 3.8421520892111776e-05,
      "loss": 0.5106,
      "step": 8176
    },
    {
      "epoch": 1.6809538493164764,
      "grad_norm": 0.16820210218429565,
      "learning_rate": 3.841164025162713e-05,
      "loss": 0.5522,
      "step": 8177
    },
    {
      "epoch": 1.681159420289855,
      "grad_norm": 0.19619794189929962,
      "learning_rate": 3.840175993572016e-05,
      "loss": 0.5367,
      "step": 8178
    },
    {
      "epoch": 1.6813649912632336,
      "grad_norm": 0.19805863499641418,
      "learning_rate": 3.839187994487765e-05,
      "loss": 0.5383,
      "step": 8179
    },
    {
      "epoch": 1.6815705622366122,
      "grad_norm": 0.18975287675857544,
      "learning_rate": 3.838200027958632e-05,
      "loss": 0.5476,
      "step": 8180
    },
    {
      "epoch": 1.6817761332099908,
      "grad_norm": 0.18960921466350555,
      "learning_rate": 3.837212094033291e-05,
      "loss": 0.5452,
      "step": 8181
    },
    {
      "epoch": 1.6819817041833693,
      "grad_norm": 0.1594635397195816,
      "learning_rate": 3.8362241927604106e-05,
      "loss": 0.5045,
      "step": 8182
    },
    {
      "epoch": 1.682187275156748,
      "grad_norm": 0.1598910242319107,
      "learning_rate": 3.835236324188662e-05,
      "loss": 0.5456,
      "step": 8183
    },
    {
      "epoch": 1.6823928461301265,
      "grad_norm": 0.19848540425300598,
      "learning_rate": 3.834248488366714e-05,
      "loss": 0.5193,
      "step": 8184
    },
    {
      "epoch": 1.682598417103505,
      "grad_norm": 0.2017425149679184,
      "learning_rate": 3.833260685343231e-05,
      "loss": 0.5427,
      "step": 8185
    },
    {
      "epoch": 1.6828039880768837,
      "grad_norm": 0.19509434700012207,
      "learning_rate": 3.832272915166878e-05,
      "loss": 0.5208,
      "step": 8186
    },
    {
      "epoch": 1.683009559050262,
      "grad_norm": 0.19122706353664398,
      "learning_rate": 3.8312851778863176e-05,
      "loss": 0.5213,
      "step": 8187
    },
    {
      "epoch": 1.6832151300236406,
      "grad_norm": 0.18763068318367004,
      "learning_rate": 3.8302974735502104e-05,
      "loss": 0.5363,
      "step": 8188
    },
    {
      "epoch": 1.6834207009970192,
      "grad_norm": 0.2000308781862259,
      "learning_rate": 3.829309802207215e-05,
      "loss": 0.5397,
      "step": 8189
    },
    {
      "epoch": 1.6836262719703978,
      "grad_norm": 0.19013464450836182,
      "learning_rate": 3.828322163905993e-05,
      "loss": 0.5073,
      "step": 8190
    },
    {
      "epoch": 1.6838318429437762,
      "grad_norm": 0.19034752249717712,
      "learning_rate": 3.827334558695198e-05,
      "loss": 0.5318,
      "step": 8191
    },
    {
      "epoch": 1.6840374139171548,
      "grad_norm": 0.16001807153224945,
      "learning_rate": 3.8263469866234844e-05,
      "loss": 0.4987,
      "step": 8192
    },
    {
      "epoch": 1.6842429848905334,
      "grad_norm": 0.15920346975326538,
      "learning_rate": 3.825359447739507e-05,
      "loss": 0.5404,
      "step": 8193
    },
    {
      "epoch": 1.684448555863912,
      "grad_norm": 0.19532343745231628,
      "learning_rate": 3.8243719420919165e-05,
      "loss": 0.5134,
      "step": 8194
    },
    {
      "epoch": 1.6846541268372905,
      "grad_norm": 0.19484242796897888,
      "learning_rate": 3.823384469729363e-05,
      "loss": 0.5334,
      "step": 8195
    },
    {
      "epoch": 1.6848596978106691,
      "grad_norm": 0.20333658158779144,
      "learning_rate": 3.822397030700491e-05,
      "loss": 0.5491,
      "step": 8196
    },
    {
      "epoch": 1.6850652687840477,
      "grad_norm": 0.20554953813552856,
      "learning_rate": 3.821409625053953e-05,
      "loss": 0.5479,
      "step": 8197
    },
    {
      "epoch": 1.6852708397574263,
      "grad_norm": 0.19656214118003845,
      "learning_rate": 3.820422252838391e-05,
      "loss": 0.5334,
      "step": 8198
    },
    {
      "epoch": 1.685476410730805,
      "grad_norm": 0.19906407594680786,
      "learning_rate": 3.819434914102448e-05,
      "loss": 0.5302,
      "step": 8199
    },
    {
      "epoch": 1.6856819817041835,
      "grad_norm": 0.16761255264282227,
      "learning_rate": 3.818447608894767e-05,
      "loss": 0.5145,
      "step": 8200
    },
    {
      "epoch": 1.685887552677562,
      "grad_norm": 0.16284339129924774,
      "learning_rate": 3.8174603372639846e-05,
      "loss": 0.5399,
      "step": 8201
    },
    {
      "epoch": 1.6860931236509404,
      "grad_norm": 0.19720837473869324,
      "learning_rate": 3.816473099258742e-05,
      "loss": 0.5452,
      "step": 8202
    },
    {
      "epoch": 1.686298694624319,
      "grad_norm": 0.19352254271507263,
      "learning_rate": 3.8154858949276744e-05,
      "loss": 0.5399,
      "step": 8203
    },
    {
      "epoch": 1.6865042655976976,
      "grad_norm": 0.16425921022891998,
      "learning_rate": 3.814498724319418e-05,
      "loss": 0.5016,
      "step": 8204
    },
    {
      "epoch": 1.6867098365710762,
      "grad_norm": 0.15797263383865356,
      "learning_rate": 3.813511587482606e-05,
      "loss": 0.5325,
      "step": 8205
    },
    {
      "epoch": 1.6869154075444546,
      "grad_norm": 0.16672199964523315,
      "learning_rate": 3.812524484465869e-05,
      "loss": 0.4982,
      "step": 8206
    },
    {
      "epoch": 1.6871209785178332,
      "grad_norm": 0.2091359794139862,
      "learning_rate": 3.811537415317837e-05,
      "loss": 0.5206,
      "step": 8207
    },
    {
      "epoch": 1.6873265494912117,
      "grad_norm": 0.19015903770923615,
      "learning_rate": 3.81055038008714e-05,
      "loss": 0.5234,
      "step": 8208
    },
    {
      "epoch": 1.6875321204645903,
      "grad_norm": 0.20703433454036713,
      "learning_rate": 3.8095633788224024e-05,
      "loss": 0.5743,
      "step": 8209
    },
    {
      "epoch": 1.687737691437969,
      "grad_norm": 0.19770927727222443,
      "learning_rate": 3.8085764115722484e-05,
      "loss": 0.5482,
      "step": 8210
    },
    {
      "epoch": 1.6879432624113475,
      "grad_norm": 0.15969951450824738,
      "learning_rate": 3.8075894783853054e-05,
      "loss": 0.4893,
      "step": 8211
    },
    {
      "epoch": 1.688148833384726,
      "grad_norm": 0.15302079916000366,
      "learning_rate": 3.806602579310191e-05,
      "loss": 0.5153,
      "step": 8212
    },
    {
      "epoch": 1.6883544043581047,
      "grad_norm": 0.19498853385448456,
      "learning_rate": 3.805615714395527e-05,
      "loss": 0.546,
      "step": 8213
    },
    {
      "epoch": 1.6885599753314833,
      "grad_norm": 0.1922113597393036,
      "learning_rate": 3.804628883689931e-05,
      "loss": 0.5351,
      "step": 8214
    },
    {
      "epoch": 1.6887655463048619,
      "grad_norm": 0.19428758323192596,
      "learning_rate": 3.803642087242021e-05,
      "loss": 0.5452,
      "step": 8215
    },
    {
      "epoch": 1.6889711172782405,
      "grad_norm": 0.19760240614414215,
      "learning_rate": 3.8026553251004096e-05,
      "loss": 0.5356,
      "step": 8216
    },
    {
      "epoch": 1.6891766882516188,
      "grad_norm": 0.16687412559986115,
      "learning_rate": 3.8016685973137095e-05,
      "loss": 0.5093,
      "step": 8217
    },
    {
      "epoch": 1.6893822592249974,
      "grad_norm": 0.1642359495162964,
      "learning_rate": 3.800681903930535e-05,
      "loss": 0.5485,
      "step": 8218
    },
    {
      "epoch": 1.689587830198376,
      "grad_norm": 0.1901901662349701,
      "learning_rate": 3.799695244999495e-05,
      "loss": 0.5102,
      "step": 8219
    },
    {
      "epoch": 1.6897934011717546,
      "grad_norm": 0.19654683768749237,
      "learning_rate": 3.798708620569197e-05,
      "loss": 0.5338,
      "step": 8220
    },
    {
      "epoch": 1.689998972145133,
      "grad_norm": 0.1945556253194809,
      "learning_rate": 3.797722030688248e-05,
      "loss": 0.5369,
      "step": 8221
    },
    {
      "epoch": 1.6902045431185115,
      "grad_norm": 0.19918568432331085,
      "learning_rate": 3.7967354754052514e-05,
      "loss": 0.5409,
      "step": 8222
    },
    {
      "epoch": 1.6904101140918901,
      "grad_norm": 0.19335860013961792,
      "learning_rate": 3.7957489547688096e-05,
      "loss": 0.5451,
      "step": 8223
    },
    {
      "epoch": 1.6906156850652687,
      "grad_norm": 0.19655676186084747,
      "learning_rate": 3.794762468827526e-05,
      "loss": 0.5484,
      "step": 8224
    },
    {
      "epoch": 1.6908212560386473,
      "grad_norm": 0.20534905791282654,
      "learning_rate": 3.79377601763e-05,
      "loss": 0.5321,
      "step": 8225
    },
    {
      "epoch": 1.691026827012026,
      "grad_norm": 0.16838058829307556,
      "learning_rate": 3.7927896012248275e-05,
      "loss": 0.4978,
      "step": 8226
    },
    {
      "epoch": 1.6912323979854045,
      "grad_norm": 0.16190923750400543,
      "learning_rate": 3.7918032196606064e-05,
      "loss": 0.5627,
      "step": 8227
    },
    {
      "epoch": 1.691437968958783,
      "grad_norm": 0.197221040725708,
      "learning_rate": 3.790816872985931e-05,
      "loss": 0.5287,
      "step": 8228
    },
    {
      "epoch": 1.6916435399321617,
      "grad_norm": 0.19407358765602112,
      "learning_rate": 3.789830561249394e-05,
      "loss": 0.5409,
      "step": 8229
    },
    {
      "epoch": 1.6918491109055402,
      "grad_norm": 0.19359079003334045,
      "learning_rate": 3.7888442844995856e-05,
      "loss": 0.5378,
      "step": 8230
    },
    {
      "epoch": 1.6920546818789188,
      "grad_norm": 0.21710537374019623,
      "learning_rate": 3.7878580427850937e-05,
      "loss": 0.5545,
      "step": 8231
    },
    {
      "epoch": 1.6922602528522974,
      "grad_norm": 0.19026683270931244,
      "learning_rate": 3.786871836154509e-05,
      "loss": 0.549,
      "step": 8232
    },
    {
      "epoch": 1.6924658238256758,
      "grad_norm": 0.19044183194637299,
      "learning_rate": 3.785885664656415e-05,
      "loss": 0.5286,
      "step": 8233
    },
    {
      "epoch": 1.6926713947990544,
      "grad_norm": 0.20085959136486053,
      "learning_rate": 3.7848995283393984e-05,
      "loss": 0.5414,
      "step": 8234
    },
    {
      "epoch": 1.692876965772433,
      "grad_norm": 0.16187427937984467,
      "learning_rate": 3.783913427252038e-05,
      "loss": 0.5116,
      "step": 8235
    },
    {
      "epoch": 1.6930825367458113,
      "grad_norm": 0.16329748928546906,
      "learning_rate": 3.782927361442916e-05,
      "loss": 0.5387,
      "step": 8236
    },
    {
      "epoch": 1.69328810771919,
      "grad_norm": 0.1952928751707077,
      "learning_rate": 3.781941330960612e-05,
      "loss": 0.569,
      "step": 8237
    },
    {
      "epoch": 1.6934936786925685,
      "grad_norm": 0.19381776452064514,
      "learning_rate": 3.780955335853701e-05,
      "loss": 0.5336,
      "step": 8238
    },
    {
      "epoch": 1.693699249665947,
      "grad_norm": 0.2035483717918396,
      "learning_rate": 3.779969376170761e-05,
      "loss": 0.5385,
      "step": 8239
    },
    {
      "epoch": 1.6939048206393257,
      "grad_norm": 0.19148887693881989,
      "learning_rate": 3.778983451960365e-05,
      "loss": 0.5156,
      "step": 8240
    },
    {
      "epoch": 1.6941103916127043,
      "grad_norm": 0.17306075990200043,
      "learning_rate": 3.7779975632710836e-05,
      "loss": 0.5245,
      "step": 8241
    },
    {
      "epoch": 1.6943159625860829,
      "grad_norm": 0.12030526250600815,
      "learning_rate": 3.7770117101514885e-05,
      "loss": 0.5117,
      "step": 8242
    },
    {
      "epoch": 1.6945215335594614,
      "grad_norm": 0.15814997255802155,
      "learning_rate": 3.776025892650147e-05,
      "loss": 0.55,
      "step": 8243
    },
    {
      "epoch": 1.69472710453284,
      "grad_norm": 0.18655110895633698,
      "learning_rate": 3.775040110815624e-05,
      "loss": 0.4924,
      "step": 8244
    },
    {
      "epoch": 1.6949326755062186,
      "grad_norm": 0.19482672214508057,
      "learning_rate": 3.7740543646964876e-05,
      "loss": 0.5431,
      "step": 8245
    },
    {
      "epoch": 1.6951382464795972,
      "grad_norm": 0.1627287119626999,
      "learning_rate": 3.7730686543412994e-05,
      "loss": 0.5191,
      "step": 8246
    },
    {
      "epoch": 1.6953438174529758,
      "grad_norm": 0.17188504338264465,
      "learning_rate": 3.772082979798621e-05,
      "loss": 0.5403,
      "step": 8247
    },
    {
      "epoch": 1.6955493884263542,
      "grad_norm": 0.21016332507133484,
      "learning_rate": 3.7710973411170126e-05,
      "loss": 0.5456,
      "step": 8248
    },
    {
      "epoch": 1.6957549593997328,
      "grad_norm": 0.1925675868988037,
      "learning_rate": 3.770111738345031e-05,
      "loss": 0.5214,
      "step": 8249
    },
    {
      "epoch": 1.6959605303731113,
      "grad_norm": 0.19163696467876434,
      "learning_rate": 3.769126171531232e-05,
      "loss": 0.5354,
      "step": 8250
    },
    {
      "epoch": 1.69616610134649,
      "grad_norm": 0.16222819685935974,
      "learning_rate": 3.7681406407241716e-05,
      "loss": 0.5241,
      "step": 8251
    },
    {
      "epoch": 1.6963716723198683,
      "grad_norm": 0.16099952161312103,
      "learning_rate": 3.767155145972399e-05,
      "loss": 0.5321,
      "step": 8252
    },
    {
      "epoch": 1.6965772432932469,
      "grad_norm": 0.1959654986858368,
      "learning_rate": 3.766169687324468e-05,
      "loss": 0.54,
      "step": 8253
    },
    {
      "epoch": 1.6967828142666255,
      "grad_norm": 0.19316841661930084,
      "learning_rate": 3.7651842648289276e-05,
      "loss": 0.5356,
      "step": 8254
    },
    {
      "epoch": 1.696988385240004,
      "grad_norm": 0.202810600399971,
      "learning_rate": 3.7641988785343236e-05,
      "loss": 0.5506,
      "step": 8255
    },
    {
      "epoch": 1.6971939562133826,
      "grad_norm": 0.1944981962442398,
      "learning_rate": 3.763213528489201e-05,
      "loss": 0.5019,
      "step": 8256
    },
    {
      "epoch": 1.6973995271867612,
      "grad_norm": 0.16003461182117462,
      "learning_rate": 3.762228214742105e-05,
      "loss": 0.504,
      "step": 8257
    },
    {
      "epoch": 1.6976050981601398,
      "grad_norm": 0.15627720952033997,
      "learning_rate": 3.7612429373415754e-05,
      "loss": 0.5165,
      "step": 8258
    },
    {
      "epoch": 1.6978106691335184,
      "grad_norm": 0.19209109246730804,
      "learning_rate": 3.760257696336154e-05,
      "loss": 0.5114,
      "step": 8259
    },
    {
      "epoch": 1.698016240106897,
      "grad_norm": 0.16496042907238007,
      "learning_rate": 3.759272491774378e-05,
      "loss": 0.5113,
      "step": 8260
    },
    {
      "epoch": 1.6982218110802756,
      "grad_norm": 0.16691668331623077,
      "learning_rate": 3.758287323704785e-05,
      "loss": 0.5469,
      "step": 8261
    },
    {
      "epoch": 1.6984273820536542,
      "grad_norm": 0.20020011067390442,
      "learning_rate": 3.757302192175909e-05,
      "loss": 0.5397,
      "step": 8262
    },
    {
      "epoch": 1.6986329530270325,
      "grad_norm": 0.19349105656147003,
      "learning_rate": 3.756317097236282e-05,
      "loss": 0.5422,
      "step": 8263
    },
    {
      "epoch": 1.6988385240004111,
      "grad_norm": 0.16651464998722076,
      "learning_rate": 3.755332038934436e-05,
      "loss": 0.4836,
      "step": 8264
    },
    {
      "epoch": 1.6990440949737897,
      "grad_norm": 0.12502692639827728,
      "learning_rate": 3.754347017318897e-05,
      "loss": 0.5132,
      "step": 8265
    },
    {
      "epoch": 1.6992496659471683,
      "grad_norm": 0.12334790080785751,
      "learning_rate": 3.7533620324381984e-05,
      "loss": 0.5108,
      "step": 8266
    },
    {
      "epoch": 1.6994552369205467,
      "grad_norm": 0.12631775438785553,
      "learning_rate": 3.752377084340863e-05,
      "loss": 0.5167,
      "step": 8267
    },
    {
      "epoch": 1.6996608078939253,
      "grad_norm": 0.16306687891483307,
      "learning_rate": 3.7513921730754125e-05,
      "loss": 0.5369,
      "step": 8268
    },
    {
      "epoch": 1.6998663788673039,
      "grad_norm": 0.19654233753681183,
      "learning_rate": 3.750407298690372e-05,
      "loss": 0.52,
      "step": 8269
    },
    {
      "epoch": 1.7000719498406824,
      "grad_norm": 0.1925351619720459,
      "learning_rate": 3.74942246123426e-05,
      "loss": 0.5356,
      "step": 8270
    },
    {
      "epoch": 1.700277520814061,
      "grad_norm": 0.165648952126503,
      "learning_rate": 3.7484376607555954e-05,
      "loss": 0.5244,
      "step": 8271
    },
    {
      "epoch": 1.7004830917874396,
      "grad_norm": 0.1643042266368866,
      "learning_rate": 3.747452897302892e-05,
      "loss": 0.5356,
      "step": 8272
    },
    {
      "epoch": 1.7006886627608182,
      "grad_norm": 0.1931808739900589,
      "learning_rate": 3.7464681709246696e-05,
      "loss": 0.5371,
      "step": 8273
    },
    {
      "epoch": 1.7008942337341968,
      "grad_norm": 0.19738541543483734,
      "learning_rate": 3.745483481669438e-05,
      "loss": 0.5506,
      "step": 8274
    },
    {
      "epoch": 1.7010998047075754,
      "grad_norm": 0.1938776969909668,
      "learning_rate": 3.744498829585709e-05,
      "loss": 0.5548,
      "step": 8275
    },
    {
      "epoch": 1.701305375680954,
      "grad_norm": 0.192849263548851,
      "learning_rate": 3.743514214721991e-05,
      "loss": 0.5506,
      "step": 8276
    },
    {
      "epoch": 1.7015109466543326,
      "grad_norm": 0.19882553815841675,
      "learning_rate": 3.742529637126791e-05,
      "loss": 0.5341,
      "step": 8277
    },
    {
      "epoch": 1.701716517627711,
      "grad_norm": 0.1962941288948059,
      "learning_rate": 3.741545096848617e-05,
      "loss": 0.5582,
      "step": 8278
    },
    {
      "epoch": 1.7019220886010895,
      "grad_norm": 0.1944989264011383,
      "learning_rate": 3.7405605939359694e-05,
      "loss": 0.5621,
      "step": 8279
    },
    {
      "epoch": 1.702127659574468,
      "grad_norm": 0.19153885543346405,
      "learning_rate": 3.7395761284373516e-05,
      "loss": 0.5256,
      "step": 8280
    },
    {
      "epoch": 1.7023332305478467,
      "grad_norm": 0.1736639142036438,
      "learning_rate": 3.738591700401265e-05,
      "loss": 0.5124,
      "step": 8281
    },
    {
      "epoch": 1.702538801521225,
      "grad_norm": 0.160393625497818,
      "learning_rate": 3.7376073098762065e-05,
      "loss": 0.5521,
      "step": 8282
    },
    {
      "epoch": 1.7027443724946036,
      "grad_norm": 0.17553849518299103,
      "learning_rate": 3.736622956910673e-05,
      "loss": 0.5196,
      "step": 8283
    },
    {
      "epoch": 1.7029499434679822,
      "grad_norm": 0.16473659873008728,
      "learning_rate": 3.735638641553157e-05,
      "loss": 0.5314,
      "step": 8284
    },
    {
      "epoch": 1.7031555144413608,
      "grad_norm": 0.15864580869674683,
      "learning_rate": 3.734654363852153e-05,
      "loss": 0.4975,
      "step": 8285
    },
    {
      "epoch": 1.7033610854147394,
      "grad_norm": 0.15445110201835632,
      "learning_rate": 3.7336701238561504e-05,
      "loss": 0.5165,
      "step": 8286
    },
    {
      "epoch": 1.703566656388118,
      "grad_norm": 0.19712099432945251,
      "learning_rate": 3.73268592161364e-05,
      "loss": 0.5323,
      "step": 8287
    },
    {
      "epoch": 1.7037722273614966,
      "grad_norm": 0.9865581393241882,
      "learning_rate": 3.731701757173108e-05,
      "loss": 0.5455,
      "step": 8288
    },
    {
      "epoch": 1.7039777983348752,
      "grad_norm": 0.1986110508441925,
      "learning_rate": 3.730717630583038e-05,
      "loss": 0.5355,
      "step": 8289
    },
    {
      "epoch": 1.7041833693082538,
      "grad_norm": 0.1951410174369812,
      "learning_rate": 3.729733541891917e-05,
      "loss": 0.5557,
      "step": 8290
    },
    {
      "epoch": 1.7043889402816323,
      "grad_norm": 0.19467894732952118,
      "learning_rate": 3.728749491148223e-05,
      "loss": 0.523,
      "step": 8291
    },
    {
      "epoch": 1.704594511255011,
      "grad_norm": 0.2036120444536209,
      "learning_rate": 3.727765478400437e-05,
      "loss": 0.5361,
      "step": 8292
    },
    {
      "epoch": 1.7048000822283893,
      "grad_norm": 0.17244306206703186,
      "learning_rate": 3.726781503697034e-05,
      "loss": 0.5073,
      "step": 8293
    },
    {
      "epoch": 1.705005653201768,
      "grad_norm": 0.18532809615135193,
      "learning_rate": 3.7257975670864954e-05,
      "loss": 0.5654,
      "step": 8294
    },
    {
      "epoch": 1.7052112241751465,
      "grad_norm": 0.1989675760269165,
      "learning_rate": 3.724813668617292e-05,
      "loss": 0.5094,
      "step": 8295
    },
    {
      "epoch": 1.705416795148525,
      "grad_norm": 0.15224121510982513,
      "learning_rate": 3.723829808337895e-05,
      "loss": 0.5202,
      "step": 8296
    },
    {
      "epoch": 1.7056223661219034,
      "grad_norm": 0.1623305380344391,
      "learning_rate": 3.722845986296776e-05,
      "loss": 0.5323,
      "step": 8297
    },
    {
      "epoch": 1.705827937095282,
      "grad_norm": 0.20320528745651245,
      "learning_rate": 3.721862202542403e-05,
      "loss": 0.5185,
      "step": 8298
    },
    {
      "epoch": 1.7060335080686606,
      "grad_norm": 0.17598193883895874,
      "learning_rate": 3.7208784571232404e-05,
      "loss": 0.5198,
      "step": 8299
    },
    {
      "epoch": 1.7062390790420392,
      "grad_norm": 0.1603892594575882,
      "learning_rate": 3.7198947500877554e-05,
      "loss": 0.5437,
      "step": 8300
    },
    {
      "epoch": 1.7064446500154178,
      "grad_norm": 0.16339556872844696,
      "learning_rate": 3.71891108148441e-05,
      "loss": 0.507,
      "step": 8301
    },
    {
      "epoch": 1.7066502209887964,
      "grad_norm": 0.12459365278482437,
      "learning_rate": 3.717927451361665e-05,
      "loss": 0.5091,
      "step": 8302
    },
    {
      "epoch": 1.706855791962175,
      "grad_norm": 0.15585310757160187,
      "learning_rate": 3.7169438597679804e-05,
      "loss": 0.5252,
      "step": 8303
    },
    {
      "epoch": 1.7070613629355536,
      "grad_norm": 0.19386261701583862,
      "learning_rate": 3.7159603067518105e-05,
      "loss": 0.552,
      "step": 8304
    },
    {
      "epoch": 1.7072669339089321,
      "grad_norm": 0.19203509390354156,
      "learning_rate": 3.714976792361612e-05,
      "loss": 0.5265,
      "step": 8305
    },
    {
      "epoch": 1.7074725048823107,
      "grad_norm": 0.1666734516620636,
      "learning_rate": 3.713993316645839e-05,
      "loss": 0.5117,
      "step": 8306
    },
    {
      "epoch": 1.7076780758556893,
      "grad_norm": 0.1642848700284958,
      "learning_rate": 3.713009879652938e-05,
      "loss": 0.5366,
      "step": 8307
    },
    {
      "epoch": 1.7078836468290677,
      "grad_norm": 0.19008807837963104,
      "learning_rate": 3.712026481431364e-05,
      "loss": 0.5266,
      "step": 8308
    },
    {
      "epoch": 1.7080892178024463,
      "grad_norm": 0.19728736579418182,
      "learning_rate": 3.711043122029563e-05,
      "loss": 0.5425,
      "step": 8309
    },
    {
      "epoch": 1.7082947887758249,
      "grad_norm": 0.1897844821214676,
      "learning_rate": 3.71005980149598e-05,
      "loss": 0.5437,
      "step": 8310
    },
    {
      "epoch": 1.7085003597492034,
      "grad_norm": 0.19176128506660461,
      "learning_rate": 3.709076519879057e-05,
      "loss": 0.5434,
      "step": 8311
    },
    {
      "epoch": 1.7087059307225818,
      "grad_norm": 0.1628829389810562,
      "learning_rate": 3.7080932772272376e-05,
      "loss": 0.503,
      "step": 8312
    },
    {
      "epoch": 1.7089115016959604,
      "grad_norm": 0.16427487134933472,
      "learning_rate": 3.707110073588962e-05,
      "loss": 0.5303,
      "step": 8313
    },
    {
      "epoch": 1.709117072669339,
      "grad_norm": 0.19906549155712128,
      "learning_rate": 3.706126909012664e-05,
      "loss": 0.5186,
      "step": 8314
    },
    {
      "epoch": 1.7093226436427176,
      "grad_norm": 0.19773396849632263,
      "learning_rate": 3.7051437835467854e-05,
      "loss": 0.544,
      "step": 8315
    },
    {
      "epoch": 1.7095282146160962,
      "grad_norm": 0.19623300433158875,
      "learning_rate": 3.7041606972397575e-05,
      "loss": 0.5391,
      "step": 8316
    },
    {
      "epoch": 1.7097337855894748,
      "grad_norm": 0.1944045126438141,
      "learning_rate": 3.703177650140011e-05,
      "loss": 0.5314,
      "step": 8317
    },
    {
      "epoch": 1.7099393565628533,
      "grad_norm": 0.187159925699234,
      "learning_rate": 3.702194642295979e-05,
      "loss": 0.5218,
      "step": 8318
    },
    {
      "epoch": 1.710144927536232,
      "grad_norm": 0.19343869388103485,
      "learning_rate": 3.701211673756087e-05,
      "loss": 0.5396,
      "step": 8319
    },
    {
      "epoch": 1.7103504985096105,
      "grad_norm": 0.1905796229839325,
      "learning_rate": 3.700228744568762e-05,
      "loss": 0.5374,
      "step": 8320
    },
    {
      "epoch": 1.710556069482989,
      "grad_norm": 0.19070343673229218,
      "learning_rate": 3.6992458547824285e-05,
      "loss": 0.5484,
      "step": 8321
    },
    {
      "epoch": 1.7107616404563677,
      "grad_norm": 0.1975802779197693,
      "learning_rate": 3.69826300444551e-05,
      "loss": 0.5266,
      "step": 8322
    },
    {
      "epoch": 1.7109672114297463,
      "grad_norm": 0.18827404081821442,
      "learning_rate": 3.6972801936064244e-05,
      "loss": 0.5176,
      "step": 8323
    },
    {
      "epoch": 1.7111727824031246,
      "grad_norm": 0.21654710173606873,
      "learning_rate": 3.6962974223135936e-05,
      "loss": 0.5777,
      "step": 8324
    },
    {
      "epoch": 1.7113783533765032,
      "grad_norm": 0.19230619072914124,
      "learning_rate": 3.695314690615432e-05,
      "loss": 0.5154,
      "step": 8325
    },
    {
      "epoch": 1.7115839243498818,
      "grad_norm": 0.16557452082633972,
      "learning_rate": 3.694331998560354e-05,
      "loss": 0.5188,
      "step": 8326
    },
    {
      "epoch": 1.7117894953232602,
      "grad_norm": 0.15893855690956116,
      "learning_rate": 3.693349346196773e-05,
      "loss": 0.5331,
      "step": 8327
    },
    {
      "epoch": 1.7119950662966388,
      "grad_norm": 0.19809909164905548,
      "learning_rate": 3.692366733573098e-05,
      "loss": 0.5245,
      "step": 8328
    },
    {
      "epoch": 1.7122006372700174,
      "grad_norm": 0.19923657178878784,
      "learning_rate": 3.691384160737741e-05,
      "loss": 0.5283,
      "step": 8329
    },
    {
      "epoch": 1.712406208243396,
      "grad_norm": 0.2039818912744522,
      "learning_rate": 3.690401627739107e-05,
      "loss": 0.5497,
      "step": 8330
    },
    {
      "epoch": 1.7126117792167745,
      "grad_norm": 0.194504514336586,
      "learning_rate": 3.6894191346255995e-05,
      "loss": 0.5249,
      "step": 8331
    },
    {
      "epoch": 1.7128173501901531,
      "grad_norm": 0.535821795463562,
      "learning_rate": 3.688436681445623e-05,
      "loss": 0.5977,
      "step": 8332
    },
    {
      "epoch": 1.7130229211635317,
      "grad_norm": 0.19687367975711823,
      "learning_rate": 3.687454268247578e-05,
      "loss": 0.5581,
      "step": 8333
    },
    {
      "epoch": 1.7132284921369103,
      "grad_norm": 0.1983097642660141,
      "learning_rate": 3.686471895079863e-05,
      "loss": 0.5311,
      "step": 8334
    },
    {
      "epoch": 1.713434063110289,
      "grad_norm": 0.19524888694286346,
      "learning_rate": 3.685489561990875e-05,
      "loss": 0.558,
      "step": 8335
    },
    {
      "epoch": 1.7136396340836675,
      "grad_norm": 0.19910888373851776,
      "learning_rate": 3.684507269029011e-05,
      "loss": 0.547,
      "step": 8336
    },
    {
      "epoch": 1.713845205057046,
      "grad_norm": 0.1981588751077652,
      "learning_rate": 3.683525016242662e-05,
      "loss": 0.5375,
      "step": 8337
    },
    {
      "epoch": 1.7140507760304247,
      "grad_norm": 0.20191727578639984,
      "learning_rate": 3.6825428036802184e-05,
      "loss": 0.5574,
      "step": 8338
    },
    {
      "epoch": 1.714256347003803,
      "grad_norm": 0.16322053968906403,
      "learning_rate": 3.681560631390071e-05,
      "loss": 0.5131,
      "step": 8339
    },
    {
      "epoch": 1.7144619179771816,
      "grad_norm": 0.19834211468696594,
      "learning_rate": 3.6805784994206056e-05,
      "loss": 0.5583,
      "step": 8340
    },
    {
      "epoch": 1.7146674889505602,
      "grad_norm": 0.20397832989692688,
      "learning_rate": 3.679596407820205e-05,
      "loss": 0.5192,
      "step": 8341
    },
    {
      "epoch": 1.7148730599239388,
      "grad_norm": 0.19556482136249542,
      "learning_rate": 3.678614356637258e-05,
      "loss": 0.528,
      "step": 8342
    },
    {
      "epoch": 1.7150786308973172,
      "grad_norm": 0.18456198275089264,
      "learning_rate": 3.6776323459201415e-05,
      "loss": 0.4952,
      "step": 8343
    },
    {
      "epoch": 1.7152842018706957,
      "grad_norm": 0.19880907237529755,
      "learning_rate": 3.676650375717235e-05,
      "loss": 0.5528,
      "step": 8344
    },
    {
      "epoch": 1.7154897728440743,
      "grad_norm": 0.19653092324733734,
      "learning_rate": 3.6756684460769175e-05,
      "loss": 0.5341,
      "step": 8345
    },
    {
      "epoch": 1.715695343817453,
      "grad_norm": 0.5179283022880554,
      "learning_rate": 3.674686557047562e-05,
      "loss": 0.5772,
      "step": 8346
    },
    {
      "epoch": 1.7159009147908315,
      "grad_norm": 0.1959078460931778,
      "learning_rate": 3.673704708677543e-05,
      "loss": 0.5262,
      "step": 8347
    },
    {
      "epoch": 1.71610648576421,
      "grad_norm": 0.19857066869735718,
      "learning_rate": 3.672722901015228e-05,
      "loss": 0.5572,
      "step": 8348
    },
    {
      "epoch": 1.7163120567375887,
      "grad_norm": 0.19778084754943848,
      "learning_rate": 3.6717411341089914e-05,
      "loss": 0.5264,
      "step": 8349
    },
    {
      "epoch": 1.7165176277109673,
      "grad_norm": 0.2006417065858841,
      "learning_rate": 3.670759408007199e-05,
      "loss": 0.5192,
      "step": 8350
    },
    {
      "epoch": 1.7167231986843459,
      "grad_norm": 0.19210219383239746,
      "learning_rate": 3.669777722758213e-05,
      "loss": 0.5253,
      "step": 8351
    },
    {
      "epoch": 1.7169287696577245,
      "grad_norm": 0.19173528254032135,
      "learning_rate": 3.668796078410399e-05,
      "loss": 0.5447,
      "step": 8352
    },
    {
      "epoch": 1.717134340631103,
      "grad_norm": 0.19798819720745087,
      "learning_rate": 3.667814475012116e-05,
      "loss": 0.5544,
      "step": 8353
    },
    {
      "epoch": 1.7173399116044814,
      "grad_norm": 0.19367478787899017,
      "learning_rate": 3.666832912611725e-05,
      "loss": 0.546,
      "step": 8354
    },
    {
      "epoch": 1.71754548257786,
      "grad_norm": 0.19712290167808533,
      "learning_rate": 3.665851391257582e-05,
      "loss": 0.5339,
      "step": 8355
    },
    {
      "epoch": 1.7177510535512386,
      "grad_norm": 0.19337862730026245,
      "learning_rate": 3.6648699109980416e-05,
      "loss": 0.5559,
      "step": 8356
    },
    {
      "epoch": 1.7179566245246172,
      "grad_norm": 0.19475507736206055,
      "learning_rate": 3.6638884718814584e-05,
      "loss": 0.5432,
      "step": 8357
    },
    {
      "epoch": 1.7181621954979955,
      "grad_norm": 0.18744108080863953,
      "learning_rate": 3.6629070739561816e-05,
      "loss": 0.5275,
      "step": 8358
    },
    {
      "epoch": 1.7183677664713741,
      "grad_norm": 0.18683594465255737,
      "learning_rate": 3.661925717270561e-05,
      "loss": 0.5345,
      "step": 8359
    },
    {
      "epoch": 1.7185733374447527,
      "grad_norm": 0.1923644095659256,
      "learning_rate": 3.660944401872944e-05,
      "loss": 0.534,
      "step": 8360
    },
    {
      "epoch": 1.7187789084181313,
      "grad_norm": 0.19515560567378998,
      "learning_rate": 3.6599631278116735e-05,
      "loss": 0.5591,
      "step": 8361
    },
    {
      "epoch": 1.71898447939151,
      "grad_norm": 0.19667771458625793,
      "learning_rate": 3.658981895135092e-05,
      "loss": 0.5374,
      "step": 8362
    },
    {
      "epoch": 1.7191900503648885,
      "grad_norm": 0.19800591468811035,
      "learning_rate": 3.6580007038915436e-05,
      "loss": 0.5309,
      "step": 8363
    },
    {
      "epoch": 1.719395621338267,
      "grad_norm": 0.1907908171415329,
      "learning_rate": 3.657019554129365e-05,
      "loss": 0.5261,
      "step": 8364
    },
    {
      "epoch": 1.7196011923116457,
      "grad_norm": 0.195295050740242,
      "learning_rate": 3.656038445896891e-05,
      "loss": 0.5586,
      "step": 8365
    },
    {
      "epoch": 1.7198067632850242,
      "grad_norm": 0.1947198510169983,
      "learning_rate": 3.6550573792424606e-05,
      "loss": 0.5474,
      "step": 8366
    },
    {
      "epoch": 1.7200123342584028,
      "grad_norm": 0.1944238245487213,
      "learning_rate": 3.654076354214403e-05,
      "loss": 0.543,
      "step": 8367
    },
    {
      "epoch": 1.7202179052317814,
      "grad_norm": 0.18714429438114166,
      "learning_rate": 3.6530953708610496e-05,
      "loss": 0.5199,
      "step": 8368
    },
    {
      "epoch": 1.7204234762051598,
      "grad_norm": 0.16641157865524292,
      "learning_rate": 3.652114429230727e-05,
      "loss": 0.5063,
      "step": 8369
    },
    {
      "epoch": 1.7206290471785384,
      "grad_norm": 0.1247912049293518,
      "learning_rate": 3.651133529371765e-05,
      "loss": 0.5055,
      "step": 8370
    },
    {
      "epoch": 1.720834618151917,
      "grad_norm": 0.12845945358276367,
      "learning_rate": 3.650152671332487e-05,
      "loss": 0.5052,
      "step": 8371
    },
    {
      "epoch": 1.7210401891252955,
      "grad_norm": 0.15474404394626617,
      "learning_rate": 3.6491718551612146e-05,
      "loss": 0.5453,
      "step": 8372
    },
    {
      "epoch": 1.721245760098674,
      "grad_norm": 0.19407010078430176,
      "learning_rate": 3.648191080906268e-05,
      "loss": 0.5121,
      "step": 8373
    },
    {
      "epoch": 1.7214513310720525,
      "grad_norm": 0.16241449117660522,
      "learning_rate": 3.647210348615964e-05,
      "loss": 0.5118,
      "step": 8374
    },
    {
      "epoch": 1.721656902045431,
      "grad_norm": 0.1552891731262207,
      "learning_rate": 3.6462296583386225e-05,
      "loss": 0.5206,
      "step": 8375
    },
    {
      "epoch": 1.7218624730188097,
      "grad_norm": 0.194035142660141,
      "learning_rate": 3.6452490101225536e-05,
      "loss": 0.5528,
      "step": 8376
    },
    {
      "epoch": 1.7220680439921883,
      "grad_norm": 0.2007959634065628,
      "learning_rate": 3.6442684040160704e-05,
      "loss": 0.5216,
      "step": 8377
    },
    {
      "epoch": 1.7222736149655669,
      "grad_norm": 0.17652183771133423,
      "learning_rate": 3.643287840067485e-05,
      "loss": 0.5151,
      "step": 8378
    },
    {
      "epoch": 1.7224791859389454,
      "grad_norm": 0.15610848367214203,
      "learning_rate": 3.6423073183251024e-05,
      "loss": 0.5385,
      "step": 8379
    },
    {
      "epoch": 1.722684756912324,
      "grad_norm": 0.19610700011253357,
      "learning_rate": 3.641326838837231e-05,
      "loss": 0.5363,
      "step": 8380
    },
    {
      "epoch": 1.7228903278857026,
      "grad_norm": 0.1605963259935379,
      "learning_rate": 3.6403464016521716e-05,
      "loss": 0.5102,
      "step": 8381
    },
    {
      "epoch": 1.7230958988590812,
      "grad_norm": 0.15825892984867096,
      "learning_rate": 3.639366006818227e-05,
      "loss": 0.5264,
      "step": 8382
    },
    {
      "epoch": 1.7233014698324598,
      "grad_norm": 0.156993106007576,
      "learning_rate": 3.638385654383695e-05,
      "loss": 0.4904,
      "step": 8383
    },
    {
      "epoch": 1.7235070408058382,
      "grad_norm": 0.1612616777420044,
      "learning_rate": 3.637405344396877e-05,
      "loss": 0.5488,
      "step": 8384
    },
    {
      "epoch": 1.7237126117792168,
      "grad_norm": 0.19856800138950348,
      "learning_rate": 3.6364250769060654e-05,
      "loss": 0.5246,
      "step": 8385
    },
    {
      "epoch": 1.7239181827525953,
      "grad_norm": 0.19383041560649872,
      "learning_rate": 3.6354448519595526e-05,
      "loss": 0.5251,
      "step": 8386
    },
    {
      "epoch": 1.724123753725974,
      "grad_norm": 0.15535280108451843,
      "learning_rate": 3.634464669605633e-05,
      "loss": 0.4938,
      "step": 8387
    },
    {
      "epoch": 1.7243293246993523,
      "grad_norm": 0.1630435734987259,
      "learning_rate": 3.633484529892593e-05,
      "loss": 0.5444,
      "step": 8388
    },
    {
      "epoch": 1.7245348956727309,
      "grad_norm": 0.2020839899778366,
      "learning_rate": 3.6325044328687194e-05,
      "loss": 0.5377,
      "step": 8389
    },
    {
      "epoch": 1.7247404666461095,
      "grad_norm": 0.16391253471374512,
      "learning_rate": 3.631524378582297e-05,
      "loss": 0.4937,
      "step": 8390
    },
    {
      "epoch": 1.724946037619488,
      "grad_norm": 0.12773092091083527,
      "learning_rate": 3.630544367081611e-05,
      "loss": 0.5292,
      "step": 8391
    },
    {
      "epoch": 1.7251516085928666,
      "grad_norm": 0.16146111488342285,
      "learning_rate": 3.62956439841494e-05,
      "loss": 0.5185,
      "step": 8392
    },
    {
      "epoch": 1.7253571795662452,
      "grad_norm": 0.19887956976890564,
      "learning_rate": 3.6285844726305624e-05,
      "loss": 0.5424,
      "step": 8393
    },
    {
      "epoch": 1.7255627505396238,
      "grad_norm": 0.19647051393985748,
      "learning_rate": 3.627604589776755e-05,
      "loss": 0.5365,
      "step": 8394
    },
    {
      "epoch": 1.7257683215130024,
      "grad_norm": 0.16327311098575592,
      "learning_rate": 3.626624749901792e-05,
      "loss": 0.523,
      "step": 8395
    },
    {
      "epoch": 1.725973892486381,
      "grad_norm": 0.16111861169338226,
      "learning_rate": 3.625644953053945e-05,
      "loss": 0.5296,
      "step": 8396
    },
    {
      "epoch": 1.7261794634597596,
      "grad_norm": 0.20019720494747162,
      "learning_rate": 3.624665199281483e-05,
      "loss": 0.5494,
      "step": 8397
    },
    {
      "epoch": 1.7263850344331382,
      "grad_norm": 0.1936234086751938,
      "learning_rate": 3.623685488632678e-05,
      "loss": 0.5291,
      "step": 8398
    },
    {
      "epoch": 1.7265906054065168,
      "grad_norm": 0.20377790927886963,
      "learning_rate": 3.6227058211557906e-05,
      "loss": 0.5492,
      "step": 8399
    },
    {
      "epoch": 1.7267961763798951,
      "grad_norm": 0.16904407739639282,
      "learning_rate": 3.621726196899089e-05,
      "loss": 0.5124,
      "step": 8400
    },
    {
      "epoch": 1.7270017473532737,
      "grad_norm": 0.1632084995508194,
      "learning_rate": 3.620746615910832e-05,
      "loss": 0.5514,
      "step": 8401
    },
    {
      "epoch": 1.7272073183266523,
      "grad_norm": 0.16341902315616608,
      "learning_rate": 3.61976707823928e-05,
      "loss": 0.5038,
      "step": 8402
    },
    {
      "epoch": 1.7274128893000307,
      "grad_norm": 0.1626911461353302,
      "learning_rate": 3.61878758393269e-05,
      "loss": 0.5417,
      "step": 8403
    },
    {
      "epoch": 1.7276184602734093,
      "grad_norm": 0.2055915892124176,
      "learning_rate": 3.617808133039314e-05,
      "loss": 0.531,
      "step": 8404
    },
    {
      "epoch": 1.7278240312467879,
      "grad_norm": 0.1969294250011444,
      "learning_rate": 3.616828725607411e-05,
      "loss": 0.5347,
      "step": 8405
    },
    {
      "epoch": 1.7280296022201664,
      "grad_norm": 0.1655907779932022,
      "learning_rate": 3.6158493616852276e-05,
      "loss": 0.5059,
      "step": 8406
    },
    {
      "epoch": 1.728235173193545,
      "grad_norm": 0.1626054346561432,
      "learning_rate": 3.6148700413210144e-05,
      "loss": 0.5243,
      "step": 8407
    },
    {
      "epoch": 1.7284407441669236,
      "grad_norm": 0.19298885762691498,
      "learning_rate": 3.613890764563016e-05,
      "loss": 0.5355,
      "step": 8408
    },
    {
      "epoch": 1.7286463151403022,
      "grad_norm": 0.20283274352550507,
      "learning_rate": 3.61291153145948e-05,
      "loss": 0.5398,
      "step": 8409
    },
    {
      "epoch": 1.7288518861136808,
      "grad_norm": 0.19936981797218323,
      "learning_rate": 3.6119323420586446e-05,
      "loss": 0.5374,
      "step": 8410
    },
    {
      "epoch": 1.7290574570870594,
      "grad_norm": 0.16159012913703918,
      "learning_rate": 3.610953196408752e-05,
      "loss": 0.4839,
      "step": 8411
    },
    {
      "epoch": 1.729263028060438,
      "grad_norm": 0.16305240988731384,
      "learning_rate": 3.609974094558041e-05,
      "loss": 0.5284,
      "step": 8412
    },
    {
      "epoch": 1.7294685990338166,
      "grad_norm": 0.1939508616924286,
      "learning_rate": 3.608995036554746e-05,
      "loss": 0.5127,
      "step": 8413
    },
    {
      "epoch": 1.7296741700071951,
      "grad_norm": 0.1960534304380417,
      "learning_rate": 3.608016022447102e-05,
      "loss": 0.5506,
      "step": 8414
    },
    {
      "epoch": 1.7298797409805735,
      "grad_norm": 0.18489257991313934,
      "learning_rate": 3.607037052283339e-05,
      "loss": 0.5321,
      "step": 8415
    },
    {
      "epoch": 1.730085311953952,
      "grad_norm": 0.1943347156047821,
      "learning_rate": 3.606058126111686e-05,
      "loss": 0.5447,
      "step": 8416
    },
    {
      "epoch": 1.7302908829273307,
      "grad_norm": 0.199358269572258,
      "learning_rate": 3.60507924398037e-05,
      "loss": 0.553,
      "step": 8417
    },
    {
      "epoch": 1.7304964539007093,
      "grad_norm": 0.16631248593330383,
      "learning_rate": 3.6041004059376176e-05,
      "loss": 0.4963,
      "step": 8418
    },
    {
      "epoch": 1.7307020248740876,
      "grad_norm": 0.210128515958786,
      "learning_rate": 3.603121612031652e-05,
      "loss": 0.5068,
      "step": 8419
    },
    {
      "epoch": 1.7309075958474662,
      "grad_norm": 0.16205939650535583,
      "learning_rate": 3.602142862310691e-05,
      "loss": 0.5304,
      "step": 8420
    },
    {
      "epoch": 1.7311131668208448,
      "grad_norm": 0.1637234389781952,
      "learning_rate": 3.601164156822956e-05,
      "loss": 0.498,
      "step": 8421
    },
    {
      "epoch": 1.7313187377942234,
      "grad_norm": 0.12157563865184784,
      "learning_rate": 3.600185495616661e-05,
      "loss": 0.5283,
      "step": 8422
    },
    {
      "epoch": 1.731524308767602,
      "grad_norm": 0.1593407392501831,
      "learning_rate": 3.599206878740021e-05,
      "loss": 0.5318,
      "step": 8423
    },
    {
      "epoch": 1.7317298797409806,
      "grad_norm": 0.16835933923721313,
      "learning_rate": 3.598228306241247e-05,
      "loss": 0.5268,
      "step": 8424
    },
    {
      "epoch": 1.7319354507143592,
      "grad_norm": 0.12342957407236099,
      "learning_rate": 3.59724977816855e-05,
      "loss": 0.5118,
      "step": 8425
    },
    {
      "epoch": 1.7321410216877378,
      "grad_norm": 0.15400569140911102,
      "learning_rate": 3.596271294570138e-05,
      "loss": 0.535,
      "step": 8426
    },
    {
      "epoch": 1.7323465926611163,
      "grad_norm": 0.19436071813106537,
      "learning_rate": 3.595292855494215e-05,
      "loss": 0.5485,
      "step": 8427
    },
    {
      "epoch": 1.732552163634495,
      "grad_norm": 0.16538384556770325,
      "learning_rate": 3.594314460988984e-05,
      "loss": 0.4909,
      "step": 8428
    },
    {
      "epoch": 1.7327577346078735,
      "grad_norm": 0.15564298629760742,
      "learning_rate": 3.5933361111026453e-05,
      "loss": 0.5438,
      "step": 8429
    },
    {
      "epoch": 1.732963305581252,
      "grad_norm": 0.19588908553123474,
      "learning_rate": 3.5923578058834e-05,
      "loss": 0.5485,
      "step": 8430
    },
    {
      "epoch": 1.7331688765546305,
      "grad_norm": 0.19124017655849457,
      "learning_rate": 3.5913795453794427e-05,
      "loss": 0.5295,
      "step": 8431
    },
    {
      "epoch": 1.733374447528009,
      "grad_norm": 0.15818458795547485,
      "learning_rate": 3.5904013296389686e-05,
      "loss": 0.5142,
      "step": 8432
    },
    {
      "epoch": 1.7335800185013877,
      "grad_norm": 0.15775617957115173,
      "learning_rate": 3.5894231587101694e-05,
      "loss": 0.5282,
      "step": 8433
    },
    {
      "epoch": 1.733785589474766,
      "grad_norm": 0.16275940835475922,
      "learning_rate": 3.588445032641236e-05,
      "loss": 0.5012,
      "step": 8434
    },
    {
      "epoch": 1.7339911604481446,
      "grad_norm": 0.15710069239139557,
      "learning_rate": 3.5874669514803545e-05,
      "loss": 0.5309,
      "step": 8435
    },
    {
      "epoch": 1.7341967314215232,
      "grad_norm": 0.19356967508792877,
      "learning_rate": 3.586488915275711e-05,
      "loss": 0.5344,
      "step": 8436
    },
    {
      "epoch": 1.7344023023949018,
      "grad_norm": 0.19396322965621948,
      "learning_rate": 3.58551092407549e-05,
      "loss": 0.5279,
      "step": 8437
    },
    {
      "epoch": 1.7346078733682804,
      "grad_norm": 0.20493246614933014,
      "learning_rate": 3.5845329779278694e-05,
      "loss": 0.5537,
      "step": 8438
    },
    {
      "epoch": 1.734813444341659,
      "grad_norm": 0.18893173336982727,
      "learning_rate": 3.583555076881031e-05,
      "loss": 0.5145,
      "step": 8439
    },
    {
      "epoch": 1.7350190153150375,
      "grad_norm": 0.19152796268463135,
      "learning_rate": 3.5825772209831517e-05,
      "loss": 0.514,
      "step": 8440
    },
    {
      "epoch": 1.7352245862884161,
      "grad_norm": 0.1870860904455185,
      "learning_rate": 3.581599410282403e-05,
      "loss": 0.5234,
      "step": 8441
    },
    {
      "epoch": 1.7354301572617947,
      "grad_norm": 0.1898457258939743,
      "learning_rate": 3.58062164482696e-05,
      "loss": 0.5324,
      "step": 8442
    },
    {
      "epoch": 1.7356357282351733,
      "grad_norm": 0.19367991387844086,
      "learning_rate": 3.579643924664991e-05,
      "loss": 0.5443,
      "step": 8443
    },
    {
      "epoch": 1.735841299208552,
      "grad_norm": 0.1994738131761551,
      "learning_rate": 3.5786662498446645e-05,
      "loss": 0.5449,
      "step": 8444
    },
    {
      "epoch": 1.7360468701819303,
      "grad_norm": 0.16127611696720123,
      "learning_rate": 3.577688620414143e-05,
      "loss": 0.5126,
      "step": 8445
    },
    {
      "epoch": 1.7362524411553089,
      "grad_norm": 0.16468468308448792,
      "learning_rate": 3.5767110364215954e-05,
      "loss": 0.5467,
      "step": 8446
    },
    {
      "epoch": 1.7364580121286874,
      "grad_norm": 0.20566272735595703,
      "learning_rate": 3.575733497915179e-05,
      "loss": 0.5503,
      "step": 8447
    },
    {
      "epoch": 1.736663583102066,
      "grad_norm": 0.2016957551240921,
      "learning_rate": 3.5747560049430526e-05,
      "loss": 0.534,
      "step": 8448
    },
    {
      "epoch": 1.7368691540754444,
      "grad_norm": 0.6397230625152588,
      "learning_rate": 3.573778557553374e-05,
      "loss": 0.5599,
      "step": 8449
    },
    {
      "epoch": 1.737074725048823,
      "grad_norm": 0.191917285323143,
      "learning_rate": 3.572801155794295e-05,
      "loss": 0.5208,
      "step": 8450
    },
    {
      "epoch": 1.7372802960222016,
      "grad_norm": 0.20242702960968018,
      "learning_rate": 3.571823799713971e-05,
      "loss": 0.5409,
      "step": 8451
    },
    {
      "epoch": 1.7374858669955802,
      "grad_norm": 0.16706420481204987,
      "learning_rate": 3.570846489360549e-05,
      "loss": 0.5102,
      "step": 8452
    },
    {
      "epoch": 1.7376914379689588,
      "grad_norm": 0.1416233628988266,
      "learning_rate": 3.569869224782177e-05,
      "loss": 0.5132,
      "step": 8453
    },
    {
      "epoch": 1.7378970089423373,
      "grad_norm": 0.16062797605991364,
      "learning_rate": 3.568892006027003e-05,
      "loss": 0.5522,
      "step": 8454
    },
    {
      "epoch": 1.738102579915716,
      "grad_norm": 0.16195809841156006,
      "learning_rate": 3.5679148331431666e-05,
      "loss": 0.4988,
      "step": 8455
    },
    {
      "epoch": 1.7383081508890945,
      "grad_norm": 0.13933859765529633,
      "learning_rate": 3.5669377061788104e-05,
      "loss": 0.5216,
      "step": 8456
    },
    {
      "epoch": 1.738513721862473,
      "grad_norm": 0.16723297536373138,
      "learning_rate": 3.565960625182073e-05,
      "loss": 0.5381,
      "step": 8457
    },
    {
      "epoch": 1.7387192928358517,
      "grad_norm": 0.20578205585479736,
      "learning_rate": 3.564983590201089e-05,
      "loss": 0.5171,
      "step": 8458
    },
    {
      "epoch": 1.7389248638092303,
      "grad_norm": 0.20782290399074554,
      "learning_rate": 3.564006601283992e-05,
      "loss": 0.5442,
      "step": 8459
    },
    {
      "epoch": 1.7391304347826086,
      "grad_norm": 0.19257017970085144,
      "learning_rate": 3.563029658478916e-05,
      "loss": 0.5502,
      "step": 8460
    },
    {
      "epoch": 1.7393360057559872,
      "grad_norm": 0.19143828749656677,
      "learning_rate": 3.56205276183399e-05,
      "loss": 0.5325,
      "step": 8461
    },
    {
      "epoch": 1.7395415767293658,
      "grad_norm": 0.19385689496994019,
      "learning_rate": 3.5610759113973395e-05,
      "loss": 0.5194,
      "step": 8462
    },
    {
      "epoch": 1.7397471477027444,
      "grad_norm": 0.1967114955186844,
      "learning_rate": 3.560099107217091e-05,
      "loss": 0.5313,
      "step": 8463
    },
    {
      "epoch": 1.7399527186761228,
      "grad_norm": 0.16215933859348297,
      "learning_rate": 3.559122349341366e-05,
      "loss": 0.5018,
      "step": 8464
    },
    {
      "epoch": 1.7401582896495014,
      "grad_norm": 0.1343732327222824,
      "learning_rate": 3.558145637818286e-05,
      "loss": 0.519,
      "step": 8465
    },
    {
      "epoch": 1.74036386062288,
      "grad_norm": 0.15892748534679413,
      "learning_rate": 3.557168972695966e-05,
      "loss": 0.5512,
      "step": 8466
    },
    {
      "epoch": 1.7405694315962585,
      "grad_norm": 0.2068302482366562,
      "learning_rate": 3.556192354022525e-05,
      "loss": 0.5618,
      "step": 8467
    },
    {
      "epoch": 1.7407750025696371,
      "grad_norm": 0.20231375098228455,
      "learning_rate": 3.555215781846077e-05,
      "loss": 0.5403,
      "step": 8468
    },
    {
      "epoch": 1.7409805735430157,
      "grad_norm": 0.18931826949119568,
      "learning_rate": 3.5542392562147305e-05,
      "loss": 0.5406,
      "step": 8469
    },
    {
      "epoch": 1.7411861445163943,
      "grad_norm": 0.1967364251613617,
      "learning_rate": 3.553262777176596e-05,
      "loss": 0.5488,
      "step": 8470
    },
    {
      "epoch": 1.741391715489773,
      "grad_norm": 0.16582554578781128,
      "learning_rate": 3.552286344779779e-05,
      "loss": 0.5162,
      "step": 8471
    },
    {
      "epoch": 1.7415972864631515,
      "grad_norm": 0.16116267442703247,
      "learning_rate": 3.551309959072383e-05,
      "loss": 0.5275,
      "step": 8472
    },
    {
      "epoch": 1.74180285743653,
      "grad_norm": 0.19118881225585938,
      "learning_rate": 3.550333620102512e-05,
      "loss": 0.5363,
      "step": 8473
    },
    {
      "epoch": 1.7420084284099087,
      "grad_norm": 0.1949569284915924,
      "learning_rate": 3.549357327918264e-05,
      "loss": 0.5546,
      "step": 8474
    },
    {
      "epoch": 1.742213999383287,
      "grad_norm": 0.2024715095758438,
      "learning_rate": 3.548381082567738e-05,
      "loss": 0.5318,
      "step": 8475
    },
    {
      "epoch": 1.7424195703566656,
      "grad_norm": 0.21180285513401031,
      "learning_rate": 3.5474048840990286e-05,
      "loss": 0.5362,
      "step": 8476
    },
    {
      "epoch": 1.7426251413300442,
      "grad_norm": 0.1865611970424652,
      "learning_rate": 3.546428732560228e-05,
      "loss": 0.4995,
      "step": 8477
    },
    {
      "epoch": 1.7428307123034228,
      "grad_norm": 0.17171883583068848,
      "learning_rate": 3.545452627999427e-05,
      "loss": 0.5554,
      "step": 8478
    },
    {
      "epoch": 1.7430362832768012,
      "grad_norm": 0.2119421362876892,
      "learning_rate": 3.544476570464713e-05,
      "loss": 0.5312,
      "step": 8479
    },
    {
      "epoch": 1.7432418542501797,
      "grad_norm": 0.1979297697544098,
      "learning_rate": 3.543500560004171e-05,
      "loss": 0.5277,
      "step": 8480
    },
    {
      "epoch": 1.7434474252235583,
      "grad_norm": 0.19431854784488678,
      "learning_rate": 3.542524596665887e-05,
      "loss": 0.5307,
      "step": 8481
    },
    {
      "epoch": 1.743652996196937,
      "grad_norm": 0.18718986213207245,
      "learning_rate": 3.5415486804979417e-05,
      "loss": 0.5164,
      "step": 8482
    },
    {
      "epoch": 1.7438585671703155,
      "grad_norm": 0.19408833980560303,
      "learning_rate": 3.540572811548412e-05,
      "loss": 0.5484,
      "step": 8483
    },
    {
      "epoch": 1.744064138143694,
      "grad_norm": 0.19318553805351257,
      "learning_rate": 3.539596989865375e-05,
      "loss": 0.523,
      "step": 8484
    },
    {
      "epoch": 1.7442697091170727,
      "grad_norm": 0.16727426648139954,
      "learning_rate": 3.538621215496907e-05,
      "loss": 0.5504,
      "step": 8485
    },
    {
      "epoch": 1.7444752800904513,
      "grad_norm": 0.16280822455883026,
      "learning_rate": 3.537645488491078e-05,
      "loss": 0.538,
      "step": 8486
    },
    {
      "epoch": 1.7446808510638299,
      "grad_norm": 0.19524060189723969,
      "learning_rate": 3.5366698088959557e-05,
      "loss": 0.5333,
      "step": 8487
    },
    {
      "epoch": 1.7448864220372085,
      "grad_norm": 0.1601538062095642,
      "learning_rate": 3.535694176759611e-05,
      "loss": 0.4854,
      "step": 8488
    },
    {
      "epoch": 1.745091993010587,
      "grad_norm": 0.16524933278560638,
      "learning_rate": 3.534718592130107e-05,
      "loss": 0.5261,
      "step": 8489
    },
    {
      "epoch": 1.7452975639839656,
      "grad_norm": 0.20658870041370392,
      "learning_rate": 3.5337430550555065e-05,
      "loss": 0.5592,
      "step": 8490
    },
    {
      "epoch": 1.745503134957344,
      "grad_norm": 0.20213808119297028,
      "learning_rate": 3.5327675655838694e-05,
      "loss": 0.5413,
      "step": 8491
    },
    {
      "epoch": 1.7457087059307226,
      "grad_norm": 0.20643405616283417,
      "learning_rate": 3.531792123763253e-05,
      "loss": 0.5504,
      "step": 8492
    },
    {
      "epoch": 1.7459142769041012,
      "grad_norm": 0.1972249150276184,
      "learning_rate": 3.5308167296417125e-05,
      "loss": 0.5359,
      "step": 8493
    },
    {
      "epoch": 1.7461198478774795,
      "grad_norm": 0.19785918295383453,
      "learning_rate": 3.529841383267303e-05,
      "loss": 0.5605,
      "step": 8494
    },
    {
      "epoch": 1.7463254188508581,
      "grad_norm": 0.3983357846736908,
      "learning_rate": 3.528866084688074e-05,
      "loss": 0.5958,
      "step": 8495
    },
    {
      "epoch": 1.7465309898242367,
      "grad_norm": 0.21267639100551605,
      "learning_rate": 3.527890833952073e-05,
      "loss": 0.5268,
      "step": 8496
    },
    {
      "epoch": 1.7467365607976153,
      "grad_norm": 0.16559986770153046,
      "learning_rate": 3.5269156311073484e-05,
      "loss": 0.5056,
      "step": 8497
    },
    {
      "epoch": 1.746942131770994,
      "grad_norm": 0.15838290750980377,
      "learning_rate": 3.5259404762019416e-05,
      "loss": 0.552,
      "step": 8498
    },
    {
      "epoch": 1.7471477027443725,
      "grad_norm": 0.19080090522766113,
      "learning_rate": 3.524965369283896e-05,
      "loss": 0.5234,
      "step": 8499
    },
    {
      "epoch": 1.747353273717751,
      "grad_norm": 0.166726753115654,
      "learning_rate": 3.5239903104012464e-05,
      "loss": 0.5099,
      "step": 8500
    },
    {
      "epoch": 1.7475588446911297,
      "grad_norm": 0.15904779732227325,
      "learning_rate": 3.5230152996020346e-05,
      "loss": 0.5136,
      "step": 8501
    },
    {
      "epoch": 1.7477644156645082,
      "grad_norm": 0.19941627979278564,
      "learning_rate": 3.522040336934293e-05,
      "loss": 0.5499,
      "step": 8502
    },
    {
      "epoch": 1.7479699866378868,
      "grad_norm": 0.20110583305358887,
      "learning_rate": 3.521065422446052e-05,
      "loss": 0.5503,
      "step": 8503
    },
    {
      "epoch": 1.7481755576112654,
      "grad_norm": 0.20107027888298035,
      "learning_rate": 3.520090556185343e-05,
      "loss": 0.5605,
      "step": 8504
    },
    {
      "epoch": 1.748381128584644,
      "grad_norm": 0.18705639243125916,
      "learning_rate": 3.51911573820019e-05,
      "loss": 0.5163,
      "step": 8505
    },
    {
      "epoch": 1.7485866995580224,
      "grad_norm": 0.19800741970539093,
      "learning_rate": 3.518140968538622e-05,
      "loss": 0.4896,
      "step": 8506
    },
    {
      "epoch": 1.748792270531401,
      "grad_norm": 0.19224296510219574,
      "learning_rate": 3.517166247248659e-05,
      "loss": 0.5034,
      "step": 8507
    },
    {
      "epoch": 1.7489978415047795,
      "grad_norm": 0.20960035920143127,
      "learning_rate": 3.51619157437832e-05,
      "loss": 0.5245,
      "step": 8508
    },
    {
      "epoch": 1.7492034124781581,
      "grad_norm": 0.19137395918369293,
      "learning_rate": 3.5152169499756256e-05,
      "loss": 0.5284,
      "step": 8509
    },
    {
      "epoch": 1.7494089834515365,
      "grad_norm": 0.19862139225006104,
      "learning_rate": 3.514242374088588e-05,
      "loss": 0.5506,
      "step": 8510
    },
    {
      "epoch": 1.749614554424915,
      "grad_norm": 0.19606275856494904,
      "learning_rate": 3.5132678467652226e-05,
      "loss": 0.5297,
      "step": 8511
    },
    {
      "epoch": 1.7498201253982937,
      "grad_norm": 0.1958342045545578,
      "learning_rate": 3.512293368053537e-05,
      "loss": 0.5255,
      "step": 8512
    },
    {
      "epoch": 1.7500256963716723,
      "grad_norm": 0.16824734210968018,
      "learning_rate": 3.511318938001542e-05,
      "loss": 0.5178,
      "step": 8513
    },
    {
      "epoch": 1.7502312673450509,
      "grad_norm": 0.162201389670372,
      "learning_rate": 3.510344556657239e-05,
      "loss": 0.5169,
      "step": 8514
    },
    {
      "epoch": 1.7504368383184294,
      "grad_norm": 0.20394515991210938,
      "learning_rate": 3.509370224068637e-05,
      "loss": 0.5215,
      "step": 8515
    },
    {
      "epoch": 1.750642409291808,
      "grad_norm": 0.2038257122039795,
      "learning_rate": 3.508395940283733e-05,
      "loss": 0.5277,
      "step": 8516
    },
    {
      "epoch": 1.7508479802651866,
      "grad_norm": 0.19794686138629913,
      "learning_rate": 3.507421705350526e-05,
      "loss": 0.5379,
      "step": 8517
    },
    {
      "epoch": 1.7510535512385652,
      "grad_norm": 0.19401569664478302,
      "learning_rate": 3.506447519317012e-05,
      "loss": 0.5313,
      "step": 8518
    },
    {
      "epoch": 1.7512591222119438,
      "grad_norm": 0.1934097856283188,
      "learning_rate": 3.5054733822311856e-05,
      "loss": 0.5291,
      "step": 8519
    },
    {
      "epoch": 1.7514646931853224,
      "grad_norm": 0.19061771035194397,
      "learning_rate": 3.5044992941410374e-05,
      "loss": 0.5239,
      "step": 8520
    },
    {
      "epoch": 1.7516702641587008,
      "grad_norm": 0.19829559326171875,
      "learning_rate": 3.503525255094554e-05,
      "loss": 0.5532,
      "step": 8521
    },
    {
      "epoch": 1.7518758351320793,
      "grad_norm": 0.1951601654291153,
      "learning_rate": 3.502551265139726e-05,
      "loss": 0.5366,
      "step": 8522
    },
    {
      "epoch": 1.752081406105458,
      "grad_norm": 0.18550780415534973,
      "learning_rate": 3.501577324324535e-05,
      "loss": 0.5199,
      "step": 8523
    },
    {
      "epoch": 1.7522869770788365,
      "grad_norm": 0.19197461009025574,
      "learning_rate": 3.500603432696962e-05,
      "loss": 0.5048,
      "step": 8524
    },
    {
      "epoch": 1.7524925480522149,
      "grad_norm": 0.1984768956899643,
      "learning_rate": 3.4996295903049874e-05,
      "loss": 0.5334,
      "step": 8525
    },
    {
      "epoch": 1.7526981190255935,
      "grad_norm": 0.1615784913301468,
      "learning_rate": 3.498655797196586e-05,
      "loss": 0.5212,
      "step": 8526
    },
    {
      "epoch": 1.752903689998972,
      "grad_norm": 0.16125060617923737,
      "learning_rate": 3.4976820534197335e-05,
      "loss": 0.5676,
      "step": 8527
    },
    {
      "epoch": 1.7531092609723506,
      "grad_norm": 0.19413301348686218,
      "learning_rate": 3.4967083590224016e-05,
      "loss": 0.5531,
      "step": 8528
    },
    {
      "epoch": 1.7533148319457292,
      "grad_norm": 0.19663669168949127,
      "learning_rate": 3.4957347140525585e-05,
      "loss": 0.5442,
      "step": 8529
    },
    {
      "epoch": 1.7535204029191078,
      "grad_norm": 0.16446875035762787,
      "learning_rate": 3.4947611185581735e-05,
      "loss": 0.5127,
      "step": 8530
    },
    {
      "epoch": 1.7537259738924864,
      "grad_norm": 0.15640254318714142,
      "learning_rate": 3.4937875725872095e-05,
      "loss": 0.5259,
      "step": 8531
    },
    {
      "epoch": 1.753931544865865,
      "grad_norm": 0.19805364310741425,
      "learning_rate": 3.492814076187629e-05,
      "loss": 0.535,
      "step": 8532
    },
    {
      "epoch": 1.7541371158392436,
      "grad_norm": 0.16201485693454742,
      "learning_rate": 3.491840629407391e-05,
      "loss": 0.4878,
      "step": 8533
    },
    {
      "epoch": 1.7543426868126222,
      "grad_norm": 0.1226087361574173,
      "learning_rate": 3.490867232294454e-05,
      "loss": 0.5218,
      "step": 8534
    },
    {
      "epoch": 1.7545482577860008,
      "grad_norm": 0.16910824179649353,
      "learning_rate": 3.4898938848967695e-05,
      "loss": 0.5276,
      "step": 8535
    },
    {
      "epoch": 1.7547538287593791,
      "grad_norm": 0.19596606492996216,
      "learning_rate": 3.4889205872622936e-05,
      "loss": 0.5526,
      "step": 8536
    },
    {
      "epoch": 1.7549593997327577,
      "grad_norm": 0.20501984655857086,
      "learning_rate": 3.4879473394389745e-05,
      "loss": 0.5593,
      "step": 8537
    },
    {
      "epoch": 1.7551649707061363,
      "grad_norm": 0.1966264247894287,
      "learning_rate": 3.486974141474759e-05,
      "loss": 0.5296,
      "step": 8538
    },
    {
      "epoch": 1.755370541679515,
      "grad_norm": 0.18841052055358887,
      "learning_rate": 3.4860009934175934e-05,
      "loss": 0.5222,
      "step": 8539
    },
    {
      "epoch": 1.7555761126528933,
      "grad_norm": 0.19071705639362335,
      "learning_rate": 3.48502789531542e-05,
      "loss": 0.5494,
      "step": 8540
    },
    {
      "epoch": 1.7557816836262718,
      "grad_norm": 0.20280326902866364,
      "learning_rate": 3.4840548472161777e-05,
      "loss": 0.547,
      "step": 8541
    },
    {
      "epoch": 1.7559872545996504,
      "grad_norm": 0.18994936347007751,
      "learning_rate": 3.483081849167803e-05,
      "loss": 0.5271,
      "step": 8542
    },
    {
      "epoch": 1.756192825573029,
      "grad_norm": 0.19104993343353271,
      "learning_rate": 3.482108901218234e-05,
      "loss": 0.5228,
      "step": 8543
    },
    {
      "epoch": 1.7563983965464076,
      "grad_norm": 0.19525660574436188,
      "learning_rate": 3.481136003415402e-05,
      "loss": 0.5298,
      "step": 8544
    },
    {
      "epoch": 1.7566039675197862,
      "grad_norm": 0.19333256781101227,
      "learning_rate": 3.4801631558072374e-05,
      "loss": 0.5217,
      "step": 8545
    },
    {
      "epoch": 1.7568095384931648,
      "grad_norm": 0.19645366072654724,
      "learning_rate": 3.4791903584416667e-05,
      "loss": 0.5334,
      "step": 8546
    },
    {
      "epoch": 1.7570151094665434,
      "grad_norm": 0.1938944011926651,
      "learning_rate": 3.478217611366615e-05,
      "loss": 0.5287,
      "step": 8547
    },
    {
      "epoch": 1.757220680439922,
      "grad_norm": 0.1910870373249054,
      "learning_rate": 3.477244914630007e-05,
      "loss": 0.5285,
      "step": 8548
    },
    {
      "epoch": 1.7574262514133006,
      "grad_norm": 0.20212024450302124,
      "learning_rate": 3.4762722682797614e-05,
      "loss": 0.5529,
      "step": 8549
    },
    {
      "epoch": 1.7576318223866791,
      "grad_norm": 0.19146008789539337,
      "learning_rate": 3.475299672363795e-05,
      "loss": 0.5124,
      "step": 8550
    },
    {
      "epoch": 1.7578373933600575,
      "grad_norm": 0.1885506808757782,
      "learning_rate": 3.474327126930026e-05,
      "loss": 0.4892,
      "step": 8551
    },
    {
      "epoch": 1.758042964333436,
      "grad_norm": 0.18597213923931122,
      "learning_rate": 3.473354632026365e-05,
      "loss": 0.5208,
      "step": 8552
    },
    {
      "epoch": 1.7582485353068147,
      "grad_norm": 0.19762767851352692,
      "learning_rate": 3.472382187700723e-05,
      "loss": 0.5474,
      "step": 8553
    },
    {
      "epoch": 1.7584541062801933,
      "grad_norm": 0.1776442676782608,
      "learning_rate": 3.471409794001008e-05,
      "loss": 0.5056,
      "step": 8554
    },
    {
      "epoch": 1.7586596772535716,
      "grad_norm": 0.16057129204273224,
      "learning_rate": 3.4704374509751246e-05,
      "loss": 0.5486,
      "step": 8555
    },
    {
      "epoch": 1.7588652482269502,
      "grad_norm": 0.1970880925655365,
      "learning_rate": 3.469465158670973e-05,
      "loss": 0.5392,
      "step": 8556
    },
    {
      "epoch": 1.7590708192003288,
      "grad_norm": 0.16427253186702728,
      "learning_rate": 3.4684929171364594e-05,
      "loss": 0.5139,
      "step": 8557
    },
    {
      "epoch": 1.7592763901737074,
      "grad_norm": 0.13471728563308716,
      "learning_rate": 3.4675207264194776e-05,
      "loss": 0.5029,
      "step": 8558
    },
    {
      "epoch": 1.759481961147086,
      "grad_norm": 0.15767881274223328,
      "learning_rate": 3.4665485865679233e-05,
      "loss": 0.5285,
      "step": 8559
    },
    {
      "epoch": 1.7596875321204646,
      "grad_norm": 0.19089291989803314,
      "learning_rate": 3.465576497629691e-05,
      "loss": 0.52,
      "step": 8560
    },
    {
      "epoch": 1.7598931030938432,
      "grad_norm": 0.1997915655374527,
      "learning_rate": 3.46460445965267e-05,
      "loss": 0.5741,
      "step": 8561
    },
    {
      "epoch": 1.7600986740672218,
      "grad_norm": 0.20209218561649323,
      "learning_rate": 3.4636324726847474e-05,
      "loss": 0.5263,
      "step": 8562
    },
    {
      "epoch": 1.7603042450406003,
      "grad_norm": 0.1946118324995041,
      "learning_rate": 3.4626605367738065e-05,
      "loss": 0.5425,
      "step": 8563
    },
    {
      "epoch": 1.760509816013979,
      "grad_norm": 0.1665966659784317,
      "learning_rate": 3.4616886519677345e-05,
      "loss": 0.4985,
      "step": 8564
    },
    {
      "epoch": 1.7607153869873575,
      "grad_norm": 0.16211137175559998,
      "learning_rate": 3.4607168183144104e-05,
      "loss": 0.5197,
      "step": 8565
    },
    {
      "epoch": 1.7609209579607359,
      "grad_norm": 0.20239417254924774,
      "learning_rate": 3.4597450358617106e-05,
      "loss": 0.5397,
      "step": 8566
    },
    {
      "epoch": 1.7611265289341145,
      "grad_norm": 0.20297926664352417,
      "learning_rate": 3.458773304657511e-05,
      "loss": 0.5313,
      "step": 8567
    },
    {
      "epoch": 1.761332099907493,
      "grad_norm": 0.1988510936498642,
      "learning_rate": 3.457801624749683e-05,
      "loss": 0.5136,
      "step": 8568
    },
    {
      "epoch": 1.7615376708808717,
      "grad_norm": 0.19971226155757904,
      "learning_rate": 3.4568299961860965e-05,
      "loss": 0.5421,
      "step": 8569
    },
    {
      "epoch": 1.76174324185425,
      "grad_norm": 0.19609498977661133,
      "learning_rate": 3.4558584190146226e-05,
      "loss": 0.5286,
      "step": 8570
    },
    {
      "epoch": 1.7619488128276286,
      "grad_norm": 0.20023983716964722,
      "learning_rate": 3.4548868932831235e-05,
      "loss": 0.5378,
      "step": 8571
    },
    {
      "epoch": 1.7621543838010072,
      "grad_norm": 0.20097847282886505,
      "learning_rate": 3.453915419039462e-05,
      "loss": 0.5509,
      "step": 8572
    },
    {
      "epoch": 1.7623599547743858,
      "grad_norm": 0.1950797438621521,
      "learning_rate": 3.452943996331499e-05,
      "loss": 0.5162,
      "step": 8573
    },
    {
      "epoch": 1.7625655257477644,
      "grad_norm": 0.19853217899799347,
      "learning_rate": 3.451972625207091e-05,
      "loss": 0.5326,
      "step": 8574
    },
    {
      "epoch": 1.762771096721143,
      "grad_norm": 0.19258736073970795,
      "learning_rate": 3.451001305714094e-05,
      "loss": 0.5194,
      "step": 8575
    },
    {
      "epoch": 1.7629766676945215,
      "grad_norm": 0.1989012360572815,
      "learning_rate": 3.450030037900357e-05,
      "loss": 0.5142,
      "step": 8576
    },
    {
      "epoch": 1.7631822386679001,
      "grad_norm": 0.19085493683815002,
      "learning_rate": 3.4490588218137356e-05,
      "loss": 0.5305,
      "step": 8577
    },
    {
      "epoch": 1.7633878096412787,
      "grad_norm": 0.16426925361156464,
      "learning_rate": 3.448087657502073e-05,
      "loss": 0.5152,
      "step": 8578
    },
    {
      "epoch": 1.7635933806146573,
      "grad_norm": 0.16579680144786835,
      "learning_rate": 3.447116545013215e-05,
      "loss": 0.5536,
      "step": 8579
    },
    {
      "epoch": 1.763798951588036,
      "grad_norm": 0.19505342841148376,
      "learning_rate": 3.4461454843950035e-05,
      "loss": 0.5208,
      "step": 8580
    },
    {
      "epoch": 1.7640045225614145,
      "grad_norm": 0.19521526992321014,
      "learning_rate": 3.445174475695277e-05,
      "loss": 0.5593,
      "step": 8581
    },
    {
      "epoch": 1.7642100935347929,
      "grad_norm": 0.19059976935386658,
      "learning_rate": 3.4442035189618756e-05,
      "loss": 0.5199,
      "step": 8582
    },
    {
      "epoch": 1.7644156645081714,
      "grad_norm": 0.1944594383239746,
      "learning_rate": 3.443232614242631e-05,
      "loss": 0.5527,
      "step": 8583
    },
    {
      "epoch": 1.76462123548155,
      "grad_norm": 0.1929233968257904,
      "learning_rate": 3.442261761585376e-05,
      "loss": 0.5206,
      "step": 8584
    },
    {
      "epoch": 1.7648268064549284,
      "grad_norm": 0.1909506469964981,
      "learning_rate": 3.441290961037941e-05,
      "loss": 0.518,
      "step": 8585
    },
    {
      "epoch": 1.765032377428307,
      "grad_norm": 0.20194295048713684,
      "learning_rate": 3.440320212648152e-05,
      "loss": 0.5559,
      "step": 8586
    },
    {
      "epoch": 1.7652379484016856,
      "grad_norm": 0.20173272490501404,
      "learning_rate": 3.439349516463833e-05,
      "loss": 0.5235,
      "step": 8587
    },
    {
      "epoch": 1.7654435193750642,
      "grad_norm": 0.1989864706993103,
      "learning_rate": 3.438378872532806e-05,
      "loss": 0.5219,
      "step": 8588
    },
    {
      "epoch": 1.7656490903484428,
      "grad_norm": 0.17310731112957,
      "learning_rate": 3.43740828090289e-05,
      "loss": 0.506,
      "step": 8589
    },
    {
      "epoch": 1.7658546613218213,
      "grad_norm": 0.16371743381023407,
      "learning_rate": 3.4364377416219e-05,
      "loss": 0.5388,
      "step": 8590
    },
    {
      "epoch": 1.7660602322952,
      "grad_norm": 0.19252368807792664,
      "learning_rate": 3.4354672547376524e-05,
      "loss": 0.5251,
      "step": 8591
    },
    {
      "epoch": 1.7662658032685785,
      "grad_norm": 0.1986730545759201,
      "learning_rate": 3.4344968202979584e-05,
      "loss": 0.5353,
      "step": 8592
    },
    {
      "epoch": 1.766471374241957,
      "grad_norm": 0.22330817580223083,
      "learning_rate": 3.433526438350625e-05,
      "loss": 0.5355,
      "step": 8593
    },
    {
      "epoch": 1.7666769452153357,
      "grad_norm": 0.19446399807929993,
      "learning_rate": 3.43255610894346e-05,
      "loss": 0.518,
      "step": 8594
    },
    {
      "epoch": 1.7668825161887143,
      "grad_norm": 0.19539190828800201,
      "learning_rate": 3.431585832124266e-05,
      "loss": 0.5334,
      "step": 8595
    },
    {
      "epoch": 1.7670880871620929,
      "grad_norm": 0.20236273109912872,
      "learning_rate": 3.430615607940844e-05,
      "loss": 0.5315,
      "step": 8596
    },
    {
      "epoch": 1.7672936581354712,
      "grad_norm": 0.1652330905199051,
      "learning_rate": 3.429645436440991e-05,
      "loss": 0.5177,
      "step": 8597
    },
    {
      "epoch": 1.7674992291088498,
      "grad_norm": 0.16170786321163177,
      "learning_rate": 3.428675317672507e-05,
      "loss": 0.508,
      "step": 8598
    },
    {
      "epoch": 1.7677048000822284,
      "grad_norm": 0.1644188016653061,
      "learning_rate": 3.427705251683182e-05,
      "loss": 0.5064,
      "step": 8599
    },
    {
      "epoch": 1.767910371055607,
      "grad_norm": 0.1265815794467926,
      "learning_rate": 3.4267352385208086e-05,
      "loss": 0.4951,
      "step": 8600
    },
    {
      "epoch": 1.7681159420289854,
      "grad_norm": 0.16070230305194855,
      "learning_rate": 3.425765278233172e-05,
      "loss": 0.5369,
      "step": 8601
    },
    {
      "epoch": 1.768321513002364,
      "grad_norm": 0.19323338568210602,
      "learning_rate": 3.42479537086806e-05,
      "loss": 0.5374,
      "step": 8602
    },
    {
      "epoch": 1.7685270839757425,
      "grad_norm": 0.19410564005374908,
      "learning_rate": 3.423825516473254e-05,
      "loss": 0.5405,
      "step": 8603
    },
    {
      "epoch": 1.7687326549491211,
      "grad_norm": 0.19003941118717194,
      "learning_rate": 3.422855715096534e-05,
      "loss": 0.5468,
      "step": 8604
    },
    {
      "epoch": 1.7689382259224997,
      "grad_norm": 0.19323213398456573,
      "learning_rate": 3.421885966785679e-05,
      "loss": 0.5257,
      "step": 8605
    },
    {
      "epoch": 1.7691437968958783,
      "grad_norm": 0.1951300948858261,
      "learning_rate": 3.420916271588464e-05,
      "loss": 0.5289,
      "step": 8606
    },
    {
      "epoch": 1.769349367869257,
      "grad_norm": 0.17118534445762634,
      "learning_rate": 3.419946629552661e-05,
      "loss": 0.5018,
      "step": 8607
    },
    {
      "epoch": 1.7695549388426355,
      "grad_norm": 0.18546664714813232,
      "learning_rate": 3.418977040726039e-05,
      "loss": 0.5171,
      "step": 8608
    },
    {
      "epoch": 1.769760509816014,
      "grad_norm": 0.2011442631483078,
      "learning_rate": 3.418007505156365e-05,
      "loss": 0.5485,
      "step": 8609
    },
    {
      "epoch": 1.7699660807893927,
      "grad_norm": 0.20571096241474152,
      "learning_rate": 3.417038022891405e-05,
      "loss": 0.5286,
      "step": 8610
    },
    {
      "epoch": 1.7701716517627712,
      "grad_norm": 0.19118675589561462,
      "learning_rate": 3.416068593978917e-05,
      "loss": 0.5185,
      "step": 8611
    },
    {
      "epoch": 1.7703772227361496,
      "grad_norm": 0.19263485074043274,
      "learning_rate": 3.415099218466666e-05,
      "loss": 0.5285,
      "step": 8612
    },
    {
      "epoch": 1.7705827937095282,
      "grad_norm": 0.15957173705101013,
      "learning_rate": 3.4141298964024046e-05,
      "loss": 0.4959,
      "step": 8613
    },
    {
      "epoch": 1.7707883646829068,
      "grad_norm": 0.13335727155208588,
      "learning_rate": 3.4131606278338875e-05,
      "loss": 0.5116,
      "step": 8614
    },
    {
      "epoch": 1.7709939356562854,
      "grad_norm": 0.15437600016593933,
      "learning_rate": 3.412191412808868e-05,
      "loss": 0.545,
      "step": 8615
    },
    {
      "epoch": 1.7711995066296637,
      "grad_norm": 0.19646428525447845,
      "learning_rate": 3.411222251375092e-05,
      "loss": 0.5433,
      "step": 8616
    },
    {
      "epoch": 1.7714050776030423,
      "grad_norm": 0.21242785453796387,
      "learning_rate": 3.410253143580307e-05,
      "loss": 0.546,
      "step": 8617
    },
    {
      "epoch": 1.771610648576421,
      "grad_norm": 0.19566522538661957,
      "learning_rate": 3.4092840894722545e-05,
      "loss": 0.5379,
      "step": 8618
    },
    {
      "epoch": 1.7718162195497995,
      "grad_norm": 0.19648124277591705,
      "learning_rate": 3.40831508909868e-05,
      "loss": 0.566,
      "step": 8619
    },
    {
      "epoch": 1.772021790523178,
      "grad_norm": 0.17227233946323395,
      "learning_rate": 3.407346142507317e-05,
      "loss": 0.5122,
      "step": 8620
    },
    {
      "epoch": 1.7722273614965567,
      "grad_norm": 0.1685340255498886,
      "learning_rate": 3.406377249745902e-05,
      "loss": 0.5275,
      "step": 8621
    },
    {
      "epoch": 1.7724329324699353,
      "grad_norm": 0.19663850963115692,
      "learning_rate": 3.4054084108621695e-05,
      "loss": 0.5189,
      "step": 8622
    },
    {
      "epoch": 1.7726385034433139,
      "grad_norm": 0.19546058773994446,
      "learning_rate": 3.4044396259038475e-05,
      "loss": 0.5577,
      "step": 8623
    },
    {
      "epoch": 1.7728440744166925,
      "grad_norm": 0.19245782494544983,
      "learning_rate": 3.4034708949186655e-05,
      "loss": 0.5378,
      "step": 8624
    },
    {
      "epoch": 1.773049645390071,
      "grad_norm": 0.191994771361351,
      "learning_rate": 3.402502217954346e-05,
      "loss": 0.5207,
      "step": 8625
    },
    {
      "epoch": 1.7732552163634496,
      "grad_norm": 0.19610409438610077,
      "learning_rate": 3.401533595058612e-05,
      "loss": 0.5512,
      "step": 8626
    },
    {
      "epoch": 1.773460787336828,
      "grad_norm": 0.19996674358844757,
      "learning_rate": 3.400565026279186e-05,
      "loss": 0.5401,
      "step": 8627
    },
    {
      "epoch": 1.7736663583102066,
      "grad_norm": 0.19628667831420898,
      "learning_rate": 3.3995965116637814e-05,
      "loss": 0.5596,
      "step": 8628
    },
    {
      "epoch": 1.7738719292835852,
      "grad_norm": 0.2043389528989792,
      "learning_rate": 3.398628051260114e-05,
      "loss": 0.545,
      "step": 8629
    },
    {
      "epoch": 1.7740775002569638,
      "grad_norm": 0.1604812741279602,
      "learning_rate": 3.397659645115894e-05,
      "loss": 0.4899,
      "step": 8630
    },
    {
      "epoch": 1.7742830712303421,
      "grad_norm": 0.15960481762886047,
      "learning_rate": 3.396691293278831e-05,
      "loss": 0.5164,
      "step": 8631
    },
    {
      "epoch": 1.7744886422037207,
      "grad_norm": 0.1938653588294983,
      "learning_rate": 3.395722995796629e-05,
      "loss": 0.537,
      "step": 8632
    },
    {
      "epoch": 1.7746942131770993,
      "grad_norm": 0.202159583568573,
      "learning_rate": 3.3947547527169964e-05,
      "loss": 0.5427,
      "step": 8633
    },
    {
      "epoch": 1.7748997841504779,
      "grad_norm": 0.19612862169742584,
      "learning_rate": 3.3937865640876305e-05,
      "loss": 0.5301,
      "step": 8634
    },
    {
      "epoch": 1.7751053551238565,
      "grad_norm": 0.20528623461723328,
      "learning_rate": 3.39281842995623e-05,
      "loss": 0.5452,
      "step": 8635
    },
    {
      "epoch": 1.775310926097235,
      "grad_norm": 0.16790783405303955,
      "learning_rate": 3.3918503503704905e-05,
      "loss": 0.5233,
      "step": 8636
    },
    {
      "epoch": 1.7755164970706137,
      "grad_norm": 0.12118061631917953,
      "learning_rate": 3.390882325378105e-05,
      "loss": 0.5104,
      "step": 8637
    },
    {
      "epoch": 1.7757220680439922,
      "grad_norm": 0.1575068235397339,
      "learning_rate": 3.389914355026764e-05,
      "loss": 0.5293,
      "step": 8638
    },
    {
      "epoch": 1.7759276390173708,
      "grad_norm": 0.16683286428451538,
      "learning_rate": 3.3889464393641516e-05,
      "loss": 0.5083,
      "step": 8639
    },
    {
      "epoch": 1.7761332099907494,
      "grad_norm": 0.15598100423812866,
      "learning_rate": 3.387978578437957e-05,
      "loss": 0.5133,
      "step": 8640
    },
    {
      "epoch": 1.776338780964128,
      "grad_norm": 0.202442929148674,
      "learning_rate": 3.387010772295861e-05,
      "loss": 0.5476,
      "step": 8641
    },
    {
      "epoch": 1.7765443519375064,
      "grad_norm": 0.16517791152000427,
      "learning_rate": 3.3860430209855415e-05,
      "loss": 0.504,
      "step": 8642
    },
    {
      "epoch": 1.776749922910885,
      "grad_norm": 0.18405590951442719,
      "learning_rate": 3.3850753245546756e-05,
      "loss": 0.5372,
      "step": 8643
    },
    {
      "epoch": 1.7769554938842635,
      "grad_norm": 0.16793282330036163,
      "learning_rate": 3.384107683050938e-05,
      "loss": 0.5214,
      "step": 8644
    },
    {
      "epoch": 1.7771610648576421,
      "grad_norm": 0.1541900336742401,
      "learning_rate": 3.383140096521997e-05,
      "loss": 0.5294,
      "step": 8645
    },
    {
      "epoch": 1.7773666358310205,
      "grad_norm": 0.22302818298339844,
      "learning_rate": 3.3821725650155247e-05,
      "loss": 0.5564,
      "step": 8646
    },
    {
      "epoch": 1.777572206804399,
      "grad_norm": 0.1978428214788437,
      "learning_rate": 3.381205088579185e-05,
      "loss": 0.5236,
      "step": 8647
    },
    {
      "epoch": 1.7777777777777777,
      "grad_norm": 0.1918904036283493,
      "learning_rate": 3.380237667260642e-05,
      "loss": 0.5082,
      "step": 8648
    },
    {
      "epoch": 1.7779833487511563,
      "grad_norm": 0.1877550482749939,
      "learning_rate": 3.379270301107555e-05,
      "loss": 0.5303,
      "step": 8649
    },
    {
      "epoch": 1.7781889197245349,
      "grad_norm": 0.1972031146287918,
      "learning_rate": 3.3783029901675826e-05,
      "loss": 0.5553,
      "step": 8650
    },
    {
      "epoch": 1.7783944906979134,
      "grad_norm": 0.19708669185638428,
      "learning_rate": 3.377335734488379e-05,
      "loss": 0.5414,
      "step": 8651
    },
    {
      "epoch": 1.778600061671292,
      "grad_norm": 0.1898190975189209,
      "learning_rate": 3.376368534117595e-05,
      "loss": 0.5227,
      "step": 8652
    },
    {
      "epoch": 1.7788056326446706,
      "grad_norm": 0.19257086515426636,
      "learning_rate": 3.3754013891028826e-05,
      "loss": 0.5288,
      "step": 8653
    },
    {
      "epoch": 1.7790112036180492,
      "grad_norm": 0.20055457949638367,
      "learning_rate": 3.374434299491888e-05,
      "loss": 0.5422,
      "step": 8654
    },
    {
      "epoch": 1.7792167745914278,
      "grad_norm": 0.19281229376792908,
      "learning_rate": 3.373467265332254e-05,
      "loss": 0.5286,
      "step": 8655
    },
    {
      "epoch": 1.7794223455648064,
      "grad_norm": 0.16794267296791077,
      "learning_rate": 3.372500286671622e-05,
      "loss": 0.5123,
      "step": 8656
    },
    {
      "epoch": 1.779627916538185,
      "grad_norm": 0.17391237616539001,
      "learning_rate": 3.371533363557631e-05,
      "loss": 0.5325,
      "step": 8657
    },
    {
      "epoch": 1.7798334875115633,
      "grad_norm": 0.19158299267292023,
      "learning_rate": 3.3705664960379176e-05,
      "loss": 0.5259,
      "step": 8658
    },
    {
      "epoch": 1.780039058484942,
      "grad_norm": 0.19506706297397614,
      "learning_rate": 3.3695996841601114e-05,
      "loss": 0.5304,
      "step": 8659
    },
    {
      "epoch": 1.7802446294583205,
      "grad_norm": 0.19340963661670685,
      "learning_rate": 3.3686329279718484e-05,
      "loss": 0.5321,
      "step": 8660
    },
    {
      "epoch": 1.7804502004316989,
      "grad_norm": 0.16678109765052795,
      "learning_rate": 3.367666227520752e-05,
      "loss": 0.4986,
      "step": 8661
    },
    {
      "epoch": 1.7806557714050775,
      "grad_norm": 0.1627744436264038,
      "learning_rate": 3.366699582854449e-05,
      "loss": 0.5676,
      "step": 8662
    },
    {
      "epoch": 1.780861342378456,
      "grad_norm": 0.2138591855764389,
      "learning_rate": 3.365732994020559e-05,
      "loss": 0.5439,
      "step": 8663
    },
    {
      "epoch": 1.7810669133518346,
      "grad_norm": 0.197159543633461,
      "learning_rate": 3.3647664610667036e-05,
      "loss": 0.5174,
      "step": 8664
    },
    {
      "epoch": 1.7812724843252132,
      "grad_norm": 0.16026876866817474,
      "learning_rate": 3.363799984040499e-05,
      "loss": 0.5147,
      "step": 8665
    },
    {
      "epoch": 1.7814780552985918,
      "grad_norm": 0.1291634738445282,
      "learning_rate": 3.3628335629895564e-05,
      "loss": 0.5185,
      "step": 8666
    },
    {
      "epoch": 1.7816836262719704,
      "grad_norm": 0.12628033757209778,
      "learning_rate": 3.3618671979614906e-05,
      "loss": 0.5011,
      "step": 8667
    },
    {
      "epoch": 1.781889197245349,
      "grad_norm": 0.16729123890399933,
      "learning_rate": 3.360900889003909e-05,
      "loss": 0.5241,
      "step": 8668
    },
    {
      "epoch": 1.7820947682187276,
      "grad_norm": 0.199641615152359,
      "learning_rate": 3.3599346361644154e-05,
      "loss": 0.5325,
      "step": 8669
    },
    {
      "epoch": 1.7823003391921062,
      "grad_norm": 0.1920914649963379,
      "learning_rate": 3.3589684394906144e-05,
      "loss": 0.5149,
      "step": 8670
    },
    {
      "epoch": 1.7825059101654848,
      "grad_norm": 0.19104242324829102,
      "learning_rate": 3.358002299030105e-05,
      "loss": 0.5151,
      "step": 8671
    },
    {
      "epoch": 1.7827114811388634,
      "grad_norm": 0.1941048502922058,
      "learning_rate": 3.3570362148304846e-05,
      "loss": 0.5251,
      "step": 8672
    },
    {
      "epoch": 1.7829170521122417,
      "grad_norm": 0.18796855211257935,
      "learning_rate": 3.356070186939346e-05,
      "loss": 0.5222,
      "step": 8673
    },
    {
      "epoch": 1.7831226230856203,
      "grad_norm": 0.1918378323316574,
      "learning_rate": 3.355104215404284e-05,
      "loss": 0.5433,
      "step": 8674
    },
    {
      "epoch": 1.783328194058999,
      "grad_norm": 0.19748041033744812,
      "learning_rate": 3.354138300272887e-05,
      "loss": 0.5324,
      "step": 8675
    },
    {
      "epoch": 1.7835337650323775,
      "grad_norm": 0.20158739387989044,
      "learning_rate": 3.35317244159274e-05,
      "loss": 0.5326,
      "step": 8676
    },
    {
      "epoch": 1.7837393360057558,
      "grad_norm": 0.19620271027088165,
      "learning_rate": 3.352206639411426e-05,
      "loss": 0.5195,
      "step": 8677
    },
    {
      "epoch": 1.7839449069791344,
      "grad_norm": 0.2002163529396057,
      "learning_rate": 3.3512408937765256e-05,
      "loss": 0.5183,
      "step": 8678
    },
    {
      "epoch": 1.784150477952513,
      "grad_norm": 0.16237804293632507,
      "learning_rate": 3.350275204735618e-05,
      "loss": 0.5099,
      "step": 8679
    },
    {
      "epoch": 1.7843560489258916,
      "grad_norm": 0.16660307347774506,
      "learning_rate": 3.349309572336276e-05,
      "loss": 0.5543,
      "step": 8680
    },
    {
      "epoch": 1.7845616198992702,
      "grad_norm": 0.2031785398721695,
      "learning_rate": 3.3483439966260734e-05,
      "loss": 0.5341,
      "step": 8681
    },
    {
      "epoch": 1.7847671908726488,
      "grad_norm": 0.19732213020324707,
      "learning_rate": 3.34737847765258e-05,
      "loss": 0.5229,
      "step": 8682
    },
    {
      "epoch": 1.7849727618460274,
      "grad_norm": 0.20520782470703125,
      "learning_rate": 3.3464130154633616e-05,
      "loss": 0.5356,
      "step": 8683
    },
    {
      "epoch": 1.785178332819406,
      "grad_norm": 0.1953929215669632,
      "learning_rate": 3.345447610105983e-05,
      "loss": 0.546,
      "step": 8684
    },
    {
      "epoch": 1.7853839037927846,
      "grad_norm": 0.19621872901916504,
      "learning_rate": 3.344482261628003e-05,
      "loss": 0.5344,
      "step": 8685
    },
    {
      "epoch": 1.7855894747661631,
      "grad_norm": 0.18925665318965912,
      "learning_rate": 3.3435169700769815e-05,
      "loss": 0.5191,
      "step": 8686
    },
    {
      "epoch": 1.7857950457395417,
      "grad_norm": 0.16739846765995026,
      "learning_rate": 3.3425517355004715e-05,
      "loss": 0.5152,
      "step": 8687
    },
    {
      "epoch": 1.78600061671292,
      "grad_norm": 0.198704332113266,
      "learning_rate": 3.3415865579460305e-05,
      "loss": 0.5538,
      "step": 8688
    },
    {
      "epoch": 1.7862061876862987,
      "grad_norm": 0.24151252210140228,
      "learning_rate": 3.340621437461204e-05,
      "loss": 0.5001,
      "step": 8689
    },
    {
      "epoch": 1.7864117586596773,
      "grad_norm": 0.20363937318325043,
      "learning_rate": 3.3396563740935406e-05,
      "loss": 0.5489,
      "step": 8690
    },
    {
      "epoch": 1.7866173296330559,
      "grad_norm": 0.19773469865322113,
      "learning_rate": 3.338691367890584e-05,
      "loss": 0.5149,
      "step": 8691
    },
    {
      "epoch": 1.7868229006064342,
      "grad_norm": 0.1598690301179886,
      "learning_rate": 3.3377264188998764e-05,
      "loss": 0.516,
      "step": 8692
    },
    {
      "epoch": 1.7870284715798128,
      "grad_norm": 0.12722936272621155,
      "learning_rate": 3.3367615271689555e-05,
      "loss": 0.5108,
      "step": 8693
    },
    {
      "epoch": 1.7872340425531914,
      "grad_norm": 0.15668757259845734,
      "learning_rate": 3.335796692745356e-05,
      "loss": 0.5482,
      "step": 8694
    },
    {
      "epoch": 1.78743961352657,
      "grad_norm": 0.20184186100959778,
      "learning_rate": 3.3348319156766126e-05,
      "loss": 0.5621,
      "step": 8695
    },
    {
      "epoch": 1.7876451844999486,
      "grad_norm": 0.20065537095069885,
      "learning_rate": 3.333867196010255e-05,
      "loss": 0.5341,
      "step": 8696
    },
    {
      "epoch": 1.7878507554733272,
      "grad_norm": 0.19662179052829742,
      "learning_rate": 3.3329025337938106e-05,
      "loss": 0.5398,
      "step": 8697
    },
    {
      "epoch": 1.7880563264467058,
      "grad_norm": 0.1874885857105255,
      "learning_rate": 3.331937929074804e-05,
      "loss": 0.53,
      "step": 8698
    },
    {
      "epoch": 1.7882618974200843,
      "grad_norm": 0.19309687614440918,
      "learning_rate": 3.330973381900754e-05,
      "loss": 0.5446,
      "step": 8699
    },
    {
      "epoch": 1.788467468393463,
      "grad_norm": 0.1995777040719986,
      "learning_rate": 3.330008892319183e-05,
      "loss": 0.5365,
      "step": 8700
    },
    {
      "epoch": 1.7886730393668415,
      "grad_norm": 0.20065602660179138,
      "learning_rate": 3.3290444603776045e-05,
      "loss": 0.5344,
      "step": 8701
    },
    {
      "epoch": 1.78887861034022,
      "grad_norm": 0.19749754667282104,
      "learning_rate": 3.328080086123532e-05,
      "loss": 0.5441,
      "step": 8702
    },
    {
      "epoch": 1.7890841813135985,
      "grad_norm": 0.1977740079164505,
      "learning_rate": 3.3271157696044774e-05,
      "loss": 0.4977,
      "step": 8703
    },
    {
      "epoch": 1.789289752286977,
      "grad_norm": 0.19205152988433838,
      "learning_rate": 3.3261515108679465e-05,
      "loss": 0.5375,
      "step": 8704
    },
    {
      "epoch": 1.7894953232603557,
      "grad_norm": 0.19639678299427032,
      "learning_rate": 3.325187309961445e-05,
      "loss": 0.5507,
      "step": 8705
    },
    {
      "epoch": 1.7897008942337342,
      "grad_norm": 0.17259171605110168,
      "learning_rate": 3.3242231669324727e-05,
      "loss": 0.5051,
      "step": 8706
    },
    {
      "epoch": 1.7899064652071126,
      "grad_norm": 0.13026978075504303,
      "learning_rate": 3.323259081828529e-05,
      "loss": 0.5009,
      "step": 8707
    },
    {
      "epoch": 1.7901120361804912,
      "grad_norm": 0.16744323074817657,
      "learning_rate": 3.322295054697109e-05,
      "loss": 0.5441,
      "step": 8708
    },
    {
      "epoch": 1.7903176071538698,
      "grad_norm": 0.1634387969970703,
      "learning_rate": 3.3213310855857096e-05,
      "loss": 0.5119,
      "step": 8709
    },
    {
      "epoch": 1.7905231781272484,
      "grad_norm": 0.15612611174583435,
      "learning_rate": 3.3203671745418175e-05,
      "loss": 0.5536,
      "step": 8710
    },
    {
      "epoch": 1.790728749100627,
      "grad_norm": 0.23640312254428864,
      "learning_rate": 3.31940332161292e-05,
      "loss": 0.5475,
      "step": 8711
    },
    {
      "epoch": 1.7909343200740055,
      "grad_norm": 0.19410596787929535,
      "learning_rate": 3.318439526846505e-05,
      "loss": 0.5559,
      "step": 8712
    },
    {
      "epoch": 1.7911398910473841,
      "grad_norm": 0.19222721457481384,
      "learning_rate": 3.317475790290051e-05,
      "loss": 0.5342,
      "step": 8713
    },
    {
      "epoch": 1.7913454620207627,
      "grad_norm": 0.17586657404899597,
      "learning_rate": 3.316512111991038e-05,
      "loss": 0.4957,
      "step": 8714
    },
    {
      "epoch": 1.7915510329941413,
      "grad_norm": 0.1588892936706543,
      "learning_rate": 3.31554849199694e-05,
      "loss": 0.5331,
      "step": 8715
    },
    {
      "epoch": 1.79175660396752,
      "grad_norm": 0.202567458152771,
      "learning_rate": 3.3145849303552333e-05,
      "loss": 0.5418,
      "step": 8716
    },
    {
      "epoch": 1.7919621749408985,
      "grad_norm": 0.19263319671154022,
      "learning_rate": 3.3136214271133865e-05,
      "loss": 0.5144,
      "step": 8717
    },
    {
      "epoch": 1.7921677459142769,
      "grad_norm": 0.17071235179901123,
      "learning_rate": 3.312657982318866e-05,
      "loss": 0.5028,
      "step": 8718
    },
    {
      "epoch": 1.7923733168876554,
      "grad_norm": 0.16436687111854553,
      "learning_rate": 3.311694596019138e-05,
      "loss": 0.5452,
      "step": 8719
    },
    {
      "epoch": 1.792578887861034,
      "grad_norm": 0.16867224872112274,
      "learning_rate": 3.310731268261662e-05,
      "loss": 0.5006,
      "step": 8720
    },
    {
      "epoch": 1.7927844588344126,
      "grad_norm": 0.16634447872638702,
      "learning_rate": 3.3097679990938975e-05,
      "loss": 0.5141,
      "step": 8721
    },
    {
      "epoch": 1.792990029807791,
      "grad_norm": 0.16255205869674683,
      "learning_rate": 3.308804788563302e-05,
      "loss": 0.5167,
      "step": 8722
    },
    {
      "epoch": 1.7931956007811696,
      "grad_norm": 0.12086722999811172,
      "learning_rate": 3.307841636717326e-05,
      "loss": 0.5256,
      "step": 8723
    },
    {
      "epoch": 1.7934011717545482,
      "grad_norm": 0.16073068976402283,
      "learning_rate": 3.3068785436034214e-05,
      "loss": 0.522,
      "step": 8724
    },
    {
      "epoch": 1.7936067427279268,
      "grad_norm": 0.19669640064239502,
      "learning_rate": 3.305915509269034e-05,
      "loss": 0.5543,
      "step": 8725
    },
    {
      "epoch": 1.7938123137013053,
      "grad_norm": 0.17151986062526703,
      "learning_rate": 3.304952533761608e-05,
      "loss": 0.5286,
      "step": 8726
    },
    {
      "epoch": 1.794017884674684,
      "grad_norm": 0.15375934541225433,
      "learning_rate": 3.303989617128586e-05,
      "loss": 0.5302,
      "step": 8727
    },
    {
      "epoch": 1.7942234556480625,
      "grad_norm": 0.19120700657367706,
      "learning_rate": 3.303026759417403e-05,
      "loss": 0.5134,
      "step": 8728
    },
    {
      "epoch": 1.794429026621441,
      "grad_norm": 0.15886104106903076,
      "learning_rate": 3.302063960675498e-05,
      "loss": 0.4965,
      "step": 8729
    },
    {
      "epoch": 1.7946345975948197,
      "grad_norm": 0.1626490205526352,
      "learning_rate": 3.3011012209503034e-05,
      "loss": 0.5338,
      "step": 8730
    },
    {
      "epoch": 1.7948401685681983,
      "grad_norm": 0.20152784883975983,
      "learning_rate": 3.300138540289248e-05,
      "loss": 0.5339,
      "step": 8731
    },
    {
      "epoch": 1.7950457395415769,
      "grad_norm": 0.1927708387374878,
      "learning_rate": 3.2991759187397575e-05,
      "loss": 0.5188,
      "step": 8732
    },
    {
      "epoch": 1.7952513105149552,
      "grad_norm": 0.16235662996768951,
      "learning_rate": 3.2982133563492586e-05,
      "loss": 0.4898,
      "step": 8733
    },
    {
      "epoch": 1.7954568814883338,
      "grad_norm": 0.1614857167005539,
      "learning_rate": 3.2972508531651686e-05,
      "loss": 0.5315,
      "step": 8734
    },
    {
      "epoch": 1.7956624524617124,
      "grad_norm": 0.19275487959384918,
      "learning_rate": 3.2962884092349074e-05,
      "loss": 0.532,
      "step": 8735
    },
    {
      "epoch": 1.795868023435091,
      "grad_norm": 0.1601790487766266,
      "learning_rate": 3.295326024605891e-05,
      "loss": 0.4982,
      "step": 8736
    },
    {
      "epoch": 1.7960735944084694,
      "grad_norm": 0.16727516055107117,
      "learning_rate": 3.2943636993255316e-05,
      "loss": 0.5415,
      "step": 8737
    },
    {
      "epoch": 1.796279165381848,
      "grad_norm": 0.19914865493774414,
      "learning_rate": 3.293401433441237e-05,
      "loss": 0.507,
      "step": 8738
    },
    {
      "epoch": 1.7964847363552265,
      "grad_norm": 0.20361186563968658,
      "learning_rate": 3.2924392270004136e-05,
      "loss": 0.5369,
      "step": 8739
    },
    {
      "epoch": 1.7966903073286051,
      "grad_norm": 0.19120000302791595,
      "learning_rate": 3.2914770800504665e-05,
      "loss": 0.5204,
      "step": 8740
    },
    {
      "epoch": 1.7968958783019837,
      "grad_norm": 0.19319793581962585,
      "learning_rate": 3.2905149926387946e-05,
      "loss": 0.5346,
      "step": 8741
    },
    {
      "epoch": 1.7971014492753623,
      "grad_norm": 0.19608697295188904,
      "learning_rate": 3.289552964812793e-05,
      "loss": 0.528,
      "step": 8742
    },
    {
      "epoch": 1.797307020248741,
      "grad_norm": 0.19638018310070038,
      "learning_rate": 3.2885909966198625e-05,
      "loss": 0.5554,
      "step": 8743
    },
    {
      "epoch": 1.7975125912221195,
      "grad_norm": 0.19244056940078735,
      "learning_rate": 3.28762908810739e-05,
      "loss": 0.5331,
      "step": 8744
    },
    {
      "epoch": 1.797718162195498,
      "grad_norm": 0.1934266835451126,
      "learning_rate": 3.2866672393227665e-05,
      "loss": 0.523,
      "step": 8745
    },
    {
      "epoch": 1.7979237331688767,
      "grad_norm": 0.1938556432723999,
      "learning_rate": 3.285705450313377e-05,
      "loss": 0.5195,
      "step": 8746
    },
    {
      "epoch": 1.7981293041422552,
      "grad_norm": 0.1935025453567505,
      "learning_rate": 3.284743721126605e-05,
      "loss": 0.5507,
      "step": 8747
    },
    {
      "epoch": 1.7983348751156338,
      "grad_norm": 0.16398179531097412,
      "learning_rate": 3.2837820518098294e-05,
      "loss": 0.5202,
      "step": 8748
    },
    {
      "epoch": 1.7985404460890122,
      "grad_norm": 0.13340577483177185,
      "learning_rate": 3.2828204424104256e-05,
      "loss": 0.5141,
      "step": 8749
    },
    {
      "epoch": 1.7987460170623908,
      "grad_norm": 0.16972105205059052,
      "learning_rate": 3.2818588929757714e-05,
      "loss": 0.5168,
      "step": 8750
    },
    {
      "epoch": 1.7989515880357694,
      "grad_norm": 0.19210562109947205,
      "learning_rate": 3.2808974035532354e-05,
      "loss": 0.5188,
      "step": 8751
    },
    {
      "epoch": 1.7991571590091477,
      "grad_norm": 0.19385598599910736,
      "learning_rate": 3.279935974190187e-05,
      "loss": 0.5101,
      "step": 8752
    },
    {
      "epoch": 1.7993627299825263,
      "grad_norm": 0.20557603240013123,
      "learning_rate": 3.278974604933991e-05,
      "loss": 0.5596,
      "step": 8753
    },
    {
      "epoch": 1.799568300955905,
      "grad_norm": 0.17109614610671997,
      "learning_rate": 3.2780132958320075e-05,
      "loss": 0.5028,
      "step": 8754
    },
    {
      "epoch": 1.7997738719292835,
      "grad_norm": 0.1690118908882141,
      "learning_rate": 3.277052046931598e-05,
      "loss": 0.5425,
      "step": 8755
    },
    {
      "epoch": 1.799979442902662,
      "grad_norm": 0.20591634511947632,
      "learning_rate": 3.276090858280118e-05,
      "loss": 0.5446,
      "step": 8756
    },
    {
      "epoch": 1.8001850138760407,
      "grad_norm": 0.19887815415859222,
      "learning_rate": 3.27512972992492e-05,
      "loss": 0.557,
      "step": 8757
    },
    {
      "epoch": 1.8003905848494193,
      "grad_norm": 0.1681029200553894,
      "learning_rate": 3.274168661913357e-05,
      "loss": 0.5276,
      "step": 8758
    },
    {
      "epoch": 1.8005961558227979,
      "grad_norm": 0.1590951383113861,
      "learning_rate": 3.273207654292774e-05,
      "loss": 0.5437,
      "step": 8759
    },
    {
      "epoch": 1.8008017267961764,
      "grad_norm": 0.19525672495365143,
      "learning_rate": 3.272246707110516e-05,
      "loss": 0.5255,
      "step": 8760
    },
    {
      "epoch": 1.801007297769555,
      "grad_norm": 0.19866180419921875,
      "learning_rate": 3.271285820413924e-05,
      "loss": 0.5169,
      "step": 8761
    },
    {
      "epoch": 1.8012128687429336,
      "grad_norm": 0.18859532475471497,
      "learning_rate": 3.270324994250337e-05,
      "loss": 0.5348,
      "step": 8762
    },
    {
      "epoch": 1.8014184397163122,
      "grad_norm": 0.19412027299404144,
      "learning_rate": 3.2693642286670884e-05,
      "loss": 0.5319,
      "step": 8763
    },
    {
      "epoch": 1.8016240106896906,
      "grad_norm": 0.1961035430431366,
      "learning_rate": 3.2684035237115134e-05,
      "loss": 0.5443,
      "step": 8764
    },
    {
      "epoch": 1.8018295816630692,
      "grad_norm": 0.19343046844005585,
      "learning_rate": 3.2674428794309405e-05,
      "loss": 0.5361,
      "step": 8765
    },
    {
      "epoch": 1.8020351526364478,
      "grad_norm": 0.1937210112810135,
      "learning_rate": 3.266482295872695e-05,
      "loss": 0.5418,
      "step": 8766
    },
    {
      "epoch": 1.8022407236098263,
      "grad_norm": 0.19837218523025513,
      "learning_rate": 3.265521773084103e-05,
      "loss": 0.5194,
      "step": 8767
    },
    {
      "epoch": 1.8024462945832047,
      "grad_norm": 0.19206029176712036,
      "learning_rate": 3.264561311112483e-05,
      "loss": 0.535,
      "step": 8768
    },
    {
      "epoch": 1.8026518655565833,
      "grad_norm": 0.19339123368263245,
      "learning_rate": 3.263600910005152e-05,
      "loss": 0.509,
      "step": 8769
    },
    {
      "epoch": 1.8028574365299619,
      "grad_norm": 0.16199949383735657,
      "learning_rate": 3.262640569809424e-05,
      "loss": 0.4946,
      "step": 8770
    },
    {
      "epoch": 1.8030630075033405,
      "grad_norm": 0.1284702867269516,
      "learning_rate": 3.261680290572613e-05,
      "loss": 0.5095,
      "step": 8771
    },
    {
      "epoch": 1.803268578476719,
      "grad_norm": 0.16122448444366455,
      "learning_rate": 3.2607200723420274e-05,
      "loss": 0.5157,
      "step": 8772
    },
    {
      "epoch": 1.8034741494500977,
      "grad_norm": 0.1663200855255127,
      "learning_rate": 3.259759915164971e-05,
      "loss": 0.5245,
      "step": 8773
    },
    {
      "epoch": 1.8036797204234762,
      "grad_norm": 0.15676259994506836,
      "learning_rate": 3.258799819088746e-05,
      "loss": 0.5267,
      "step": 8774
    },
    {
      "epoch": 1.8038852913968548,
      "grad_norm": 0.20118048787117004,
      "learning_rate": 3.257839784160652e-05,
      "loss": 0.5446,
      "step": 8775
    },
    {
      "epoch": 1.8040908623702334,
      "grad_norm": 0.198233500123024,
      "learning_rate": 3.256879810427987e-05,
      "loss": 0.5508,
      "step": 8776
    },
    {
      "epoch": 1.804296433343612,
      "grad_norm": 0.20336700975894928,
      "learning_rate": 3.255919897938043e-05,
      "loss": 0.5365,
      "step": 8777
    },
    {
      "epoch": 1.8045020043169906,
      "grad_norm": 0.16567686200141907,
      "learning_rate": 3.2549600467381096e-05,
      "loss": 0.5033,
      "step": 8778
    },
    {
      "epoch": 1.804707575290369,
      "grad_norm": 0.16237860918045044,
      "learning_rate": 3.2540002568754776e-05,
      "loss": 0.5379,
      "step": 8779
    },
    {
      "epoch": 1.8049131462637475,
      "grad_norm": 0.19675461947917938,
      "learning_rate": 3.2530405283974284e-05,
      "loss": 0.5328,
      "step": 8780
    },
    {
      "epoch": 1.8051187172371261,
      "grad_norm": 0.19670812785625458,
      "learning_rate": 3.2520808613512446e-05,
      "loss": 0.5439,
      "step": 8781
    },
    {
      "epoch": 1.8053242882105047,
      "grad_norm": 0.19091184437274933,
      "learning_rate": 3.2511212557842036e-05,
      "loss": 0.5355,
      "step": 8782
    },
    {
      "epoch": 1.805529859183883,
      "grad_norm": 0.19207298755645752,
      "learning_rate": 3.250161711743581e-05,
      "loss": 0.5185,
      "step": 8783
    },
    {
      "epoch": 1.8057354301572617,
      "grad_norm": 0.19454807043075562,
      "learning_rate": 3.2492022292766476e-05,
      "loss": 0.5098,
      "step": 8784
    },
    {
      "epoch": 1.8059410011306403,
      "grad_norm": 0.1931590735912323,
      "learning_rate": 3.248242808430676e-05,
      "loss": 0.5535,
      "step": 8785
    },
    {
      "epoch": 1.8061465721040189,
      "grad_norm": 0.19954814016819,
      "learning_rate": 3.24728344925293e-05,
      "loss": 0.5226,
      "step": 8786
    },
    {
      "epoch": 1.8063521430773974,
      "grad_norm": 0.18844476342201233,
      "learning_rate": 3.2463241517906725e-05,
      "loss": 0.502,
      "step": 8787
    },
    {
      "epoch": 1.806557714050776,
      "grad_norm": 0.20117510855197906,
      "learning_rate": 3.245364916091166e-05,
      "loss": 0.5558,
      "step": 8788
    },
    {
      "epoch": 1.8067632850241546,
      "grad_norm": 0.16829055547714233,
      "learning_rate": 3.244405742201665e-05,
      "loss": 0.5025,
      "step": 8789
    },
    {
      "epoch": 1.8069688559975332,
      "grad_norm": 0.16325733065605164,
      "learning_rate": 3.243446630169425e-05,
      "loss": 0.5352,
      "step": 8790
    },
    {
      "epoch": 1.8071744269709118,
      "grad_norm": 0.2082752287387848,
      "learning_rate": 3.242487580041695e-05,
      "loss": 0.5324,
      "step": 8791
    },
    {
      "epoch": 1.8073799979442904,
      "grad_norm": 0.19825617969036102,
      "learning_rate": 3.2415285918657254e-05,
      "loss": 0.5495,
      "step": 8792
    },
    {
      "epoch": 1.807585568917669,
      "grad_norm": 0.19314411282539368,
      "learning_rate": 3.24056966568876e-05,
      "loss": 0.5279,
      "step": 8793
    },
    {
      "epoch": 1.8077911398910473,
      "grad_norm": 0.18960903584957123,
      "learning_rate": 3.2396108015580414e-05,
      "loss": 0.5288,
      "step": 8794
    },
    {
      "epoch": 1.807996710864426,
      "grad_norm": 0.1918916255235672,
      "learning_rate": 3.2386519995208064e-05,
      "loss": 0.5223,
      "step": 8795
    },
    {
      "epoch": 1.8082022818378045,
      "grad_norm": 0.1647498458623886,
      "learning_rate": 3.2376932596242916e-05,
      "loss": 0.4907,
      "step": 8796
    },
    {
      "epoch": 1.808407852811183,
      "grad_norm": 0.296530157327652,
      "learning_rate": 3.236734581915732e-05,
      "loss": 0.5286,
      "step": 8797
    },
    {
      "epoch": 1.8086134237845615,
      "grad_norm": 0.20278790593147278,
      "learning_rate": 3.235775966442352e-05,
      "loss": 0.5266,
      "step": 8798
    },
    {
      "epoch": 1.80881899475794,
      "grad_norm": 0.19407640397548676,
      "learning_rate": 3.234817413251382e-05,
      "loss": 0.5374,
      "step": 8799
    },
    {
      "epoch": 1.8090245657313186,
      "grad_norm": 0.1970399171113968,
      "learning_rate": 3.233858922390045e-05,
      "loss": 0.5392,
      "step": 8800
    },
    {
      "epoch": 1.8092301367046972,
      "grad_norm": 0.19362105429172516,
      "learning_rate": 3.232900493905562e-05,
      "loss": 0.5208,
      "step": 8801
    },
    {
      "epoch": 1.8094357076780758,
      "grad_norm": 0.17109599709510803,
      "learning_rate": 3.2319421278451495e-05,
      "loss": 0.5229,
      "step": 8802
    },
    {
      "epoch": 1.8096412786514544,
      "grad_norm": 0.16294682025909424,
      "learning_rate": 3.230983824256021e-05,
      "loss": 0.5206,
      "step": 8803
    },
    {
      "epoch": 1.809846849624833,
      "grad_norm": 0.2055048942565918,
      "learning_rate": 3.2300255831853856e-05,
      "loss": 0.5383,
      "step": 8804
    },
    {
      "epoch": 1.8100524205982116,
      "grad_norm": 0.1635579615831375,
      "learning_rate": 3.229067404680456e-05,
      "loss": 0.492,
      "step": 8805
    },
    {
      "epoch": 1.8102579915715902,
      "grad_norm": 0.11798587441444397,
      "learning_rate": 3.228109288788435e-05,
      "loss": 0.493,
      "step": 8806
    },
    {
      "epoch": 1.8104635625449688,
      "grad_norm": 0.16314202547073364,
      "learning_rate": 3.227151235556525e-05,
      "loss": 0.5462,
      "step": 8807
    },
    {
      "epoch": 1.8106691335183474,
      "grad_norm": 0.19977326691150665,
      "learning_rate": 3.2261932450319237e-05,
      "loss": 0.5128,
      "step": 8808
    },
    {
      "epoch": 1.8108747044917257,
      "grad_norm": 0.19161002337932587,
      "learning_rate": 3.2252353172618275e-05,
      "loss": 0.5166,
      "step": 8809
    },
    {
      "epoch": 1.8110802754651043,
      "grad_norm": 0.19649870693683624,
      "learning_rate": 3.2242774522934294e-05,
      "loss": 0.5205,
      "step": 8810
    },
    {
      "epoch": 1.811285846438483,
      "grad_norm": 0.19603271782398224,
      "learning_rate": 3.2233196501739164e-05,
      "loss": 0.5314,
      "step": 8811
    },
    {
      "epoch": 1.8114914174118615,
      "grad_norm": 0.19260643422603607,
      "learning_rate": 3.222361910950479e-05,
      "loss": 0.4841,
      "step": 8812
    },
    {
      "epoch": 1.8116969883852398,
      "grad_norm": 0.19248011708259583,
      "learning_rate": 3.221404234670299e-05,
      "loss": 0.5179,
      "step": 8813
    },
    {
      "epoch": 1.8119025593586184,
      "grad_norm": 0.21127529442310333,
      "learning_rate": 3.2204466213805556e-05,
      "loss": 0.5432,
      "step": 8814
    },
    {
      "epoch": 1.812108130331997,
      "grad_norm": 0.20110765099525452,
      "learning_rate": 3.219489071128427e-05,
      "loss": 0.5333,
      "step": 8815
    },
    {
      "epoch": 1.8123137013053756,
      "grad_norm": 0.19356440007686615,
      "learning_rate": 3.2185315839610864e-05,
      "loss": 0.5167,
      "step": 8816
    },
    {
      "epoch": 1.8125192722787542,
      "grad_norm": 0.19028930366039276,
      "learning_rate": 3.217574159925706e-05,
      "loss": 0.526,
      "step": 8817
    },
    {
      "epoch": 1.8127248432521328,
      "grad_norm": 0.16638804972171783,
      "learning_rate": 3.21661679906945e-05,
      "loss": 0.5041,
      "step": 8818
    },
    {
      "epoch": 1.8129304142255114,
      "grad_norm": 0.13588784635066986,
      "learning_rate": 3.2156595014394874e-05,
      "loss": 0.5016,
      "step": 8819
    },
    {
      "epoch": 1.81313598519889,
      "grad_norm": 0.16222389042377472,
      "learning_rate": 3.214702267082978e-05,
      "loss": 0.5194,
      "step": 8820
    },
    {
      "epoch": 1.8133415561722686,
      "grad_norm": 0.1995917111635208,
      "learning_rate": 3.213745096047081e-05,
      "loss": 0.5335,
      "step": 8821
    },
    {
      "epoch": 1.8135471271456471,
      "grad_norm": 0.20120279490947723,
      "learning_rate": 3.212787988378951e-05,
      "loss": 0.5491,
      "step": 8822
    },
    {
      "epoch": 1.8137526981190257,
      "grad_norm": 0.19153755903244019,
      "learning_rate": 3.211830944125741e-05,
      "loss": 0.5137,
      "step": 8823
    },
    {
      "epoch": 1.8139582690924043,
      "grad_norm": 0.19813428819179535,
      "learning_rate": 3.210873963334599e-05,
      "loss": 0.5302,
      "step": 8824
    },
    {
      "epoch": 1.8141638400657827,
      "grad_norm": 0.16227731108665466,
      "learning_rate": 3.20991704605267e-05,
      "loss": 0.5092,
      "step": 8825
    },
    {
      "epoch": 1.8143694110391613,
      "grad_norm": 0.13235917687416077,
      "learning_rate": 3.2089601923270996e-05,
      "loss": 0.5202,
      "step": 8826
    },
    {
      "epoch": 1.8145749820125399,
      "grad_norm": 0.16478480398654938,
      "learning_rate": 3.208003402205027e-05,
      "loss": 0.5245,
      "step": 8827
    },
    {
      "epoch": 1.8147805529859182,
      "grad_norm": 0.19610688090324402,
      "learning_rate": 3.207046675733587e-05,
      "loss": 0.514,
      "step": 8828
    },
    {
      "epoch": 1.8149861239592968,
      "grad_norm": 0.19508203864097595,
      "learning_rate": 3.206090012959915e-05,
      "loss": 0.5138,
      "step": 8829
    },
    {
      "epoch": 1.8151916949326754,
      "grad_norm": 0.20295090973377228,
      "learning_rate": 3.205133413931139e-05,
      "loss": 0.5225,
      "step": 8830
    },
    {
      "epoch": 1.815397265906054,
      "grad_norm": 0.17680945992469788,
      "learning_rate": 3.204176878694388e-05,
      "loss": 0.5177,
      "step": 8831
    },
    {
      "epoch": 1.8156028368794326,
      "grad_norm": 0.16306188702583313,
      "learning_rate": 3.203220407296784e-05,
      "loss": 0.5388,
      "step": 8832
    },
    {
      "epoch": 1.8158084078528112,
      "grad_norm": 0.22620275616645813,
      "learning_rate": 3.2022639997854516e-05,
      "loss": 0.5522,
      "step": 8833
    },
    {
      "epoch": 1.8160139788261898,
      "grad_norm": 0.19404473900794983,
      "learning_rate": 3.201307656207506e-05,
      "loss": 0.5213,
      "step": 8834
    },
    {
      "epoch": 1.8162195497995683,
      "grad_norm": 0.1629197895526886,
      "learning_rate": 3.200351376610062e-05,
      "loss": 0.4795,
      "step": 8835
    },
    {
      "epoch": 1.816425120772947,
      "grad_norm": 0.16560040414333344,
      "learning_rate": 3.199395161040231e-05,
      "loss": 0.524,
      "step": 8836
    },
    {
      "epoch": 1.8166306917463255,
      "grad_norm": 0.19243641197681427,
      "learning_rate": 3.198439009545122e-05,
      "loss": 0.5346,
      "step": 8837
    },
    {
      "epoch": 1.816836262719704,
      "grad_norm": 0.16431495547294617,
      "learning_rate": 3.1974829221718386e-05,
      "loss": 0.5181,
      "step": 8838
    },
    {
      "epoch": 1.8170418336930827,
      "grad_norm": 0.15657220780849457,
      "learning_rate": 3.196526898967483e-05,
      "loss": 0.5453,
      "step": 8839
    },
    {
      "epoch": 1.817247404666461,
      "grad_norm": 0.1584593802690506,
      "learning_rate": 3.1955709399791556e-05,
      "loss": 0.4755,
      "step": 8840
    },
    {
      "epoch": 1.8174529756398397,
      "grad_norm": 0.16270606219768524,
      "learning_rate": 3.194615045253952e-05,
      "loss": 0.5515,
      "step": 8841
    },
    {
      "epoch": 1.8176585466132182,
      "grad_norm": 0.2008228451013565,
      "learning_rate": 3.193659214838962e-05,
      "loss": 0.5282,
      "step": 8842
    },
    {
      "epoch": 1.8178641175865966,
      "grad_norm": 0.17246867716312408,
      "learning_rate": 3.192703448781278e-05,
      "loss": 0.5119,
      "step": 8843
    },
    {
      "epoch": 1.8180696885599752,
      "grad_norm": 0.124653160572052,
      "learning_rate": 3.1917477471279846e-05,
      "loss": 0.509,
      "step": 8844
    },
    {
      "epoch": 1.8182752595333538,
      "grad_norm": 0.1591031700372696,
      "learning_rate": 3.1907921099261654e-05,
      "loss": 0.5118,
      "step": 8845
    },
    {
      "epoch": 1.8184808305067324,
      "grad_norm": 0.20176099240779877,
      "learning_rate": 3.189836537222897e-05,
      "loss": 0.5179,
      "step": 8846
    },
    {
      "epoch": 1.818686401480111,
      "grad_norm": 0.19873826205730438,
      "learning_rate": 3.1888810290652606e-05,
      "loss": 0.5189,
      "step": 8847
    },
    {
      "epoch": 1.8188919724534895,
      "grad_norm": 0.19450412690639496,
      "learning_rate": 3.187925585500329e-05,
      "loss": 0.5346,
      "step": 8848
    },
    {
      "epoch": 1.8190975434268681,
      "grad_norm": 0.19407886266708374,
      "learning_rate": 3.18697020657517e-05,
      "loss": 0.5105,
      "step": 8849
    },
    {
      "epoch": 1.8193031144002467,
      "grad_norm": 0.1899595409631729,
      "learning_rate": 3.186014892336852e-05,
      "loss": 0.5173,
      "step": 8850
    },
    {
      "epoch": 1.8195086853736253,
      "grad_norm": 0.19497114419937134,
      "learning_rate": 3.185059642832438e-05,
      "loss": 0.5314,
      "step": 8851
    },
    {
      "epoch": 1.819714256347004,
      "grad_norm": 0.18996943533420563,
      "learning_rate": 3.184104458108991e-05,
      "loss": 0.5371,
      "step": 8852
    },
    {
      "epoch": 1.8199198273203825,
      "grad_norm": 0.20072153210639954,
      "learning_rate": 3.1831493382135644e-05,
      "loss": 0.5398,
      "step": 8853
    },
    {
      "epoch": 1.820125398293761,
      "grad_norm": 0.18781349062919617,
      "learning_rate": 3.182194283193216e-05,
      "loss": 0.5452,
      "step": 8854
    },
    {
      "epoch": 1.8203309692671394,
      "grad_norm": 0.19404610991477966,
      "learning_rate": 3.181239293094997e-05,
      "loss": 0.5322,
      "step": 8855
    },
    {
      "epoch": 1.820536540240518,
      "grad_norm": 0.19891172647476196,
      "learning_rate": 3.180284367965953e-05,
      "loss": 0.5382,
      "step": 8856
    },
    {
      "epoch": 1.8207421112138966,
      "grad_norm": 0.19251051545143127,
      "learning_rate": 3.179329507853131e-05,
      "loss": 0.5373,
      "step": 8857
    },
    {
      "epoch": 1.8209476821872752,
      "grad_norm": 0.16882584989070892,
      "learning_rate": 3.178374712803571e-05,
      "loss": 0.5063,
      "step": 8858
    },
    {
      "epoch": 1.8211532531606536,
      "grad_norm": 0.12265215069055557,
      "learning_rate": 3.177419982864312e-05,
      "loss": 0.5056,
      "step": 8859
    },
    {
      "epoch": 1.8213588241340322,
      "grad_norm": 0.17124712467193604,
      "learning_rate": 3.176465318082386e-05,
      "loss": 0.5412,
      "step": 8860
    },
    {
      "epoch": 1.8215643951074107,
      "grad_norm": 0.20257225632667542,
      "learning_rate": 3.1755107185048296e-05,
      "loss": 0.5302,
      "step": 8861
    },
    {
      "epoch": 1.8217699660807893,
      "grad_norm": 0.2040639966726303,
      "learning_rate": 3.17455618417867e-05,
      "loss": 0.5245,
      "step": 8862
    },
    {
      "epoch": 1.821975537054168,
      "grad_norm": 0.16710929572582245,
      "learning_rate": 3.173601715150931e-05,
      "loss": 0.5054,
      "step": 8863
    },
    {
      "epoch": 1.8221811080275465,
      "grad_norm": 0.16893459856510162,
      "learning_rate": 3.172647311468637e-05,
      "loss": 0.5471,
      "step": 8864
    },
    {
      "epoch": 1.822386679000925,
      "grad_norm": 0.208717480301857,
      "learning_rate": 3.171692973178805e-05,
      "loss": 0.5122,
      "step": 8865
    },
    {
      "epoch": 1.8225922499743037,
      "grad_norm": 0.20160967111587524,
      "learning_rate": 3.170738700328453e-05,
      "loss": 0.537,
      "step": 8866
    },
    {
      "epoch": 1.8227978209476823,
      "grad_norm": 0.20131829380989075,
      "learning_rate": 3.16978449296459e-05,
      "loss": 0.5303,
      "step": 8867
    },
    {
      "epoch": 1.8230033919210609,
      "grad_norm": 0.1935744732618332,
      "learning_rate": 3.168830351134229e-05,
      "loss": 0.5284,
      "step": 8868
    },
    {
      "epoch": 1.8232089628944395,
      "grad_norm": 0.16306568682193756,
      "learning_rate": 3.167876274884375e-05,
      "loss": 0.5169,
      "step": 8869
    },
    {
      "epoch": 1.8234145338678178,
      "grad_norm": 0.17691202461719513,
      "learning_rate": 3.166922264262031e-05,
      "loss": 0.5176,
      "step": 8870
    },
    {
      "epoch": 1.8236201048411964,
      "grad_norm": 0.1908418834209442,
      "learning_rate": 3.165968319314196e-05,
      "loss": 0.5321,
      "step": 8871
    },
    {
      "epoch": 1.823825675814575,
      "grad_norm": 0.2010890543460846,
      "learning_rate": 3.1650144400878655e-05,
      "loss": 0.534,
      "step": 8872
    },
    {
      "epoch": 1.8240312467879536,
      "grad_norm": 0.19382749497890472,
      "learning_rate": 3.164060626630035e-05,
      "loss": 0.528,
      "step": 8873
    },
    {
      "epoch": 1.824236817761332,
      "grad_norm": 0.16458258032798767,
      "learning_rate": 3.163106878987692e-05,
      "loss": 0.5106,
      "step": 8874
    },
    {
      "epoch": 1.8244423887347105,
      "grad_norm": 0.15716025233268738,
      "learning_rate": 3.162153197207825e-05,
      "loss": 0.5123,
      "step": 8875
    },
    {
      "epoch": 1.8246479597080891,
      "grad_norm": 0.19664350152015686,
      "learning_rate": 3.161199581337418e-05,
      "loss": 0.5322,
      "step": 8876
    },
    {
      "epoch": 1.8248535306814677,
      "grad_norm": 0.19546431303024292,
      "learning_rate": 3.160246031423449e-05,
      "loss": 0.5382,
      "step": 8877
    },
    {
      "epoch": 1.8250591016548463,
      "grad_norm": 0.15982358157634735,
      "learning_rate": 3.1592925475128965e-05,
      "loss": 0.5175,
      "step": 8878
    },
    {
      "epoch": 1.825264672628225,
      "grad_norm": 0.1651008278131485,
      "learning_rate": 3.1583391296527345e-05,
      "loss": 0.55,
      "step": 8879
    },
    {
      "epoch": 1.8254702436016035,
      "grad_norm": 0.19452379643917084,
      "learning_rate": 3.15738577788993e-05,
      "loss": 0.5374,
      "step": 8880
    },
    {
      "epoch": 1.825675814574982,
      "grad_norm": 0.1954164355993271,
      "learning_rate": 3.1564324922714546e-05,
      "loss": 0.5137,
      "step": 8881
    },
    {
      "epoch": 1.8258813855483607,
      "grad_norm": 0.19587047398090363,
      "learning_rate": 3.155479272844271e-05,
      "loss": 0.5334,
      "step": 8882
    },
    {
      "epoch": 1.8260869565217392,
      "grad_norm": 0.19509205222129822,
      "learning_rate": 3.154526119655339e-05,
      "loss": 0.5252,
      "step": 8883
    },
    {
      "epoch": 1.8262925274951178,
      "grad_norm": 0.1942068487405777,
      "learning_rate": 3.153573032751616e-05,
      "loss": 0.487,
      "step": 8884
    },
    {
      "epoch": 1.8264980984684962,
      "grad_norm": 0.19760790467262268,
      "learning_rate": 3.152620012180057e-05,
      "loss": 0.5228,
      "step": 8885
    },
    {
      "epoch": 1.8267036694418748,
      "grad_norm": 0.20716530084609985,
      "learning_rate": 3.151667057987612e-05,
      "loss": 0.5397,
      "step": 8886
    },
    {
      "epoch": 1.8269092404152534,
      "grad_norm": 0.17616085708141327,
      "learning_rate": 3.1507141702212276e-05,
      "loss": 0.5052,
      "step": 8887
    },
    {
      "epoch": 1.827114811388632,
      "grad_norm": 0.16189555823802948,
      "learning_rate": 3.149761348927851e-05,
      "loss": 0.54,
      "step": 8888
    },
    {
      "epoch": 1.8273203823620103,
      "grad_norm": 0.19330181181430817,
      "learning_rate": 3.148808594154422e-05,
      "loss": 0.5388,
      "step": 8889
    },
    {
      "epoch": 1.827525953335389,
      "grad_norm": 0.1901981681585312,
      "learning_rate": 3.1478559059478784e-05,
      "loss": 0.522,
      "step": 8890
    },
    {
      "epoch": 1.8277315243087675,
      "grad_norm": 0.2037983387708664,
      "learning_rate": 3.146903284355154e-05,
      "loss": 0.552,
      "step": 8891
    },
    {
      "epoch": 1.827937095282146,
      "grad_norm": 0.19158220291137695,
      "learning_rate": 3.14595072942318e-05,
      "loss": 0.5287,
      "step": 8892
    },
    {
      "epoch": 1.8281426662555247,
      "grad_norm": 0.19278709590435028,
      "learning_rate": 3.1449982411988846e-05,
      "loss": 0.5276,
      "step": 8893
    },
    {
      "epoch": 1.8283482372289033,
      "grad_norm": 0.19211730360984802,
      "learning_rate": 3.144045819729193e-05,
      "loss": 0.5246,
      "step": 8894
    },
    {
      "epoch": 1.8285538082022819,
      "grad_norm": 0.16746920347213745,
      "learning_rate": 3.143093465061026e-05,
      "loss": 0.5085,
      "step": 8895
    },
    {
      "epoch": 1.8287593791756604,
      "grad_norm": 0.16652396321296692,
      "learning_rate": 3.142141177241301e-05,
      "loss": 0.5325,
      "step": 8896
    },
    {
      "epoch": 1.828964950149039,
      "grad_norm": 0.19498294591903687,
      "learning_rate": 3.141188956316935e-05,
      "loss": 0.5344,
      "step": 8897
    },
    {
      "epoch": 1.8291705211224176,
      "grad_norm": 0.19246521592140198,
      "learning_rate": 3.140236802334837e-05,
      "loss": 0.5317,
      "step": 8898
    },
    {
      "epoch": 1.8293760920957962,
      "grad_norm": 0.15903492271900177,
      "learning_rate": 3.139284715341918e-05,
      "loss": 0.4802,
      "step": 8899
    },
    {
      "epoch": 1.8295816630691746,
      "grad_norm": 0.16071657836437225,
      "learning_rate": 3.1383326953850794e-05,
      "loss": 0.5418,
      "step": 8900
    },
    {
      "epoch": 1.8297872340425532,
      "grad_norm": 0.19449485838413239,
      "learning_rate": 3.1373807425112236e-05,
      "loss": 0.5342,
      "step": 8901
    },
    {
      "epoch": 1.8299928050159318,
      "grad_norm": 0.19907018542289734,
      "learning_rate": 3.136428856767252e-05,
      "loss": 0.5059,
      "step": 8902
    },
    {
      "epoch": 1.8301983759893103,
      "grad_norm": 0.19666697084903717,
      "learning_rate": 3.135477038200057e-05,
      "loss": 0.5349,
      "step": 8903
    },
    {
      "epoch": 1.8304039469626887,
      "grad_norm": 0.20423884689807892,
      "learning_rate": 3.13452528685653e-05,
      "loss": 0.5282,
      "step": 8904
    },
    {
      "epoch": 1.8306095179360673,
      "grad_norm": 0.1999506801366806,
      "learning_rate": 3.133573602783559e-05,
      "loss": 0.5322,
      "step": 8905
    },
    {
      "epoch": 1.8308150889094459,
      "grad_norm": 0.1950768530368805,
      "learning_rate": 3.132621986028031e-05,
      "loss": 0.5047,
      "step": 8906
    },
    {
      "epoch": 1.8310206598828245,
      "grad_norm": 0.1928025186061859,
      "learning_rate": 3.131670436636827e-05,
      "loss": 0.5322,
      "step": 8907
    },
    {
      "epoch": 1.831226230856203,
      "grad_norm": 0.18942581117153168,
      "learning_rate": 3.1307189546568223e-05,
      "loss": 0.5073,
      "step": 8908
    },
    {
      "epoch": 1.8314318018295817,
      "grad_norm": 0.19761119782924652,
      "learning_rate": 3.129767540134898e-05,
      "loss": 0.5366,
      "step": 8909
    },
    {
      "epoch": 1.8316373728029602,
      "grad_norm": 0.19659826159477234,
      "learning_rate": 3.1288161931179216e-05,
      "loss": 0.5442,
      "step": 8910
    },
    {
      "epoch": 1.8318429437763388,
      "grad_norm": 0.19665639102458954,
      "learning_rate": 3.1278649136527626e-05,
      "loss": 0.5245,
      "step": 8911
    },
    {
      "epoch": 1.8320485147497174,
      "grad_norm": 0.19645392894744873,
      "learning_rate": 3.1269137017862864e-05,
      "loss": 0.5327,
      "step": 8912
    },
    {
      "epoch": 1.832254085723096,
      "grad_norm": 0.1934535652399063,
      "learning_rate": 3.1259625575653535e-05,
      "loss": 0.5234,
      "step": 8913
    },
    {
      "epoch": 1.8324596566964746,
      "grad_norm": 0.19229261577129364,
      "learning_rate": 3.125011481036823e-05,
      "loss": 0.528,
      "step": 8914
    },
    {
      "epoch": 1.8326652276698532,
      "grad_norm": 0.1917877197265625,
      "learning_rate": 3.124060472247549e-05,
      "loss": 0.5341,
      "step": 8915
    },
    {
      "epoch": 1.8328707986432315,
      "grad_norm": 0.1936071366071701,
      "learning_rate": 3.1231095312443864e-05,
      "loss": 0.5224,
      "step": 8916
    },
    {
      "epoch": 1.8330763696166101,
      "grad_norm": 0.2054835557937622,
      "learning_rate": 3.12215865807418e-05,
      "loss": 0.5125,
      "step": 8917
    },
    {
      "epoch": 1.8332819405899887,
      "grad_norm": 0.1977832019329071,
      "learning_rate": 3.121207852783778e-05,
      "loss": 0.5194,
      "step": 8918
    },
    {
      "epoch": 1.833487511563367,
      "grad_norm": 0.19401463866233826,
      "learning_rate": 3.1202571154200206e-05,
      "loss": 0.5293,
      "step": 8919
    },
    {
      "epoch": 1.8336930825367457,
      "grad_norm": 0.16132505238056183,
      "learning_rate": 3.119306446029746e-05,
      "loss": 0.4925,
      "step": 8920
    },
    {
      "epoch": 1.8338986535101243,
      "grad_norm": 0.15574200451374054,
      "learning_rate": 3.1183558446597894e-05,
      "loss": 0.5394,
      "step": 8921
    },
    {
      "epoch": 1.8341042244835029,
      "grad_norm": 0.2005411684513092,
      "learning_rate": 3.117405311356981e-05,
      "loss": 0.5304,
      "step": 8922
    },
    {
      "epoch": 1.8343097954568814,
      "grad_norm": 0.1618259698152542,
      "learning_rate": 3.116454846168153e-05,
      "loss": 0.5146,
      "step": 8923
    },
    {
      "epoch": 1.83451536643026,
      "grad_norm": 0.17110760509967804,
      "learning_rate": 3.115504449140127e-05,
      "loss": 0.5491,
      "step": 8924
    },
    {
      "epoch": 1.8347209374036386,
      "grad_norm": 0.1980321705341339,
      "learning_rate": 3.114554120319726e-05,
      "loss": 0.523,
      "step": 8925
    },
    {
      "epoch": 1.8349265083770172,
      "grad_norm": 0.19496552646160126,
      "learning_rate": 3.113603859753768e-05,
      "loss": 0.5267,
      "step": 8926
    },
    {
      "epoch": 1.8351320793503958,
      "grad_norm": 0.19820396602153778,
      "learning_rate": 3.112653667489067e-05,
      "loss": 0.5187,
      "step": 8927
    },
    {
      "epoch": 1.8353376503237744,
      "grad_norm": 0.16340111196041107,
      "learning_rate": 3.111703543572436e-05,
      "loss": 0.5077,
      "step": 8928
    },
    {
      "epoch": 1.835543221297153,
      "grad_norm": 0.17557556927204132,
      "learning_rate": 3.110753488050682e-05,
      "loss": 0.5238,
      "step": 8929
    },
    {
      "epoch": 1.8357487922705316,
      "grad_norm": 0.2018451690673828,
      "learning_rate": 3.10980350097061e-05,
      "loss": 0.5438,
      "step": 8930
    },
    {
      "epoch": 1.83595436324391,
      "grad_norm": 0.18654681742191315,
      "learning_rate": 3.108853582379023e-05,
      "loss": 0.5106,
      "step": 8931
    },
    {
      "epoch": 1.8361599342172885,
      "grad_norm": 0.18761597573757172,
      "learning_rate": 3.1079037323227176e-05,
      "loss": 0.5089,
      "step": 8932
    },
    {
      "epoch": 1.836365505190667,
      "grad_norm": 0.19022609293460846,
      "learning_rate": 3.1069539508484894e-05,
      "loss": 0.5145,
      "step": 8933
    },
    {
      "epoch": 1.8365710761640457,
      "grad_norm": 0.18628591299057007,
      "learning_rate": 3.106004238003128e-05,
      "loss": 0.5176,
      "step": 8934
    },
    {
      "epoch": 1.836776647137424,
      "grad_norm": 0.19824586808681488,
      "learning_rate": 3.105054593833422e-05,
      "loss": 0.522,
      "step": 8935
    },
    {
      "epoch": 1.8369822181108026,
      "grad_norm": 0.16452063620090485,
      "learning_rate": 3.1041050183861545e-05,
      "loss": 0.5126,
      "step": 8936
    },
    {
      "epoch": 1.8371877890841812,
      "grad_norm": 0.1635677069425583,
      "learning_rate": 3.103155511708111e-05,
      "loss": 0.5449,
      "step": 8937
    },
    {
      "epoch": 1.8373933600575598,
      "grad_norm": 0.17322902381420135,
      "learning_rate": 3.1022060738460663e-05,
      "loss": 0.5107,
      "step": 8938
    },
    {
      "epoch": 1.8375989310309384,
      "grad_norm": 0.17800618708133698,
      "learning_rate": 3.101256704846794e-05,
      "loss": 0.5426,
      "step": 8939
    },
    {
      "epoch": 1.837804502004317,
      "grad_norm": 0.1971377432346344,
      "learning_rate": 3.100307404757067e-05,
      "loss": 0.5059,
      "step": 8940
    },
    {
      "epoch": 1.8380100729776956,
      "grad_norm": 0.18664588034152985,
      "learning_rate": 3.099358173623652e-05,
      "loss": 0.5143,
      "step": 8941
    },
    {
      "epoch": 1.8382156439510742,
      "grad_norm": 0.1906706988811493,
      "learning_rate": 3.0984090114933135e-05,
      "loss": 0.5337,
      "step": 8942
    },
    {
      "epoch": 1.8384212149244528,
      "grad_norm": 0.19608831405639648,
      "learning_rate": 3.09745991841281e-05,
      "loss": 0.5284,
      "step": 8943
    },
    {
      "epoch": 1.8386267858978314,
      "grad_norm": 0.19849687814712524,
      "learning_rate": 3.096510894428902e-05,
      "loss": 0.5223,
      "step": 8944
    },
    {
      "epoch": 1.83883235687121,
      "grad_norm": 0.19968105852603912,
      "learning_rate": 3.095561939588344e-05,
      "loss": 0.5307,
      "step": 8945
    },
    {
      "epoch": 1.8390379278445883,
      "grad_norm": 0.17240165174007416,
      "learning_rate": 3.094613053937883e-05,
      "loss": 0.5226,
      "step": 8946
    },
    {
      "epoch": 1.839243498817967,
      "grad_norm": 0.16927485167980194,
      "learning_rate": 3.0936642375242697e-05,
      "loss": 0.5411,
      "step": 8947
    },
    {
      "epoch": 1.8394490697913455,
      "grad_norm": 0.19402731955051422,
      "learning_rate": 3.092715490394245e-05,
      "loss": 0.5159,
      "step": 8948
    },
    {
      "epoch": 1.839654640764724,
      "grad_norm": 0.16462527215480804,
      "learning_rate": 3.091766812594551e-05,
      "loss": 0.5177,
      "step": 8949
    },
    {
      "epoch": 1.8398602117381024,
      "grad_norm": 0.16749082505702972,
      "learning_rate": 3.0908182041719226e-05,
      "loss": 0.5446,
      "step": 8950
    },
    {
      "epoch": 1.840065782711481,
      "grad_norm": 0.1971343457698822,
      "learning_rate": 3.089869665173095e-05,
      "loss": 0.5092,
      "step": 8951
    },
    {
      "epoch": 1.8402713536848596,
      "grad_norm": 0.20525288581848145,
      "learning_rate": 3.0889211956447994e-05,
      "loss": 0.5572,
      "step": 8952
    },
    {
      "epoch": 1.8404769246582382,
      "grad_norm": 0.20502051711082458,
      "learning_rate": 3.0879727956337605e-05,
      "loss": 0.5269,
      "step": 8953
    },
    {
      "epoch": 1.8406824956316168,
      "grad_norm": 0.20041027665138245,
      "learning_rate": 3.087024465186704e-05,
      "loss": 0.5216,
      "step": 8954
    },
    {
      "epoch": 1.8408880666049954,
      "grad_norm": 0.1678602546453476,
      "learning_rate": 3.086076204350346e-05,
      "loss": 0.4868,
      "step": 8955
    },
    {
      "epoch": 1.841093637578374,
      "grad_norm": 0.1751408874988556,
      "learning_rate": 3.085128013171403e-05,
      "loss": 0.549,
      "step": 8956
    },
    {
      "epoch": 1.8412992085517526,
      "grad_norm": 0.19476006925106049,
      "learning_rate": 3.084179891696592e-05,
      "loss": 0.5015,
      "step": 8957
    },
    {
      "epoch": 1.8415047795251311,
      "grad_norm": 0.20983824133872986,
      "learning_rate": 3.083231839972621e-05,
      "loss": 0.5351,
      "step": 8958
    },
    {
      "epoch": 1.8417103504985097,
      "grad_norm": 0.19999557733535767,
      "learning_rate": 3.082283858046194e-05,
      "loss": 0.5268,
      "step": 8959
    },
    {
      "epoch": 1.8419159214718883,
      "grad_norm": 0.2033097892999649,
      "learning_rate": 3.081335945964014e-05,
      "loss": 0.5243,
      "step": 8960
    },
    {
      "epoch": 1.8421214924452667,
      "grad_norm": 0.19662059843540192,
      "learning_rate": 3.080388103772783e-05,
      "loss": 0.5197,
      "step": 8961
    },
    {
      "epoch": 1.8423270634186453,
      "grad_norm": 0.19115544855594635,
      "learning_rate": 3.079440331519194e-05,
      "loss": 0.5119,
      "step": 8962
    },
    {
      "epoch": 1.8425326343920239,
      "grad_norm": 0.19383569061756134,
      "learning_rate": 3.078492629249939e-05,
      "loss": 0.5221,
      "step": 8963
    },
    {
      "epoch": 1.8427382053654024,
      "grad_norm": 0.19358788430690765,
      "learning_rate": 3.077544997011709e-05,
      "loss": 0.5366,
      "step": 8964
    },
    {
      "epoch": 1.8429437763387808,
      "grad_norm": 0.19317568838596344,
      "learning_rate": 3.0765974348511895e-05,
      "loss": 0.5127,
      "step": 8965
    },
    {
      "epoch": 1.8431493473121594,
      "grad_norm": 0.19126683473587036,
      "learning_rate": 3.075649942815061e-05,
      "loss": 0.5027,
      "step": 8966
    },
    {
      "epoch": 1.843354918285538,
      "grad_norm": 0.2007630318403244,
      "learning_rate": 3.0747025209500024e-05,
      "loss": 0.5352,
      "step": 8967
    },
    {
      "epoch": 1.8435604892589166,
      "grad_norm": 0.1700150966644287,
      "learning_rate": 3.073755169302689e-05,
      "loss": 0.4973,
      "step": 8968
    },
    {
      "epoch": 1.8437660602322952,
      "grad_norm": 0.1250450313091278,
      "learning_rate": 3.0728078879197913e-05,
      "loss": 0.5154,
      "step": 8969
    },
    {
      "epoch": 1.8439716312056738,
      "grad_norm": 0.16313976049423218,
      "learning_rate": 3.071860676847978e-05,
      "loss": 0.5528,
      "step": 8970
    },
    {
      "epoch": 1.8441772021790523,
      "grad_norm": 0.16907833516597748,
      "learning_rate": 3.070913536133915e-05,
      "loss": 0.5112,
      "step": 8971
    },
    {
      "epoch": 1.844382773152431,
      "grad_norm": 0.11831056326627731,
      "learning_rate": 3.0699664658242614e-05,
      "loss": 0.4941,
      "step": 8972
    },
    {
      "epoch": 1.8445883441258095,
      "grad_norm": 0.15390439331531525,
      "learning_rate": 3.0690194659656774e-05,
      "loss": 0.514,
      "step": 8973
    },
    {
      "epoch": 1.844793915099188,
      "grad_norm": 0.16013920307159424,
      "learning_rate": 3.0680725366048155e-05,
      "loss": 0.4877,
      "step": 8974
    },
    {
      "epoch": 1.8449994860725667,
      "grad_norm": 0.1253557801246643,
      "learning_rate": 3.067125677788327e-05,
      "loss": 0.5135,
      "step": 8975
    },
    {
      "epoch": 1.845205057045945,
      "grad_norm": 0.16094715893268585,
      "learning_rate": 3.0661788895628595e-05,
      "loss": 0.533,
      "step": 8976
    },
    {
      "epoch": 1.8454106280193237,
      "grad_norm": 0.190200075507164,
      "learning_rate": 3.065232171975054e-05,
      "loss": 0.5279,
      "step": 8977
    },
    {
      "epoch": 1.8456161989927022,
      "grad_norm": 0.20202942192554474,
      "learning_rate": 3.064285525071556e-05,
      "loss": 0.5234,
      "step": 8978
    },
    {
      "epoch": 1.8458217699660808,
      "grad_norm": 0.20393583178520203,
      "learning_rate": 3.063338948898999e-05,
      "loss": 0.5437,
      "step": 8979
    },
    {
      "epoch": 1.8460273409394592,
      "grad_norm": 0.19702088832855225,
      "learning_rate": 3.062392443504017e-05,
      "loss": 0.5375,
      "step": 8980
    },
    {
      "epoch": 1.8462329119128378,
      "grad_norm": 0.19736789166927338,
      "learning_rate": 3.061446008933239e-05,
      "loss": 0.5485,
      "step": 8981
    },
    {
      "epoch": 1.8464384828862164,
      "grad_norm": 0.19195613265037537,
      "learning_rate": 3.060499645233294e-05,
      "loss": 0.5325,
      "step": 8982
    },
    {
      "epoch": 1.846644053859595,
      "grad_norm": 0.19624346494674683,
      "learning_rate": 3.059553352450803e-05,
      "loss": 0.5307,
      "step": 8983
    },
    {
      "epoch": 1.8468496248329735,
      "grad_norm": 0.17116032540798187,
      "learning_rate": 3.058607130632383e-05,
      "loss": 0.4922,
      "step": 8984
    },
    {
      "epoch": 1.8470551958063521,
      "grad_norm": 0.1617693156003952,
      "learning_rate": 3.057660979824655e-05,
      "loss": 0.5326,
      "step": 8985
    },
    {
      "epoch": 1.8472607667797307,
      "grad_norm": 0.19013215601444244,
      "learning_rate": 3.05671490007423e-05,
      "loss": 0.5203,
      "step": 8986
    },
    {
      "epoch": 1.8474663377531093,
      "grad_norm": 0.1693909615278244,
      "learning_rate": 3.055768891427715e-05,
      "loss": 0.5208,
      "step": 8987
    },
    {
      "epoch": 1.847671908726488,
      "grad_norm": 0.1591087430715561,
      "learning_rate": 3.054822953931716e-05,
      "loss": 0.5186,
      "step": 8988
    },
    {
      "epoch": 1.8478774796998665,
      "grad_norm": 0.19975587725639343,
      "learning_rate": 3.0538770876328365e-05,
      "loss": 0.5238,
      "step": 8989
    },
    {
      "epoch": 1.848083050673245,
      "grad_norm": 0.21245107054710388,
      "learning_rate": 3.052931292577673e-05,
      "loss": 0.5405,
      "step": 8990
    },
    {
      "epoch": 1.8482886216466234,
      "grad_norm": 0.19569487869739532,
      "learning_rate": 3.051985568812819e-05,
      "loss": 0.5452,
      "step": 8991
    },
    {
      "epoch": 1.848494192620002,
      "grad_norm": 0.19539184868335724,
      "learning_rate": 3.0510399163848704e-05,
      "loss": 0.518,
      "step": 8992
    },
    {
      "epoch": 1.8486997635933806,
      "grad_norm": 0.1992693692445755,
      "learning_rate": 3.0500943353404117e-05,
      "loss": 0.5521,
      "step": 8993
    },
    {
      "epoch": 1.8489053345667592,
      "grad_norm": 0.1907494217157364,
      "learning_rate": 3.0491488257260293e-05,
      "loss": 0.5105,
      "step": 8994
    },
    {
      "epoch": 1.8491109055401376,
      "grad_norm": 0.1949932873249054,
      "learning_rate": 3.0482033875883026e-05,
      "loss": 0.5214,
      "step": 8995
    },
    {
      "epoch": 1.8493164765135162,
      "grad_norm": 0.19968056678771973,
      "learning_rate": 3.0472580209738096e-05,
      "loss": 0.5388,
      "step": 8996
    },
    {
      "epoch": 1.8495220474868947,
      "grad_norm": 0.1978997439146042,
      "learning_rate": 3.0463127259291236e-05,
      "loss": 0.5319,
      "step": 8997
    },
    {
      "epoch": 1.8497276184602733,
      "grad_norm": 0.2019454538822174,
      "learning_rate": 3.0453675025008134e-05,
      "loss": 0.532,
      "step": 8998
    },
    {
      "epoch": 1.849933189433652,
      "grad_norm": 0.20041054487228394,
      "learning_rate": 3.0444223507354492e-05,
      "loss": 0.5036,
      "step": 8999
    },
    {
      "epoch": 1.8501387604070305,
      "grad_norm": 0.20030958950519562,
      "learning_rate": 3.0434772706795925e-05,
      "loss": 0.5458,
      "step": 9000
    },
    {
      "epoch": 1.850344331380409,
      "grad_norm": 0.16151051223278046,
      "learning_rate": 3.042532262379803e-05,
      "loss": 0.5085,
      "step": 9001
    },
    {
      "epoch": 1.8505499023537877,
      "grad_norm": 0.15830279886722565,
      "learning_rate": 3.0415873258826368e-05,
      "loss": 0.536,
      "step": 9002
    },
    {
      "epoch": 1.8507554733271663,
      "grad_norm": 0.19460676610469818,
      "learning_rate": 3.040642461234645e-05,
      "loss": 0.5357,
      "step": 9003
    },
    {
      "epoch": 1.8509610443005449,
      "grad_norm": 0.1874227076768875,
      "learning_rate": 3.0396976684823795e-05,
      "loss": 0.5028,
      "step": 9004
    },
    {
      "epoch": 1.8511666152739235,
      "grad_norm": 0.19529518485069275,
      "learning_rate": 3.0387529476723823e-05,
      "loss": 0.548,
      "step": 9005
    },
    {
      "epoch": 1.851372186247302,
      "grad_norm": 0.19249314069747925,
      "learning_rate": 3.0378082988511997e-05,
      "loss": 0.4975,
      "step": 9006
    },
    {
      "epoch": 1.8515777572206804,
      "grad_norm": 0.15955211222171783,
      "learning_rate": 3.0368637220653672e-05,
      "loss": 0.4934,
      "step": 9007
    },
    {
      "epoch": 1.851783328194059,
      "grad_norm": 0.16296181082725525,
      "learning_rate": 3.0359192173614212e-05,
      "loss": 0.5391,
      "step": 9008
    },
    {
      "epoch": 1.8519888991674376,
      "grad_norm": 0.18920543789863586,
      "learning_rate": 3.0349747847858923e-05,
      "loss": 0.5126,
      "step": 9009
    },
    {
      "epoch": 1.852194470140816,
      "grad_norm": 0.20026175677776337,
      "learning_rate": 3.0340304243853077e-05,
      "loss": 0.5336,
      "step": 9010
    },
    {
      "epoch": 1.8524000411141945,
      "grad_norm": 0.1883440762758255,
      "learning_rate": 3.0330861362061927e-05,
      "loss": 0.4898,
      "step": 9011
    },
    {
      "epoch": 1.8526056120875731,
      "grad_norm": 0.19095604121685028,
      "learning_rate": 3.0321419202950652e-05,
      "loss": 0.5055,
      "step": 9012
    },
    {
      "epoch": 1.8528111830609517,
      "grad_norm": 0.19625356793403625,
      "learning_rate": 3.0311977766984462e-05,
      "loss": 0.5161,
      "step": 9013
    },
    {
      "epoch": 1.8530167540343303,
      "grad_norm": 0.19662852585315704,
      "learning_rate": 3.0302537054628483e-05,
      "loss": 0.5448,
      "step": 9014
    },
    {
      "epoch": 1.853222325007709,
      "grad_norm": 0.20150268077850342,
      "learning_rate": 3.0293097066347794e-05,
      "loss": 0.503,
      "step": 9015
    },
    {
      "epoch": 1.8534278959810875,
      "grad_norm": 0.20207509398460388,
      "learning_rate": 3.0283657802607484e-05,
      "loss": 0.5437,
      "step": 9016
    },
    {
      "epoch": 1.853633466954466,
      "grad_norm": 0.20044514536857605,
      "learning_rate": 3.027421926387257e-05,
      "loss": 0.5406,
      "step": 9017
    },
    {
      "epoch": 1.8538390379278447,
      "grad_norm": 0.2027798891067505,
      "learning_rate": 3.026478145060804e-05,
      "loss": 0.5493,
      "step": 9018
    },
    {
      "epoch": 1.8540446089012232,
      "grad_norm": 0.19402191042900085,
      "learning_rate": 3.025534436327884e-05,
      "loss": 0.5346,
      "step": 9019
    },
    {
      "epoch": 1.8542501798746018,
      "grad_norm": 0.2023455947637558,
      "learning_rate": 3.0245908002349927e-05,
      "loss": 0.5521,
      "step": 9020
    },
    {
      "epoch": 1.8544557508479804,
      "grad_norm": 0.19758723676204681,
      "learning_rate": 3.0236472368286162e-05,
      "loss": 0.5314,
      "step": 9021
    },
    {
      "epoch": 1.8546613218213588,
      "grad_norm": 0.19898824393749237,
      "learning_rate": 3.0227037461552405e-05,
      "loss": 0.5221,
      "step": 9022
    },
    {
      "epoch": 1.8548668927947374,
      "grad_norm": 0.18745093047618866,
      "learning_rate": 3.021760328261346e-05,
      "loss": 0.5196,
      "step": 9023
    },
    {
      "epoch": 1.855072463768116,
      "grad_norm": 0.1959155946969986,
      "learning_rate": 3.0208169831934095e-05,
      "loss": 0.515,
      "step": 9024
    },
    {
      "epoch": 1.8552780347414946,
      "grad_norm": 0.19238829612731934,
      "learning_rate": 3.0198737109979084e-05,
      "loss": 0.5023,
      "step": 9025
    },
    {
      "epoch": 1.855483605714873,
      "grad_norm": 0.19325855374336243,
      "learning_rate": 3.01893051172131e-05,
      "loss": 0.5261,
      "step": 9026
    },
    {
      "epoch": 1.8556891766882515,
      "grad_norm": 0.1941802203655243,
      "learning_rate": 3.017987385410083e-05,
      "loss": 0.5381,
      "step": 9027
    },
    {
      "epoch": 1.85589474766163,
      "grad_norm": 0.19294960796833038,
      "learning_rate": 3.0170443321106913e-05,
      "loss": 0.5493,
      "step": 9028
    },
    {
      "epoch": 1.8561003186350087,
      "grad_norm": 0.20181645452976227,
      "learning_rate": 3.0161013518695943e-05,
      "loss": 0.5268,
      "step": 9029
    },
    {
      "epoch": 1.8563058896083873,
      "grad_norm": 0.1975722759962082,
      "learning_rate": 3.0151584447332476e-05,
      "loss": 0.5342,
      "step": 9030
    },
    {
      "epoch": 1.8565114605817659,
      "grad_norm": 0.20087282359600067,
      "learning_rate": 3.0142156107481048e-05,
      "loss": 0.522,
      "step": 9031
    },
    {
      "epoch": 1.8567170315551444,
      "grad_norm": 0.19749726355075836,
      "learning_rate": 3.013272849960612e-05,
      "loss": 0.5077,
      "step": 9032
    },
    {
      "epoch": 1.856922602528523,
      "grad_norm": 0.19727249443531036,
      "learning_rate": 3.0123301624172185e-05,
      "loss": 0.5261,
      "step": 9033
    },
    {
      "epoch": 1.8571281735019016,
      "grad_norm": 0.2018827497959137,
      "learning_rate": 3.0113875481643647e-05,
      "loss": 0.5258,
      "step": 9034
    },
    {
      "epoch": 1.8573337444752802,
      "grad_norm": 0.17497631907463074,
      "learning_rate": 3.0104450072484895e-05,
      "loss": 0.525,
      "step": 9035
    },
    {
      "epoch": 1.8575393154486588,
      "grad_norm": 0.16717809438705444,
      "learning_rate": 3.0095025397160248e-05,
      "loss": 0.5311,
      "step": 9036
    },
    {
      "epoch": 1.8577448864220372,
      "grad_norm": 0.19906377792358398,
      "learning_rate": 3.0085601456134044e-05,
      "loss": 0.521,
      "step": 9037
    },
    {
      "epoch": 1.8579504573954158,
      "grad_norm": 0.19669370353221893,
      "learning_rate": 3.0076178249870547e-05,
      "loss": 0.495,
      "step": 9038
    },
    {
      "epoch": 1.8581560283687943,
      "grad_norm": 0.1930094212293625,
      "learning_rate": 3.006675577883398e-05,
      "loss": 0.5243,
      "step": 9039
    },
    {
      "epoch": 1.858361599342173,
      "grad_norm": 0.1566167026758194,
      "learning_rate": 3.0057334043488573e-05,
      "loss": 0.4969,
      "step": 9040
    },
    {
      "epoch": 1.8585671703155513,
      "grad_norm": 0.1628788709640503,
      "learning_rate": 3.0047913044298474e-05,
      "loss": 0.534,
      "step": 9041
    },
    {
      "epoch": 1.8587727412889299,
      "grad_norm": 0.19704844057559967,
      "learning_rate": 3.0038492781727817e-05,
      "loss": 0.5278,
      "step": 9042
    },
    {
      "epoch": 1.8589783122623085,
      "grad_norm": 0.2023809403181076,
      "learning_rate": 3.002907325624069e-05,
      "loss": 0.5197,
      "step": 9043
    },
    {
      "epoch": 1.859183883235687,
      "grad_norm": 0.1649642139673233,
      "learning_rate": 3.0019654468301153e-05,
      "loss": 0.5043,
      "step": 9044
    },
    {
      "epoch": 1.8593894542090657,
      "grad_norm": 0.16661269962787628,
      "learning_rate": 3.001023641837321e-05,
      "loss": 0.5443,
      "step": 9045
    },
    {
      "epoch": 1.8595950251824442,
      "grad_norm": 0.19846822321414948,
      "learning_rate": 3.000081910692085e-05,
      "loss": 0.5275,
      "step": 9046
    },
    {
      "epoch": 1.8598005961558228,
      "grad_norm": 0.1986059993505478,
      "learning_rate": 2.9991402534408043e-05,
      "loss": 0.5404,
      "step": 9047
    },
    {
      "epoch": 1.8600061671292014,
      "grad_norm": 0.20389589667320251,
      "learning_rate": 2.9981986701298672e-05,
      "loss": 0.5433,
      "step": 9048
    },
    {
      "epoch": 1.86021173810258,
      "grad_norm": 0.1978558897972107,
      "learning_rate": 2.9972571608056634e-05,
      "loss": 0.5279,
      "step": 9049
    },
    {
      "epoch": 1.8604173090759586,
      "grad_norm": 0.19354282319545746,
      "learning_rate": 2.996315725514575e-05,
      "loss": 0.5127,
      "step": 9050
    },
    {
      "epoch": 1.8606228800493372,
      "grad_norm": 0.2036632001399994,
      "learning_rate": 2.995374364302983e-05,
      "loss": 0.5386,
      "step": 9051
    },
    {
      "epoch": 1.8608284510227155,
      "grad_norm": 0.19390377402305603,
      "learning_rate": 2.9944330772172635e-05,
      "loss": 0.5256,
      "step": 9052
    },
    {
      "epoch": 1.8610340219960941,
      "grad_norm": 0.16378752887248993,
      "learning_rate": 2.9934918643037872e-05,
      "loss": 0.523,
      "step": 9053
    },
    {
      "epoch": 1.8612395929694727,
      "grad_norm": 0.28664878010749817,
      "learning_rate": 2.9925507256089277e-05,
      "loss": 0.5099,
      "step": 9054
    },
    {
      "epoch": 1.8614451639428513,
      "grad_norm": 0.16860786080360413,
      "learning_rate": 2.9916096611790473e-05,
      "loss": 0.5528,
      "step": 9055
    },
    {
      "epoch": 1.8616507349162297,
      "grad_norm": 0.1950398087501526,
      "learning_rate": 2.990668671060509e-05,
      "loss": 0.5442,
      "step": 9056
    },
    {
      "epoch": 1.8618563058896083,
      "grad_norm": 0.1937715858221054,
      "learning_rate": 2.98972775529967e-05,
      "loss": 0.5393,
      "step": 9057
    },
    {
      "epoch": 1.8620618768629869,
      "grad_norm": 0.1997014582157135,
      "learning_rate": 2.988786913942886e-05,
      "loss": 0.5328,
      "step": 9058
    },
    {
      "epoch": 1.8622674478363654,
      "grad_norm": 0.19975896179676056,
      "learning_rate": 2.9878461470365082e-05,
      "loss": 0.5348,
      "step": 9059
    },
    {
      "epoch": 1.862473018809744,
      "grad_norm": 0.15818095207214355,
      "learning_rate": 2.986905454626881e-05,
      "loss": 0.4831,
      "step": 9060
    },
    {
      "epoch": 1.8626785897831226,
      "grad_norm": 0.1348692923784256,
      "learning_rate": 2.9859648367603506e-05,
      "loss": 0.4821,
      "step": 9061
    },
    {
      "epoch": 1.8628841607565012,
      "grad_norm": 0.1564972698688507,
      "learning_rate": 2.9850242934832573e-05,
      "loss": 0.5206,
      "step": 9062
    },
    {
      "epoch": 1.8630897317298798,
      "grad_norm": 0.19233213365077972,
      "learning_rate": 2.9840838248419352e-05,
      "loss": 0.5317,
      "step": 9063
    },
    {
      "epoch": 1.8632953027032584,
      "grad_norm": 0.20065823197364807,
      "learning_rate": 2.983143430882718e-05,
      "loss": 0.5267,
      "step": 9064
    },
    {
      "epoch": 1.863500873676637,
      "grad_norm": 0.20056359469890594,
      "learning_rate": 2.9822031116519345e-05,
      "loss": 0.5365,
      "step": 9065
    },
    {
      "epoch": 1.8637064446500156,
      "grad_norm": 0.2093392014503479,
      "learning_rate": 2.9812628671959084e-05,
      "loss": 0.53,
      "step": 9066
    },
    {
      "epoch": 1.863912015623394,
      "grad_norm": 0.20502184331417084,
      "learning_rate": 2.9803226975609622e-05,
      "loss": 0.5227,
      "step": 9067
    },
    {
      "epoch": 1.8641175865967725,
      "grad_norm": 0.19816361367702484,
      "learning_rate": 2.9793826027934147e-05,
      "loss": 0.5054,
      "step": 9068
    },
    {
      "epoch": 1.864323157570151,
      "grad_norm": 0.2030927836894989,
      "learning_rate": 2.9784425829395777e-05,
      "loss": 0.5327,
      "step": 9069
    },
    {
      "epoch": 1.8645287285435297,
      "grad_norm": 0.20948849618434906,
      "learning_rate": 2.9775026380457645e-05,
      "loss": 0.5415,
      "step": 9070
    },
    {
      "epoch": 1.864734299516908,
      "grad_norm": 0.19347041845321655,
      "learning_rate": 2.97656276815828e-05,
      "loss": 0.5216,
      "step": 9071
    },
    {
      "epoch": 1.8649398704902866,
      "grad_norm": 0.16669385135173798,
      "learning_rate": 2.975622973323427e-05,
      "loss": 0.4946,
      "step": 9072
    },
    {
      "epoch": 1.8651454414636652,
      "grad_norm": 0.1577424257993698,
      "learning_rate": 2.9746832535875054e-05,
      "loss": 0.5255,
      "step": 9073
    },
    {
      "epoch": 1.8653510124370438,
      "grad_norm": 0.19290731847286224,
      "learning_rate": 2.973743608996809e-05,
      "loss": 0.5162,
      "step": 9074
    },
    {
      "epoch": 1.8655565834104224,
      "grad_norm": 0.22282882034778595,
      "learning_rate": 2.9728040395976326e-05,
      "loss": 0.5466,
      "step": 9075
    },
    {
      "epoch": 1.865762154383801,
      "grad_norm": 0.19410258531570435,
      "learning_rate": 2.9718645454362635e-05,
      "loss": 0.5002,
      "step": 9076
    },
    {
      "epoch": 1.8659677253571796,
      "grad_norm": 0.199889674782753,
      "learning_rate": 2.9709251265589857e-05,
      "loss": 0.5468,
      "step": 9077
    },
    {
      "epoch": 1.8661732963305582,
      "grad_norm": 0.19312036037445068,
      "learning_rate": 2.969985783012079e-05,
      "loss": 0.5278,
      "step": 9078
    },
    {
      "epoch": 1.8663788673039368,
      "grad_norm": 0.1675236076116562,
      "learning_rate": 2.9690465148418225e-05,
      "loss": 0.5274,
      "step": 9079
    },
    {
      "epoch": 1.8665844382773153,
      "grad_norm": 0.1646019071340561,
      "learning_rate": 2.9681073220944887e-05,
      "loss": 0.5227,
      "step": 9080
    },
    {
      "epoch": 1.866790009250694,
      "grad_norm": 0.19794802367687225,
      "learning_rate": 2.9671682048163452e-05,
      "loss": 0.5234,
      "step": 9081
    },
    {
      "epoch": 1.8669955802240725,
      "grad_norm": 0.19309046864509583,
      "learning_rate": 2.9662291630536612e-05,
      "loss": 0.5235,
      "step": 9082
    },
    {
      "epoch": 1.867201151197451,
      "grad_norm": 0.19643832743167877,
      "learning_rate": 2.965290196852698e-05,
      "loss": 0.5161,
      "step": 9083
    },
    {
      "epoch": 1.8674067221708295,
      "grad_norm": 0.19640390574932098,
      "learning_rate": 2.964351306259713e-05,
      "loss": 0.5374,
      "step": 9084
    },
    {
      "epoch": 1.867612293144208,
      "grad_norm": 0.2005051225423813,
      "learning_rate": 2.9634124913209623e-05,
      "loss": 0.5183,
      "step": 9085
    },
    {
      "epoch": 1.8678178641175864,
      "grad_norm": 0.20132414996623993,
      "learning_rate": 2.9624737520826958e-05,
      "loss": 0.5101,
      "step": 9086
    },
    {
      "epoch": 1.868023435090965,
      "grad_norm": 0.19228623807430267,
      "learning_rate": 2.9615350885911618e-05,
      "loss": 0.5274,
      "step": 9087
    },
    {
      "epoch": 1.8682290060643436,
      "grad_norm": 0.16477905213832855,
      "learning_rate": 2.9605965008926004e-05,
      "loss": 0.4958,
      "step": 9088
    },
    {
      "epoch": 1.8684345770377222,
      "grad_norm": 0.16282616555690765,
      "learning_rate": 2.9596579890332563e-05,
      "loss": 0.5274,
      "step": 9089
    },
    {
      "epoch": 1.8686401480111008,
      "grad_norm": 0.19304989278316498,
      "learning_rate": 2.958719553059363e-05,
      "loss": 0.5014,
      "step": 9090
    },
    {
      "epoch": 1.8688457189844794,
      "grad_norm": 0.16735190153121948,
      "learning_rate": 2.957781193017154e-05,
      "loss": 0.513,
      "step": 9091
    },
    {
      "epoch": 1.869051289957858,
      "grad_norm": 0.11929447948932648,
      "learning_rate": 2.9568429089528573e-05,
      "loss": 0.502,
      "step": 9092
    },
    {
      "epoch": 1.8692568609312366,
      "grad_norm": 0.16071327030658722,
      "learning_rate": 2.955904700912698e-05,
      "loss": 0.5299,
      "step": 9093
    },
    {
      "epoch": 1.8694624319046151,
      "grad_norm": 0.20334213972091675,
      "learning_rate": 2.954966568942897e-05,
      "loss": 0.534,
      "step": 9094
    },
    {
      "epoch": 1.8696680028779937,
      "grad_norm": 0.19918020069599152,
      "learning_rate": 2.9540285130896692e-05,
      "loss": 0.5146,
      "step": 9095
    },
    {
      "epoch": 1.8698735738513723,
      "grad_norm": 0.16279511153697968,
      "learning_rate": 2.9530905333992337e-05,
      "loss": 0.4845,
      "step": 9096
    },
    {
      "epoch": 1.870079144824751,
      "grad_norm": 0.16142159700393677,
      "learning_rate": 2.9521526299177962e-05,
      "loss": 0.5215,
      "step": 9097
    },
    {
      "epoch": 1.8702847157981293,
      "grad_norm": 0.20098471641540527,
      "learning_rate": 2.951214802691565e-05,
      "loss": 0.5314,
      "step": 9098
    },
    {
      "epoch": 1.8704902867715079,
      "grad_norm": 0.19500714540481567,
      "learning_rate": 2.950277051766741e-05,
      "loss": 0.5257,
      "step": 9099
    },
    {
      "epoch": 1.8706958577448864,
      "grad_norm": 0.19438640773296356,
      "learning_rate": 2.949339377189522e-05,
      "loss": 0.5412,
      "step": 9100
    },
    {
      "epoch": 1.870901428718265,
      "grad_norm": 0.16201166808605194,
      "learning_rate": 2.9484017790061058e-05,
      "loss": 0.4902,
      "step": 9101
    },
    {
      "epoch": 1.8711069996916434,
      "grad_norm": 0.15816009044647217,
      "learning_rate": 2.9474642572626804e-05,
      "loss": 0.5344,
      "step": 9102
    },
    {
      "epoch": 1.871312570665022,
      "grad_norm": 0.19865500926971436,
      "learning_rate": 2.9465268120054347e-05,
      "loss": 0.5286,
      "step": 9103
    },
    {
      "epoch": 1.8715181416384006,
      "grad_norm": 0.19558370113372803,
      "learning_rate": 2.945589443280553e-05,
      "loss": 0.5003,
      "step": 9104
    },
    {
      "epoch": 1.8717237126117792,
      "grad_norm": 0.19835351407527924,
      "learning_rate": 2.944652151134214e-05,
      "loss": 0.5106,
      "step": 9105
    },
    {
      "epoch": 1.8719292835851578,
      "grad_norm": 0.1939878761768341,
      "learning_rate": 2.9437149356125937e-05,
      "loss": 0.5295,
      "step": 9106
    },
    {
      "epoch": 1.8721348545585363,
      "grad_norm": 0.18890373408794403,
      "learning_rate": 2.9427777967618645e-05,
      "loss": 0.5072,
      "step": 9107
    },
    {
      "epoch": 1.872340425531915,
      "grad_norm": 0.20133374631404877,
      "learning_rate": 2.9418407346281948e-05,
      "loss": 0.5436,
      "step": 9108
    },
    {
      "epoch": 1.8725459965052935,
      "grad_norm": 0.19078055024147034,
      "learning_rate": 2.940903749257748e-05,
      "loss": 0.4905,
      "step": 9109
    },
    {
      "epoch": 1.872751567478672,
      "grad_norm": 0.1644534170627594,
      "learning_rate": 2.9399668406966874e-05,
      "loss": 0.5029,
      "step": 9110
    },
    {
      "epoch": 1.8729571384520507,
      "grad_norm": 0.1681102067232132,
      "learning_rate": 2.9390300089911696e-05,
      "loss": 0.54,
      "step": 9111
    },
    {
      "epoch": 1.8731627094254293,
      "grad_norm": 0.16144701838493347,
      "learning_rate": 2.938093254187346e-05,
      "loss": 0.5035,
      "step": 9112
    },
    {
      "epoch": 1.8733682803988077,
      "grad_norm": 0.16440553963184357,
      "learning_rate": 2.937156576331368e-05,
      "loss": 0.5317,
      "step": 9113
    },
    {
      "epoch": 1.8735738513721862,
      "grad_norm": 0.19571755826473236,
      "learning_rate": 2.936219975469382e-05,
      "loss": 0.5346,
      "step": 9114
    },
    {
      "epoch": 1.8737794223455648,
      "grad_norm": 0.6120141744613647,
      "learning_rate": 2.9352834516475254e-05,
      "loss": 0.5264,
      "step": 9115
    },
    {
      "epoch": 1.8739849933189434,
      "grad_norm": 0.1942664086818695,
      "learning_rate": 2.9343470049119426e-05,
      "loss": 0.5409,
      "step": 9116
    },
    {
      "epoch": 1.8741905642923218,
      "grad_norm": 0.19954350590705872,
      "learning_rate": 2.9334106353087646e-05,
      "loss": 0.5159,
      "step": 9117
    },
    {
      "epoch": 1.8743961352657004,
      "grad_norm": 0.156526118516922,
      "learning_rate": 2.9324743428841223e-05,
      "loss": 0.4767,
      "step": 9118
    },
    {
      "epoch": 1.874601706239079,
      "grad_norm": 0.1730726808309555,
      "learning_rate": 2.9315381276841425e-05,
      "loss": 0.5267,
      "step": 9119
    },
    {
      "epoch": 1.8748072772124575,
      "grad_norm": 0.20543117821216583,
      "learning_rate": 2.9306019897549483e-05,
      "loss": 0.5323,
      "step": 9120
    },
    {
      "epoch": 1.8750128481858361,
      "grad_norm": 0.17114831507205963,
      "learning_rate": 2.9296659291426576e-05,
      "loss": 0.5179,
      "step": 9121
    },
    {
      "epoch": 1.8752184191592147,
      "grad_norm": 0.16466084122657776,
      "learning_rate": 2.928729945893387e-05,
      "loss": 0.5224,
      "step": 9122
    },
    {
      "epoch": 1.8754239901325933,
      "grad_norm": 0.19490589201450348,
      "learning_rate": 2.927794040053249e-05,
      "loss": 0.5288,
      "step": 9123
    },
    {
      "epoch": 1.875629561105972,
      "grad_norm": 0.165072500705719,
      "learning_rate": 2.926858211668349e-05,
      "loss": 0.4984,
      "step": 9124
    },
    {
      "epoch": 1.8758351320793505,
      "grad_norm": 0.1546640247106552,
      "learning_rate": 2.9259224607847928e-05,
      "loss": 0.544,
      "step": 9125
    },
    {
      "epoch": 1.876040703052729,
      "grad_norm": 0.18832944333553314,
      "learning_rate": 2.9249867874486802e-05,
      "loss": 0.5269,
      "step": 9126
    },
    {
      "epoch": 1.8762462740261077,
      "grad_norm": 0.19997960329055786,
      "learning_rate": 2.924051191706107e-05,
      "loss": 0.539,
      "step": 9127
    },
    {
      "epoch": 1.876451844999486,
      "grad_norm": 0.21653611958026886,
      "learning_rate": 2.9231156736031653e-05,
      "loss": 0.5414,
      "step": 9128
    },
    {
      "epoch": 1.8766574159728646,
      "grad_norm": 0.1963309794664383,
      "learning_rate": 2.922180233185942e-05,
      "loss": 0.5308,
      "step": 9129
    },
    {
      "epoch": 1.8768629869462432,
      "grad_norm": 0.19767159223556519,
      "learning_rate": 2.921244870500526e-05,
      "loss": 0.5479,
      "step": 9130
    },
    {
      "epoch": 1.8770685579196218,
      "grad_norm": 0.19611725211143494,
      "learning_rate": 2.9203095855929962e-05,
      "loss": 0.5213,
      "step": 9131
    },
    {
      "epoch": 1.8772741288930002,
      "grad_norm": 0.19497352838516235,
      "learning_rate": 2.91937437850943e-05,
      "loss": 0.5212,
      "step": 9132
    },
    {
      "epoch": 1.8774796998663787,
      "grad_norm": 0.19085940718650818,
      "learning_rate": 2.918439249295899e-05,
      "loss": 0.532,
      "step": 9133
    },
    {
      "epoch": 1.8776852708397573,
      "grad_norm": 0.1957186907529831,
      "learning_rate": 2.917504197998475e-05,
      "loss": 0.5046,
      "step": 9134
    },
    {
      "epoch": 1.877890841813136,
      "grad_norm": 0.17413191497325897,
      "learning_rate": 2.916569224663223e-05,
      "loss": 0.5181,
      "step": 9135
    },
    {
      "epoch": 1.8780964127865145,
      "grad_norm": 0.16276034712791443,
      "learning_rate": 2.9156343293362013e-05,
      "loss": 0.5378,
      "step": 9136
    },
    {
      "epoch": 1.878301983759893,
      "grad_norm": 0.19842852652072906,
      "learning_rate": 2.914699512063474e-05,
      "loss": 0.5238,
      "step": 9137
    },
    {
      "epoch": 1.8785075547332717,
      "grad_norm": 0.20142436027526855,
      "learning_rate": 2.9137647728910915e-05,
      "loss": 0.53,
      "step": 9138
    },
    {
      "epoch": 1.8787131257066503,
      "grad_norm": 0.1989341378211975,
      "learning_rate": 2.9128301118651043e-05,
      "loss": 0.5447,
      "step": 9139
    },
    {
      "epoch": 1.8789186966800289,
      "grad_norm": 0.15917402505874634,
      "learning_rate": 2.9118955290315593e-05,
      "loss": 0.4962,
      "step": 9140
    },
    {
      "epoch": 1.8791242676534075,
      "grad_norm": 0.16550025343894958,
      "learning_rate": 2.9109610244364994e-05,
      "loss": 0.5044,
      "step": 9141
    },
    {
      "epoch": 1.879329838626786,
      "grad_norm": 0.19528605043888092,
      "learning_rate": 2.9100265981259613e-05,
      "loss": 0.52,
      "step": 9142
    },
    {
      "epoch": 1.8795354096001644,
      "grad_norm": 0.15918217599391937,
      "learning_rate": 2.909092250145981e-05,
      "loss": 0.5113,
      "step": 9143
    },
    {
      "epoch": 1.879740980573543,
      "grad_norm": 0.19821493327617645,
      "learning_rate": 2.9081579805425912e-05,
      "loss": 0.5407,
      "step": 9144
    },
    {
      "epoch": 1.8799465515469216,
      "grad_norm": 0.19132786989212036,
      "learning_rate": 2.9072237893618154e-05,
      "loss": 0.5243,
      "step": 9145
    },
    {
      "epoch": 1.8801521225203002,
      "grad_norm": 0.20594055950641632,
      "learning_rate": 2.9062896766496812e-05,
      "loss": 0.5245,
      "step": 9146
    },
    {
      "epoch": 1.8803576934936785,
      "grad_norm": 0.1643124520778656,
      "learning_rate": 2.9053556424522043e-05,
      "loss": 0.5056,
      "step": 9147
    },
    {
      "epoch": 1.8805632644670571,
      "grad_norm": 0.16157592833042145,
      "learning_rate": 2.9044216868154028e-05,
      "loss": 0.5309,
      "step": 9148
    },
    {
      "epoch": 1.8807688354404357,
      "grad_norm": 0.1968626081943512,
      "learning_rate": 2.9034878097852863e-05,
      "loss": 0.5417,
      "step": 9149
    },
    {
      "epoch": 1.8809744064138143,
      "grad_norm": 0.1947098821401596,
      "learning_rate": 2.9025540114078615e-05,
      "loss": 0.5476,
      "step": 9150
    },
    {
      "epoch": 1.881179977387193,
      "grad_norm": 0.1969742625951767,
      "learning_rate": 2.9016202917291363e-05,
      "loss": 0.5182,
      "step": 9151
    },
    {
      "epoch": 1.8813855483605715,
      "grad_norm": 0.199255108833313,
      "learning_rate": 2.9006866507951085e-05,
      "loss": 0.5049,
      "step": 9152
    },
    {
      "epoch": 1.88159111933395,
      "grad_norm": 0.1936180591583252,
      "learning_rate": 2.899753088651774e-05,
      "loss": 0.5345,
      "step": 9153
    },
    {
      "epoch": 1.8817966903073287,
      "grad_norm": 0.1694704294204712,
      "learning_rate": 2.8988196053451242e-05,
      "loss": 0.4989,
      "step": 9154
    },
    {
      "epoch": 1.8820022612807072,
      "grad_norm": 0.16625025868415833,
      "learning_rate": 2.89788620092115e-05,
      "loss": 0.5487,
      "step": 9155
    },
    {
      "epoch": 1.8822078322540858,
      "grad_norm": 0.2028672993183136,
      "learning_rate": 2.8969528754258344e-05,
      "loss": 0.5241,
      "step": 9156
    },
    {
      "epoch": 1.8824134032274644,
      "grad_norm": 0.20288680493831635,
      "learning_rate": 2.896019628905156e-05,
      "loss": 0.5528,
      "step": 9157
    },
    {
      "epoch": 1.8826189742008428,
      "grad_norm": 0.1986982226371765,
      "learning_rate": 2.8950864614050947e-05,
      "loss": 0.5214,
      "step": 9158
    },
    {
      "epoch": 1.8828245451742214,
      "grad_norm": 0.16761691868305206,
      "learning_rate": 2.8941533729716225e-05,
      "loss": 0.5164,
      "step": 9159
    },
    {
      "epoch": 1.8830301161476,
      "grad_norm": 0.17081286013126373,
      "learning_rate": 2.8932203636507085e-05,
      "loss": 0.5364,
      "step": 9160
    },
    {
      "epoch": 1.8832356871209786,
      "grad_norm": 0.19834405183792114,
      "learning_rate": 2.8922874334883166e-05,
      "loss": 0.5329,
      "step": 9161
    },
    {
      "epoch": 1.883441258094357,
      "grad_norm": 0.1979091614484787,
      "learning_rate": 2.8913545825304082e-05,
      "loss": 0.5363,
      "step": 9162
    },
    {
      "epoch": 1.8836468290677355,
      "grad_norm": 0.19581232964992523,
      "learning_rate": 2.8904218108229417e-05,
      "loss": 0.5278,
      "step": 9163
    },
    {
      "epoch": 1.883852400041114,
      "grad_norm": 0.1917477548122406,
      "learning_rate": 2.8894891184118666e-05,
      "loss": 0.5051,
      "step": 9164
    },
    {
      "epoch": 1.8840579710144927,
      "grad_norm": 0.1971418410539627,
      "learning_rate": 2.888556505343137e-05,
      "loss": 0.5333,
      "step": 9165
    },
    {
      "epoch": 1.8842635419878713,
      "grad_norm": 0.19777396321296692,
      "learning_rate": 2.8876239716626963e-05,
      "loss": 0.5299,
      "step": 9166
    },
    {
      "epoch": 1.8844691129612499,
      "grad_norm": 0.20348910987377167,
      "learning_rate": 2.8866915174164866e-05,
      "loss": 0.5299,
      "step": 9167
    },
    {
      "epoch": 1.8846746839346284,
      "grad_norm": 0.16637156903743744,
      "learning_rate": 2.8857591426504452e-05,
      "loss": 0.5043,
      "step": 9168
    },
    {
      "epoch": 1.884880254908007,
      "grad_norm": 0.1301034539937973,
      "learning_rate": 2.8848268474105064e-05,
      "loss": 0.5234,
      "step": 9169
    },
    {
      "epoch": 1.8850858258813856,
      "grad_norm": 0.16645324230194092,
      "learning_rate": 2.8838946317425992e-05,
      "loss": 0.5215,
      "step": 9170
    },
    {
      "epoch": 1.8852913968547642,
      "grad_norm": 0.19991381466388702,
      "learning_rate": 2.882962495692648e-05,
      "loss": 0.5436,
      "step": 9171
    },
    {
      "epoch": 1.8854969678281428,
      "grad_norm": 0.19237017631530762,
      "learning_rate": 2.8820304393065785e-05,
      "loss": 0.5136,
      "step": 9172
    },
    {
      "epoch": 1.8857025388015214,
      "grad_norm": 0.19153942167758942,
      "learning_rate": 2.8810984626303068e-05,
      "loss": 0.5263,
      "step": 9173
    },
    {
      "epoch": 1.8859081097748998,
      "grad_norm": 0.20043647289276123,
      "learning_rate": 2.8801665657097478e-05,
      "loss": 0.5285,
      "step": 9174
    },
    {
      "epoch": 1.8861136807482783,
      "grad_norm": 0.1646028459072113,
      "learning_rate": 2.87923474859081e-05,
      "loss": 0.4842,
      "step": 9175
    },
    {
      "epoch": 1.886319251721657,
      "grad_norm": 0.1615920066833496,
      "learning_rate": 2.8783030113194004e-05,
      "loss": 0.5396,
      "step": 9176
    },
    {
      "epoch": 1.8865248226950353,
      "grad_norm": 0.19795995950698853,
      "learning_rate": 2.8773713539414224e-05,
      "loss": 0.5248,
      "step": 9177
    },
    {
      "epoch": 1.8867303936684139,
      "grad_norm": 0.19956757128238678,
      "learning_rate": 2.8764397765027717e-05,
      "loss": 0.5554,
      "step": 9178
    },
    {
      "epoch": 1.8869359646417925,
      "grad_norm": 0.20267313718795776,
      "learning_rate": 2.8755082790493463e-05,
      "loss": 0.5167,
      "step": 9179
    },
    {
      "epoch": 1.887141535615171,
      "grad_norm": 0.2114168256521225,
      "learning_rate": 2.8745768616270358e-05,
      "loss": 0.5346,
      "step": 9180
    },
    {
      "epoch": 1.8873471065885497,
      "grad_norm": 0.1968614161014557,
      "learning_rate": 2.873645524281726e-05,
      "loss": 0.5359,
      "step": 9181
    },
    {
      "epoch": 1.8875526775619282,
      "grad_norm": 0.19454684853553772,
      "learning_rate": 2.8727142670592992e-05,
      "loss": 0.5156,
      "step": 9182
    },
    {
      "epoch": 1.8877582485353068,
      "grad_norm": 0.19672390818595886,
      "learning_rate": 2.8717830900056353e-05,
      "loss": 0.521,
      "step": 9183
    },
    {
      "epoch": 1.8879638195086854,
      "grad_norm": 0.1960788369178772,
      "learning_rate": 2.8708519931666074e-05,
      "loss": 0.5307,
      "step": 9184
    },
    {
      "epoch": 1.888169390482064,
      "grad_norm": 0.20308300852775574,
      "learning_rate": 2.869920976588086e-05,
      "loss": 0.535,
      "step": 9185
    },
    {
      "epoch": 1.8883749614554426,
      "grad_norm": 0.2008272409439087,
      "learning_rate": 2.86899004031594e-05,
      "loss": 0.5472,
      "step": 9186
    },
    {
      "epoch": 1.8885805324288212,
      "grad_norm": 0.20526312291622162,
      "learning_rate": 2.8680591843960325e-05,
      "loss": 0.5531,
      "step": 9187
    },
    {
      "epoch": 1.8887861034021998,
      "grad_norm": 0.19478276371955872,
      "learning_rate": 2.8671284088742203e-05,
      "loss": 0.5059,
      "step": 9188
    },
    {
      "epoch": 1.8889916743755781,
      "grad_norm": 0.19619226455688477,
      "learning_rate": 2.86619771379636e-05,
      "loss": 0.5273,
      "step": 9189
    },
    {
      "epoch": 1.8891972453489567,
      "grad_norm": 0.1992175430059433,
      "learning_rate": 2.8652670992083012e-05,
      "loss": 0.502,
      "step": 9190
    },
    {
      "epoch": 1.8894028163223353,
      "grad_norm": 0.20220716297626495,
      "learning_rate": 2.864336565155891e-05,
      "loss": 0.5146,
      "step": 9191
    },
    {
      "epoch": 1.889608387295714,
      "grad_norm": 0.2015795111656189,
      "learning_rate": 2.863406111684975e-05,
      "loss": 0.5589,
      "step": 9192
    },
    {
      "epoch": 1.8898139582690923,
      "grad_norm": 0.19961073994636536,
      "learning_rate": 2.86247573884139e-05,
      "loss": 0.5222,
      "step": 9193
    },
    {
      "epoch": 1.8900195292424709,
      "grad_norm": 0.1948811113834381,
      "learning_rate": 2.8615454466709714e-05,
      "loss": 0.5291,
      "step": 9194
    },
    {
      "epoch": 1.8902251002158494,
      "grad_norm": 0.16690470278263092,
      "learning_rate": 2.8606152352195506e-05,
      "loss": 0.4997,
      "step": 9195
    },
    {
      "epoch": 1.890430671189228,
      "grad_norm": 0.12287265807390213,
      "learning_rate": 2.8596851045329547e-05,
      "loss": 0.4877,
      "step": 9196
    },
    {
      "epoch": 1.8906362421626066,
      "grad_norm": 0.1636972278356552,
      "learning_rate": 2.8587550546570063e-05,
      "loss": 0.5293,
      "step": 9197
    },
    {
      "epoch": 1.8908418131359852,
      "grad_norm": 0.1975325345993042,
      "learning_rate": 2.8578250856375253e-05,
      "loss": 0.5406,
      "step": 9198
    },
    {
      "epoch": 1.8910473841093638,
      "grad_norm": 0.19792711734771729,
      "learning_rate": 2.8568951975203272e-05,
      "loss": 0.5435,
      "step": 9199
    },
    {
      "epoch": 1.8912529550827424,
      "grad_norm": 0.1665029525756836,
      "learning_rate": 2.8559653903512225e-05,
      "loss": 0.5264,
      "step": 9200
    },
    {
      "epoch": 1.891458526056121,
      "grad_norm": 0.1603616178035736,
      "learning_rate": 2.855035664176019e-05,
      "loss": 0.5334,
      "step": 9201
    },
    {
      "epoch": 1.8916640970294996,
      "grad_norm": 0.16326431930065155,
      "learning_rate": 2.8541060190405204e-05,
      "loss": 0.5195,
      "step": 9202
    },
    {
      "epoch": 1.8918696680028781,
      "grad_norm": 0.17168152332305908,
      "learning_rate": 2.8531764549905253e-05,
      "loss": 0.5336,
      "step": 9203
    },
    {
      "epoch": 1.8920752389762565,
      "grad_norm": 0.19132192432880402,
      "learning_rate": 2.8522469720718287e-05,
      "loss": 0.5346,
      "step": 9204
    },
    {
      "epoch": 1.892280809949635,
      "grad_norm": 0.20156370103359222,
      "learning_rate": 2.851317570330221e-05,
      "loss": 0.53,
      "step": 9205
    },
    {
      "epoch": 1.8924863809230137,
      "grad_norm": 0.19648964703083038,
      "learning_rate": 2.850388249811492e-05,
      "loss": 0.5264,
      "step": 9206
    },
    {
      "epoch": 1.8926919518963923,
      "grad_norm": 0.19839881360530853,
      "learning_rate": 2.8494590105614233e-05,
      "loss": 0.5382,
      "step": 9207
    },
    {
      "epoch": 1.8928975228697706,
      "grad_norm": 0.19902363419532776,
      "learning_rate": 2.8485298526257956e-05,
      "loss": 0.501,
      "step": 9208
    },
    {
      "epoch": 1.8931030938431492,
      "grad_norm": 0.19624055922031403,
      "learning_rate": 2.8476007760503814e-05,
      "loss": 0.5242,
      "step": 9209
    },
    {
      "epoch": 1.8933086648165278,
      "grad_norm": 0.1998245269060135,
      "learning_rate": 2.8466717808809548e-05,
      "loss": 0.5281,
      "step": 9210
    },
    {
      "epoch": 1.8935142357899064,
      "grad_norm": 0.2012401521205902,
      "learning_rate": 2.845742867163282e-05,
      "loss": 0.5282,
      "step": 9211
    },
    {
      "epoch": 1.893719806763285,
      "grad_norm": 0.22999098896980286,
      "learning_rate": 2.844814034943124e-05,
      "loss": 0.4983,
      "step": 9212
    },
    {
      "epoch": 1.8939253777366636,
      "grad_norm": 0.1972244828939438,
      "learning_rate": 2.8438852842662445e-05,
      "loss": 0.5242,
      "step": 9213
    },
    {
      "epoch": 1.8941309487100422,
      "grad_norm": 0.19226615130901337,
      "learning_rate": 2.8429566151783957e-05,
      "loss": 0.5224,
      "step": 9214
    },
    {
      "epoch": 1.8943365196834208,
      "grad_norm": 0.19878603518009186,
      "learning_rate": 2.8420280277253303e-05,
      "loss": 0.5235,
      "step": 9215
    },
    {
      "epoch": 1.8945420906567993,
      "grad_norm": 0.19549743831157684,
      "learning_rate": 2.8410995219527937e-05,
      "loss": 0.5459,
      "step": 9216
    },
    {
      "epoch": 1.894747661630178,
      "grad_norm": 0.19706833362579346,
      "learning_rate": 2.8401710979065313e-05,
      "loss": 0.5388,
      "step": 9217
    },
    {
      "epoch": 1.8949532326035565,
      "grad_norm": 0.19738836586475372,
      "learning_rate": 2.839242755632279e-05,
      "loss": 0.5026,
      "step": 9218
    },
    {
      "epoch": 1.895158803576935,
      "grad_norm": 0.1891833394765854,
      "learning_rate": 2.838314495175774e-05,
      "loss": 0.5232,
      "step": 9219
    },
    {
      "epoch": 1.8953643745503135,
      "grad_norm": 0.2042219191789627,
      "learning_rate": 2.837386316582748e-05,
      "loss": 0.5171,
      "step": 9220
    },
    {
      "epoch": 1.895569945523692,
      "grad_norm": 0.19573958218097687,
      "learning_rate": 2.8364582198989256e-05,
      "loss": 0.521,
      "step": 9221
    },
    {
      "epoch": 1.8957755164970707,
      "grad_norm": 0.1948520541191101,
      "learning_rate": 2.835530205170033e-05,
      "loss": 0.5316,
      "step": 9222
    },
    {
      "epoch": 1.895981087470449,
      "grad_norm": 0.1974736452102661,
      "learning_rate": 2.8346022724417877e-05,
      "loss": 0.5227,
      "step": 9223
    },
    {
      "epoch": 1.8961866584438276,
      "grad_norm": 0.19401709735393524,
      "learning_rate": 2.8336744217599044e-05,
      "loss": 0.5546,
      "step": 9224
    },
    {
      "epoch": 1.8963922294172062,
      "grad_norm": 0.2048596292734146,
      "learning_rate": 2.832746653170093e-05,
      "loss": 0.536,
      "step": 9225
    },
    {
      "epoch": 1.8965978003905848,
      "grad_norm": 0.19587990641593933,
      "learning_rate": 2.8318189667180604e-05,
      "loss": 0.5109,
      "step": 9226
    },
    {
      "epoch": 1.8968033713639634,
      "grad_norm": 0.20599375665187836,
      "learning_rate": 2.8308913624495113e-05,
      "loss": 0.5001,
      "step": 9227
    },
    {
      "epoch": 1.897008942337342,
      "grad_norm": 0.12338798493146896,
      "learning_rate": 2.829963840410144e-05,
      "loss": 0.5183,
      "step": 9228
    },
    {
      "epoch": 1.8972145133107206,
      "grad_norm": 0.16888199746608734,
      "learning_rate": 2.829036400645652e-05,
      "loss": 0.5375,
      "step": 9229
    },
    {
      "epoch": 1.8974200842840991,
      "grad_norm": 0.1930612325668335,
      "learning_rate": 2.8281090432017264e-05,
      "loss": 0.5138,
      "step": 9230
    },
    {
      "epoch": 1.8976256552574777,
      "grad_norm": 0.19552965462207794,
      "learning_rate": 2.827181768124054e-05,
      "loss": 0.5154,
      "step": 9231
    },
    {
      "epoch": 1.8978312262308563,
      "grad_norm": 0.1639067679643631,
      "learning_rate": 2.8262545754583176e-05,
      "loss": 0.5142,
      "step": 9232
    },
    {
      "epoch": 1.898036797204235,
      "grad_norm": 0.15904250741004944,
      "learning_rate": 2.8253274652501932e-05,
      "loss": 0.5298,
      "step": 9233
    },
    {
      "epoch": 1.8982423681776133,
      "grad_norm": 0.19867335259914398,
      "learning_rate": 2.824400437545359e-05,
      "loss": 0.5261,
      "step": 9234
    },
    {
      "epoch": 1.8984479391509919,
      "grad_norm": 0.16743482649326324,
      "learning_rate": 2.8234734923894837e-05,
      "loss": 0.4987,
      "step": 9235
    },
    {
      "epoch": 1.8986535101243704,
      "grad_norm": 0.15749593079090118,
      "learning_rate": 2.822546629828233e-05,
      "loss": 0.5142,
      "step": 9236
    },
    {
      "epoch": 1.898859081097749,
      "grad_norm": 0.21486081182956696,
      "learning_rate": 2.8216198499072697e-05,
      "loss": 0.5442,
      "step": 9237
    },
    {
      "epoch": 1.8990646520711274,
      "grad_norm": 0.16459722816944122,
      "learning_rate": 2.8206931526722516e-05,
      "loss": 0.5078,
      "step": 9238
    },
    {
      "epoch": 1.899270223044506,
      "grad_norm": 0.16410863399505615,
      "learning_rate": 2.819766538168832e-05,
      "loss": 0.5079,
      "step": 9239
    },
    {
      "epoch": 1.8994757940178846,
      "grad_norm": 0.20152071118354797,
      "learning_rate": 2.8188400064426613e-05,
      "loss": 0.5097,
      "step": 9240
    },
    {
      "epoch": 1.8996813649912632,
      "grad_norm": 0.19304706156253815,
      "learning_rate": 2.8179135575393867e-05,
      "loss": 0.5271,
      "step": 9241
    },
    {
      "epoch": 1.8998869359646418,
      "grad_norm": 0.19228653609752655,
      "learning_rate": 2.8169871915046488e-05,
      "loss": 0.5202,
      "step": 9242
    },
    {
      "epoch": 1.9000925069380203,
      "grad_norm": 0.20129820704460144,
      "learning_rate": 2.816060908384086e-05,
      "loss": 0.527,
      "step": 9243
    },
    {
      "epoch": 1.900298077911399,
      "grad_norm": 0.203932523727417,
      "learning_rate": 2.8151347082233317e-05,
      "loss": 0.5486,
      "step": 9244
    },
    {
      "epoch": 1.9005036488847775,
      "grad_norm": 0.1936814785003662,
      "learning_rate": 2.8142085910680153e-05,
      "loss": 0.5111,
      "step": 9245
    },
    {
      "epoch": 1.900709219858156,
      "grad_norm": 0.198873832821846,
      "learning_rate": 2.813282556963762e-05,
      "loss": 0.5324,
      "step": 9246
    },
    {
      "epoch": 1.9009147908315347,
      "grad_norm": 0.19669774174690247,
      "learning_rate": 2.8123566059561917e-05,
      "loss": 0.5218,
      "step": 9247
    },
    {
      "epoch": 1.9011203618049133,
      "grad_norm": 0.19902367889881134,
      "learning_rate": 2.8114307380909255e-05,
      "loss": 0.5155,
      "step": 9248
    },
    {
      "epoch": 1.9013259327782919,
      "grad_norm": 0.1605267971754074,
      "learning_rate": 2.8105049534135744e-05,
      "loss": 0.502,
      "step": 9249
    },
    {
      "epoch": 1.9015315037516702,
      "grad_norm": 0.16073958575725555,
      "learning_rate": 2.8095792519697472e-05,
      "loss": 0.5487,
      "step": 9250
    },
    {
      "epoch": 1.9017370747250488,
      "grad_norm": 0.19656385481357574,
      "learning_rate": 2.8086536338050488e-05,
      "loss": 0.5008,
      "step": 9251
    },
    {
      "epoch": 1.9019426456984274,
      "grad_norm": 0.20274598896503448,
      "learning_rate": 2.807728098965081e-05,
      "loss": 0.5181,
      "step": 9252
    },
    {
      "epoch": 1.9021482166718058,
      "grad_norm": 0.1996408849954605,
      "learning_rate": 2.8068026474954407e-05,
      "loss": 0.5041,
      "step": 9253
    },
    {
      "epoch": 1.9023537876451844,
      "grad_norm": 0.20199070870876312,
      "learning_rate": 2.805877279441717e-05,
      "loss": 0.5173,
      "step": 9254
    },
    {
      "epoch": 1.902559358618563,
      "grad_norm": 0.16346138715744019,
      "learning_rate": 2.804951994849504e-05,
      "loss": 0.5007,
      "step": 9255
    },
    {
      "epoch": 1.9027649295919415,
      "grad_norm": 0.16361282765865326,
      "learning_rate": 2.8040267937643842e-05,
      "loss": 0.5564,
      "step": 9256
    },
    {
      "epoch": 1.9029705005653201,
      "grad_norm": 0.1939617544412613,
      "learning_rate": 2.8031016762319366e-05,
      "loss": 0.5369,
      "step": 9257
    },
    {
      "epoch": 1.9031760715386987,
      "grad_norm": 0.20072026550769806,
      "learning_rate": 2.802176642297738e-05,
      "loss": 0.5177,
      "step": 9258
    },
    {
      "epoch": 1.9033816425120773,
      "grad_norm": 0.20429256558418274,
      "learning_rate": 2.801251692007361e-05,
      "loss": 0.5275,
      "step": 9259
    },
    {
      "epoch": 1.903587213485456,
      "grad_norm": 0.20607547461986542,
      "learning_rate": 2.8003268254063734e-05,
      "loss": 0.5278,
      "step": 9260
    },
    {
      "epoch": 1.9037927844588345,
      "grad_norm": 0.19499348104000092,
      "learning_rate": 2.7994020425403363e-05,
      "loss": 0.5303,
      "step": 9261
    },
    {
      "epoch": 1.903998355432213,
      "grad_norm": 0.19774754345417023,
      "learning_rate": 2.7984773434548146e-05,
      "loss": 0.5161,
      "step": 9262
    },
    {
      "epoch": 1.9042039264055917,
      "grad_norm": 0.2001447230577469,
      "learning_rate": 2.79755272819536e-05,
      "loss": 0.5278,
      "step": 9263
    },
    {
      "epoch": 1.9044094973789703,
      "grad_norm": 0.18907634913921356,
      "learning_rate": 2.7966281968075258e-05,
      "loss": 0.5056,
      "step": 9264
    },
    {
      "epoch": 1.9046150683523486,
      "grad_norm": 0.19662658870220184,
      "learning_rate": 2.7957037493368595e-05,
      "loss": 0.5241,
      "step": 9265
    },
    {
      "epoch": 1.9048206393257272,
      "grad_norm": 0.16739560663700104,
      "learning_rate": 2.794779385828903e-05,
      "loss": 0.5178,
      "step": 9266
    },
    {
      "epoch": 1.9050262102991058,
      "grad_norm": 0.16151371598243713,
      "learning_rate": 2.7938551063291945e-05,
      "loss": 0.5285,
      "step": 9267
    },
    {
      "epoch": 1.9052317812724842,
      "grad_norm": 0.20817650854587555,
      "learning_rate": 2.7929309108832727e-05,
      "loss": 0.5066,
      "step": 9268
    },
    {
      "epoch": 1.9054373522458627,
      "grad_norm": 0.20188304781913757,
      "learning_rate": 2.7920067995366655e-05,
      "loss": 0.5425,
      "step": 9269
    },
    {
      "epoch": 1.9056429232192413,
      "grad_norm": 0.20524436235427856,
      "learning_rate": 2.7910827723348997e-05,
      "loss": 0.5229,
      "step": 9270
    },
    {
      "epoch": 1.90584849419262,
      "grad_norm": 0.1990385204553604,
      "learning_rate": 2.790158829323499e-05,
      "loss": 0.5126,
      "step": 9271
    },
    {
      "epoch": 1.9060540651659985,
      "grad_norm": 0.19942565262317657,
      "learning_rate": 2.7892349705479808e-05,
      "loss": 0.5362,
      "step": 9272
    },
    {
      "epoch": 1.906259636139377,
      "grad_norm": 0.2039434313774109,
      "learning_rate": 2.7883111960538585e-05,
      "loss": 0.517,
      "step": 9273
    },
    {
      "epoch": 1.9064652071127557,
      "grad_norm": 0.20296645164489746,
      "learning_rate": 2.7873875058866438e-05,
      "loss": 0.5199,
      "step": 9274
    },
    {
      "epoch": 1.9066707780861343,
      "grad_norm": 0.16485995054244995,
      "learning_rate": 2.786463900091842e-05,
      "loss": 0.4991,
      "step": 9275
    },
    {
      "epoch": 1.9068763490595129,
      "grad_norm": 0.12144458293914795,
      "learning_rate": 2.7855403787149536e-05,
      "loss": 0.5215,
      "step": 9276
    },
    {
      "epoch": 1.9070819200328915,
      "grad_norm": 0.12911517918109894,
      "learning_rate": 2.7846169418014794e-05,
      "loss": 0.5127,
      "step": 9277
    },
    {
      "epoch": 1.90728749100627,
      "grad_norm": 0.16436396539211273,
      "learning_rate": 2.78369358939691e-05,
      "loss": 0.5474,
      "step": 9278
    },
    {
      "epoch": 1.9074930619796486,
      "grad_norm": 0.19795219600200653,
      "learning_rate": 2.782770321546736e-05,
      "loss": 0.5031,
      "step": 9279
    },
    {
      "epoch": 1.907698632953027,
      "grad_norm": 0.20450328290462494,
      "learning_rate": 2.7818471382964418e-05,
      "loss": 0.5389,
      "step": 9280
    },
    {
      "epoch": 1.9079042039264056,
      "grad_norm": 0.19718262553215027,
      "learning_rate": 2.7809240396915066e-05,
      "loss": 0.5057,
      "step": 9281
    },
    {
      "epoch": 1.9081097748997842,
      "grad_norm": 0.19920390844345093,
      "learning_rate": 2.7800010257774107e-05,
      "loss": 0.5347,
      "step": 9282
    },
    {
      "epoch": 1.9083153458731628,
      "grad_norm": 0.19880931079387665,
      "learning_rate": 2.7790780965996248e-05,
      "loss": 0.525,
      "step": 9283
    },
    {
      "epoch": 1.9085209168465411,
      "grad_norm": 0.19791699945926666,
      "learning_rate": 2.778155252203618e-05,
      "loss": 0.5229,
      "step": 9284
    },
    {
      "epoch": 1.9087264878199197,
      "grad_norm": 0.19774897396564484,
      "learning_rate": 2.7772324926348524e-05,
      "loss": 0.4856,
      "step": 9285
    },
    {
      "epoch": 1.9089320587932983,
      "grad_norm": 0.20268942415714264,
      "learning_rate": 2.7763098179387917e-05,
      "loss": 0.5158,
      "step": 9286
    },
    {
      "epoch": 1.909137629766677,
      "grad_norm": 0.19894084334373474,
      "learning_rate": 2.7753872281608892e-05,
      "loss": 0.5155,
      "step": 9287
    },
    {
      "epoch": 1.9093432007400555,
      "grad_norm": 0.20365378260612488,
      "learning_rate": 2.774464723346595e-05,
      "loss": 0.5379,
      "step": 9288
    },
    {
      "epoch": 1.909548771713434,
      "grad_norm": 0.20348501205444336,
      "learning_rate": 2.773542303541361e-05,
      "loss": 0.5442,
      "step": 9289
    },
    {
      "epoch": 1.9097543426868127,
      "grad_norm": 0.20363953709602356,
      "learning_rate": 2.772619968790628e-05,
      "loss": 0.5028,
      "step": 9290
    },
    {
      "epoch": 1.9099599136601912,
      "grad_norm": 0.20089566707611084,
      "learning_rate": 2.771697719139836e-05,
      "loss": 0.5419,
      "step": 9291
    },
    {
      "epoch": 1.9101654846335698,
      "grad_norm": 0.19938144087791443,
      "learning_rate": 2.7707755546344185e-05,
      "loss": 0.5269,
      "step": 9292
    },
    {
      "epoch": 1.9103710556069484,
      "grad_norm": 0.1974303126335144,
      "learning_rate": 2.7698534753198074e-05,
      "loss": 0.5038,
      "step": 9293
    },
    {
      "epoch": 1.910576626580327,
      "grad_norm": 0.16670559346675873,
      "learning_rate": 2.768931481241428e-05,
      "loss": 0.498,
      "step": 9294
    },
    {
      "epoch": 1.9107821975537054,
      "grad_norm": 0.16241280734539032,
      "learning_rate": 2.768009572444703e-05,
      "loss": 0.4979,
      "step": 9295
    },
    {
      "epoch": 1.910987768527084,
      "grad_norm": 0.2039078176021576,
      "learning_rate": 2.767087748975053e-05,
      "loss": 0.5426,
      "step": 9296
    },
    {
      "epoch": 1.9111933395004626,
      "grad_norm": 0.20147615671157837,
      "learning_rate": 2.766166010877889e-05,
      "loss": 0.5098,
      "step": 9297
    },
    {
      "epoch": 1.9113989104738411,
      "grad_norm": 0.19318887591362,
      "learning_rate": 2.765244358198621e-05,
      "loss": 0.5412,
      "step": 9298
    },
    {
      "epoch": 1.9116044814472195,
      "grad_norm": 0.19322159886360168,
      "learning_rate": 2.7643227909826573e-05,
      "loss": 0.5412,
      "step": 9299
    },
    {
      "epoch": 1.911810052420598,
      "grad_norm": 0.1994897574186325,
      "learning_rate": 2.7634013092753962e-05,
      "loss": 0.5362,
      "step": 9300
    },
    {
      "epoch": 1.9120156233939767,
      "grad_norm": 0.19642673432826996,
      "learning_rate": 2.762479913122236e-05,
      "loss": 0.5088,
      "step": 9301
    },
    {
      "epoch": 1.9122211943673553,
      "grad_norm": 0.20559348165988922,
      "learning_rate": 2.761558602568567e-05,
      "loss": 0.5465,
      "step": 9302
    },
    {
      "epoch": 1.9124267653407339,
      "grad_norm": 0.1719941794872284,
      "learning_rate": 2.760637377659782e-05,
      "loss": 0.4991,
      "step": 9303
    },
    {
      "epoch": 1.9126323363141124,
      "grad_norm": 0.1635911911725998,
      "learning_rate": 2.7597162384412645e-05,
      "loss": 0.5127,
      "step": 9304
    },
    {
      "epoch": 1.912837907287491,
      "grad_norm": 0.19266277551651,
      "learning_rate": 2.7587951849583936e-05,
      "loss": 0.5235,
      "step": 9305
    },
    {
      "epoch": 1.9130434782608696,
      "grad_norm": 0.20263995230197906,
      "learning_rate": 2.757874217256544e-05,
      "loss": 0.5287,
      "step": 9306
    },
    {
      "epoch": 1.9132490492342482,
      "grad_norm": 0.21031515300273895,
      "learning_rate": 2.75695333538109e-05,
      "loss": 0.5176,
      "step": 9307
    },
    {
      "epoch": 1.9134546202076268,
      "grad_norm": 0.19321498274803162,
      "learning_rate": 2.7560325393773992e-05,
      "loss": 0.5296,
      "step": 9308
    },
    {
      "epoch": 1.9136601911810054,
      "grad_norm": 0.16547061502933502,
      "learning_rate": 2.7551118292908317e-05,
      "loss": 0.5214,
      "step": 9309
    },
    {
      "epoch": 1.9138657621543838,
      "grad_norm": 0.15889470279216766,
      "learning_rate": 2.7541912051667503e-05,
      "loss": 0.5355,
      "step": 9310
    },
    {
      "epoch": 1.9140713331277623,
      "grad_norm": 0.19868826866149902,
      "learning_rate": 2.7532706670505082e-05,
      "loss": 0.5194,
      "step": 9311
    },
    {
      "epoch": 1.914276904101141,
      "grad_norm": 0.20042477548122406,
      "learning_rate": 2.7523502149874562e-05,
      "loss": 0.4973,
      "step": 9312
    },
    {
      "epoch": 1.9144824750745195,
      "grad_norm": 0.2017516791820526,
      "learning_rate": 2.7514298490229408e-05,
      "loss": 0.5121,
      "step": 9313
    },
    {
      "epoch": 1.9146880460478979,
      "grad_norm": 0.19849123060703278,
      "learning_rate": 2.7505095692023043e-05,
      "loss": 0.527,
      "step": 9314
    },
    {
      "epoch": 1.9148936170212765,
      "grad_norm": 0.19380970299243927,
      "learning_rate": 2.7495893755708823e-05,
      "loss": 0.5175,
      "step": 9315
    },
    {
      "epoch": 1.915099187994655,
      "grad_norm": 0.16672258079051971,
      "learning_rate": 2.748669268174011e-05,
      "loss": 0.4853,
      "step": 9316
    },
    {
      "epoch": 1.9153047589680336,
      "grad_norm": 0.1614876687526703,
      "learning_rate": 2.74774924705702e-05,
      "loss": 0.5163,
      "step": 9317
    },
    {
      "epoch": 1.9155103299414122,
      "grad_norm": 0.1949508935213089,
      "learning_rate": 2.746829312265233e-05,
      "loss": 0.515,
      "step": 9318
    },
    {
      "epoch": 1.9157159009147908,
      "grad_norm": 0.2158740758895874,
      "learning_rate": 2.745909463843972e-05,
      "loss": 0.5089,
      "step": 9319
    },
    {
      "epoch": 1.9159214718881694,
      "grad_norm": 0.16678479313850403,
      "learning_rate": 2.744989701838553e-05,
      "loss": 0.5147,
      "step": 9320
    },
    {
      "epoch": 1.916127042861548,
      "grad_norm": 0.16045857965946198,
      "learning_rate": 2.7440700262942893e-05,
      "loss": 0.505,
      "step": 9321
    },
    {
      "epoch": 1.9163326138349266,
      "grad_norm": 0.17274217307567596,
      "learning_rate": 2.7431504372564874e-05,
      "loss": 0.5002,
      "step": 9322
    },
    {
      "epoch": 1.9165381848083052,
      "grad_norm": 0.16283760964870453,
      "learning_rate": 2.7422309347704505e-05,
      "loss": 0.5303,
      "step": 9323
    },
    {
      "epoch": 1.9167437557816838,
      "grad_norm": 0.1970645785331726,
      "learning_rate": 2.741311518881481e-05,
      "loss": 0.5198,
      "step": 9324
    },
    {
      "epoch": 1.9169493267550621,
      "grad_norm": 0.20442216098308563,
      "learning_rate": 2.7403921896348735e-05,
      "loss": 0.4928,
      "step": 9325
    },
    {
      "epoch": 1.9171548977284407,
      "grad_norm": 0.19744066894054413,
      "learning_rate": 2.739472947075918e-05,
      "loss": 0.5099,
      "step": 9326
    },
    {
      "epoch": 1.9173604687018193,
      "grad_norm": 0.20807257294654846,
      "learning_rate": 2.7385537912499014e-05,
      "loss": 0.5173,
      "step": 9327
    },
    {
      "epoch": 1.917566039675198,
      "grad_norm": 0.19921061396598816,
      "learning_rate": 2.7376347222021067e-05,
      "loss": 0.5094,
      "step": 9328
    },
    {
      "epoch": 1.9177716106485763,
      "grad_norm": 0.1887097805738449,
      "learning_rate": 2.7367157399778125e-05,
      "loss": 0.514,
      "step": 9329
    },
    {
      "epoch": 1.9179771816219549,
      "grad_norm": 0.1967703402042389,
      "learning_rate": 2.7357968446222903e-05,
      "loss": 0.5085,
      "step": 9330
    },
    {
      "epoch": 1.9181827525953334,
      "grad_norm": 0.1980697363615036,
      "learning_rate": 2.734878036180813e-05,
      "loss": 0.5417,
      "step": 9331
    },
    {
      "epoch": 1.918388323568712,
      "grad_norm": 0.20071645081043243,
      "learning_rate": 2.733959314698645e-05,
      "loss": 0.5293,
      "step": 9332
    },
    {
      "epoch": 1.9185938945420906,
      "grad_norm": 0.1977865993976593,
      "learning_rate": 2.7330406802210472e-05,
      "loss": 0.5359,
      "step": 9333
    },
    {
      "epoch": 1.9187994655154692,
      "grad_norm": 0.19883140921592712,
      "learning_rate": 2.7321221327932762e-05,
      "loss": 0.5049,
      "step": 9334
    },
    {
      "epoch": 1.9190050364888478,
      "grad_norm": 0.19968102872371674,
      "learning_rate": 2.7312036724605848e-05,
      "loss": 0.5255,
      "step": 9335
    },
    {
      "epoch": 1.9192106074622264,
      "grad_norm": 0.19368182122707367,
      "learning_rate": 2.7302852992682212e-05,
      "loss": 0.5299,
      "step": 9336
    },
    {
      "epoch": 1.919416178435605,
      "grad_norm": 0.18962502479553223,
      "learning_rate": 2.7293670132614277e-05,
      "loss": 0.505,
      "step": 9337
    },
    {
      "epoch": 1.9196217494089836,
      "grad_norm": 0.19553595781326294,
      "learning_rate": 2.7284488144854465e-05,
      "loss": 0.5214,
      "step": 9338
    },
    {
      "epoch": 1.9198273203823621,
      "grad_norm": 0.1957550048828125,
      "learning_rate": 2.7275307029855118e-05,
      "loss": 0.5377,
      "step": 9339
    },
    {
      "epoch": 1.9200328913557407,
      "grad_norm": 0.19873984158039093,
      "learning_rate": 2.726612678806856e-05,
      "loss": 0.53,
      "step": 9340
    },
    {
      "epoch": 1.920238462329119,
      "grad_norm": 0.2044048011302948,
      "learning_rate": 2.7256947419947038e-05,
      "loss": 0.5364,
      "step": 9341
    },
    {
      "epoch": 1.9204440333024977,
      "grad_norm": 0.1971905678510666,
      "learning_rate": 2.7247768925942793e-05,
      "loss": 0.5233,
      "step": 9342
    },
    {
      "epoch": 1.9206496042758763,
      "grad_norm": 0.15951332449913025,
      "learning_rate": 2.7238591306507985e-05,
      "loss": 0.5017,
      "step": 9343
    },
    {
      "epoch": 1.9208551752492546,
      "grad_norm": 0.172995924949646,
      "learning_rate": 2.722941456209478e-05,
      "loss": 0.5254,
      "step": 9344
    },
    {
      "epoch": 1.9210607462226332,
      "grad_norm": 0.2066241055727005,
      "learning_rate": 2.7220238693155255e-05,
      "loss": 0.5268,
      "step": 9345
    },
    {
      "epoch": 1.9212663171960118,
      "grad_norm": 0.19944432377815247,
      "learning_rate": 2.721106370014147e-05,
      "loss": 0.5281,
      "step": 9346
    },
    {
      "epoch": 1.9214718881693904,
      "grad_norm": 0.20762419700622559,
      "learning_rate": 2.7201889583505433e-05,
      "loss": 0.5314,
      "step": 9347
    },
    {
      "epoch": 1.921677459142769,
      "grad_norm": 0.16759265959262848,
      "learning_rate": 2.7192716343699114e-05,
      "loss": 0.4948,
      "step": 9348
    },
    {
      "epoch": 1.9218830301161476,
      "grad_norm": 0.12171138823032379,
      "learning_rate": 2.718354398117441e-05,
      "loss": 0.4984,
      "step": 9349
    },
    {
      "epoch": 1.9220886010895262,
      "grad_norm": 0.17452505230903625,
      "learning_rate": 2.7174372496383224e-05,
      "loss": 0.5404,
      "step": 9350
    },
    {
      "epoch": 1.9222941720629048,
      "grad_norm": 0.16276037693023682,
      "learning_rate": 2.716520188977739e-05,
      "loss": 0.5044,
      "step": 9351
    },
    {
      "epoch": 1.9224997430362833,
      "grad_norm": 0.1610327959060669,
      "learning_rate": 2.7156032161808704e-05,
      "loss": 0.5304,
      "step": 9352
    },
    {
      "epoch": 1.922705314009662,
      "grad_norm": 0.2447415590286255,
      "learning_rate": 2.7146863312928917e-05,
      "loss": 0.5119,
      "step": 9353
    },
    {
      "epoch": 1.9229108849830405,
      "grad_norm": 0.19157683849334717,
      "learning_rate": 2.7137695343589725e-05,
      "loss": 0.5232,
      "step": 9354
    },
    {
      "epoch": 1.9231164559564191,
      "grad_norm": 0.20079728960990906,
      "learning_rate": 2.71285282542428e-05,
      "loss": 0.5146,
      "step": 9355
    },
    {
      "epoch": 1.9233220269297975,
      "grad_norm": 0.20246592164039612,
      "learning_rate": 2.7119362045339755e-05,
      "loss": 0.5289,
      "step": 9356
    },
    {
      "epoch": 1.923527597903176,
      "grad_norm": 0.1998445987701416,
      "learning_rate": 2.7110196717332144e-05,
      "loss": 0.537,
      "step": 9357
    },
    {
      "epoch": 1.9237331688765547,
      "grad_norm": 0.20412832498550415,
      "learning_rate": 2.7101032270671548e-05,
      "loss": 0.5388,
      "step": 9358
    },
    {
      "epoch": 1.9239387398499332,
      "grad_norm": 0.19689737260341644,
      "learning_rate": 2.709186870580943e-05,
      "loss": 0.529,
      "step": 9359
    },
    {
      "epoch": 1.9241443108233116,
      "grad_norm": 0.19693289697170258,
      "learning_rate": 2.7082706023197238e-05,
      "loss": 0.5362,
      "step": 9360
    },
    {
      "epoch": 1.9243498817966902,
      "grad_norm": 0.1994449496269226,
      "learning_rate": 2.707354422328637e-05,
      "loss": 0.5326,
      "step": 9361
    },
    {
      "epoch": 1.9245554527700688,
      "grad_norm": 0.20162896811962128,
      "learning_rate": 2.7064383306528194e-05,
      "loss": 0.5167,
      "step": 9362
    },
    {
      "epoch": 1.9247610237434474,
      "grad_norm": 0.19568750262260437,
      "learning_rate": 2.7055223273374027e-05,
      "loss": 0.5314,
      "step": 9363
    },
    {
      "epoch": 1.924966594716826,
      "grad_norm": 0.20198176801204681,
      "learning_rate": 2.7046064124275115e-05,
      "loss": 0.5225,
      "step": 9364
    },
    {
      "epoch": 1.9251721656902046,
      "grad_norm": 0.22592489421367645,
      "learning_rate": 2.7036905859682726e-05,
      "loss": 0.5282,
      "step": 9365
    },
    {
      "epoch": 1.9253777366635831,
      "grad_norm": 0.19349443912506104,
      "learning_rate": 2.7027748480048022e-05,
      "loss": 0.4946,
      "step": 9366
    },
    {
      "epoch": 1.9255833076369617,
      "grad_norm": 0.20024524629116058,
      "learning_rate": 2.701859198582215e-05,
      "loss": 0.5214,
      "step": 9367
    },
    {
      "epoch": 1.9257888786103403,
      "grad_norm": 0.19572319090366364,
      "learning_rate": 2.700943637745621e-05,
      "loss": 0.5243,
      "step": 9368
    },
    {
      "epoch": 1.925994449583719,
      "grad_norm": 0.20359370112419128,
      "learning_rate": 2.7000281655401248e-05,
      "loss": 0.5192,
      "step": 9369
    },
    {
      "epoch": 1.9262000205570975,
      "grad_norm": 0.17284277081489563,
      "learning_rate": 2.6991127820108274e-05,
      "loss": 0.5126,
      "step": 9370
    },
    {
      "epoch": 1.9264055915304759,
      "grad_norm": 0.17155306041240692,
      "learning_rate": 2.6981974872028255e-05,
      "loss": 0.5354,
      "step": 9371
    },
    {
      "epoch": 1.9266111625038544,
      "grad_norm": 0.20635953545570374,
      "learning_rate": 2.6972822811612127e-05,
      "loss": 0.5047,
      "step": 9372
    },
    {
      "epoch": 1.926816733477233,
      "grad_norm": 0.16604094207286835,
      "learning_rate": 2.696367163931075e-05,
      "loss": 0.5067,
      "step": 9373
    },
    {
      "epoch": 1.9270223044506116,
      "grad_norm": 0.15949425101280212,
      "learning_rate": 2.695452135557498e-05,
      "loss": 0.5084,
      "step": 9374
    },
    {
      "epoch": 1.92722787542399,
      "grad_norm": 0.19722892343997955,
      "learning_rate": 2.69453719608556e-05,
      "loss": 0.5247,
      "step": 9375
    },
    {
      "epoch": 1.9274334463973686,
      "grad_norm": 0.189317524433136,
      "learning_rate": 2.6936223455603357e-05,
      "loss": 0.5275,
      "step": 9376
    },
    {
      "epoch": 1.9276390173707472,
      "grad_norm": 0.193404883146286,
      "learning_rate": 2.6927075840268952e-05,
      "loss": 0.5003,
      "step": 9377
    },
    {
      "epoch": 1.9278445883441258,
      "grad_norm": 0.18967877328395844,
      "learning_rate": 2.6917929115303032e-05,
      "loss": 0.5414,
      "step": 9378
    },
    {
      "epoch": 1.9280501593175043,
      "grad_norm": 0.2023673802614212,
      "learning_rate": 2.690878328115625e-05,
      "loss": 0.5172,
      "step": 9379
    },
    {
      "epoch": 1.928255730290883,
      "grad_norm": 0.1624782383441925,
      "learning_rate": 2.6899638338279148e-05,
      "loss": 0.5076,
      "step": 9380
    },
    {
      "epoch": 1.9284613012642615,
      "grad_norm": 0.1585642248392105,
      "learning_rate": 2.6890494287122268e-05,
      "loss": 0.5232,
      "step": 9381
    },
    {
      "epoch": 1.92866687223764,
      "grad_norm": 0.20032867789268494,
      "learning_rate": 2.6881351128136084e-05,
      "loss": 0.5015,
      "step": 9382
    },
    {
      "epoch": 1.9288724432110187,
      "grad_norm": 0.20595210790634155,
      "learning_rate": 2.6872208861771055e-05,
      "loss": 0.5079,
      "step": 9383
    },
    {
      "epoch": 1.9290780141843973,
      "grad_norm": 0.2049880176782608,
      "learning_rate": 2.6863067488477565e-05,
      "loss": 0.5073,
      "step": 9384
    },
    {
      "epoch": 1.9292835851577759,
      "grad_norm": 0.19586196541786194,
      "learning_rate": 2.6853927008705945e-05,
      "loss": 0.5362,
      "step": 9385
    },
    {
      "epoch": 1.9294891561311542,
      "grad_norm": 0.19678068161010742,
      "learning_rate": 2.684478742290655e-05,
      "loss": 0.507,
      "step": 9386
    },
    {
      "epoch": 1.9296947271045328,
      "grad_norm": 0.15755969285964966,
      "learning_rate": 2.683564873152962e-05,
      "loss": 0.5057,
      "step": 9387
    },
    {
      "epoch": 1.9299002980779114,
      "grad_norm": 0.16498331725597382,
      "learning_rate": 2.6826510935025375e-05,
      "loss": 0.5442,
      "step": 9388
    },
    {
      "epoch": 1.93010586905129,
      "grad_norm": 0.19928227365016937,
      "learning_rate": 2.681737403384399e-05,
      "loss": 0.521,
      "step": 9389
    },
    {
      "epoch": 1.9303114400246684,
      "grad_norm": 0.1977323740720749,
      "learning_rate": 2.680823802843561e-05,
      "loss": 0.528,
      "step": 9390
    },
    {
      "epoch": 1.930517010998047,
      "grad_norm": 0.1704244613647461,
      "learning_rate": 2.67991029192503e-05,
      "loss": 0.509,
      "step": 9391
    },
    {
      "epoch": 1.9307225819714255,
      "grad_norm": 0.16151131689548492,
      "learning_rate": 2.6789968706738123e-05,
      "loss": 0.5298,
      "step": 9392
    },
    {
      "epoch": 1.9309281529448041,
      "grad_norm": 0.18972033262252808,
      "learning_rate": 2.678083539134908e-05,
      "loss": 0.5135,
      "step": 9393
    },
    {
      "epoch": 1.9311337239181827,
      "grad_norm": 0.19905173778533936,
      "learning_rate": 2.677170297353311e-05,
      "loss": 0.496,
      "step": 9394
    },
    {
      "epoch": 1.9313392948915613,
      "grad_norm": 0.21623218059539795,
      "learning_rate": 2.6762571453740148e-05,
      "loss": 0.539,
      "step": 9395
    },
    {
      "epoch": 1.93154486586494,
      "grad_norm": 0.16825906932353973,
      "learning_rate": 2.675344083242005e-05,
      "loss": 0.5124,
      "step": 9396
    },
    {
      "epoch": 1.9317504368383185,
      "grad_norm": 0.12174926698207855,
      "learning_rate": 2.674431111002263e-05,
      "loss": 0.5125,
      "step": 9397
    },
    {
      "epoch": 1.931956007811697,
      "grad_norm": 0.16127155721187592,
      "learning_rate": 2.6735182286997685e-05,
      "loss": 0.5231,
      "step": 9398
    },
    {
      "epoch": 1.9321615787850757,
      "grad_norm": 0.19533561170101166,
      "learning_rate": 2.6726054363794914e-05,
      "loss": 0.5015,
      "step": 9399
    },
    {
      "epoch": 1.9323671497584543,
      "grad_norm": 0.1924934983253479,
      "learning_rate": 2.671692734086405e-05,
      "loss": 0.5085,
      "step": 9400
    },
    {
      "epoch": 1.9325727207318326,
      "grad_norm": 0.1985793113708496,
      "learning_rate": 2.6707801218654726e-05,
      "loss": 0.5133,
      "step": 9401
    },
    {
      "epoch": 1.9327782917052112,
      "grad_norm": 0.17007775604724884,
      "learning_rate": 2.669867599761654e-05,
      "loss": 0.5008,
      "step": 9402
    },
    {
      "epoch": 1.9329838626785898,
      "grad_norm": 0.16425763070583344,
      "learning_rate": 2.6689551678199035e-05,
      "loss": 0.5018,
      "step": 9403
    },
    {
      "epoch": 1.9331894336519684,
      "grad_norm": 0.17384882271289825,
      "learning_rate": 2.6680428260851744e-05,
      "loss": 0.4811,
      "step": 9404
    },
    {
      "epoch": 1.9333950046253467,
      "grad_norm": 0.1561937779188156,
      "learning_rate": 2.6671305746024126e-05,
      "loss": 0.5158,
      "step": 9405
    },
    {
      "epoch": 1.9336005755987253,
      "grad_norm": 0.20057018101215363,
      "learning_rate": 2.6662184134165594e-05,
      "loss": 0.5178,
      "step": 9406
    },
    {
      "epoch": 1.933806146572104,
      "grad_norm": 0.17240118980407715,
      "learning_rate": 2.6653063425725552e-05,
      "loss": 0.4964,
      "step": 9407
    },
    {
      "epoch": 1.9340117175454825,
      "grad_norm": 0.16643132269382477,
      "learning_rate": 2.664394362115332e-05,
      "loss": 0.5116,
      "step": 9408
    },
    {
      "epoch": 1.934217288518861,
      "grad_norm": 0.19673089683055878,
      "learning_rate": 2.6634824720898195e-05,
      "loss": 0.5233,
      "step": 9409
    },
    {
      "epoch": 1.9344228594922397,
      "grad_norm": 0.19296656548976898,
      "learning_rate": 2.6625706725409412e-05,
      "loss": 0.5305,
      "step": 9410
    },
    {
      "epoch": 1.9346284304656183,
      "grad_norm": 0.18779776990413666,
      "learning_rate": 2.6616589635136185e-05,
      "loss": 0.5354,
      "step": 9411
    },
    {
      "epoch": 1.9348340014389969,
      "grad_norm": 0.19164229929447174,
      "learning_rate": 2.6607473450527648e-05,
      "loss": 0.5135,
      "step": 9412
    },
    {
      "epoch": 1.9350395724123755,
      "grad_norm": 0.19808048009872437,
      "learning_rate": 2.6598358172032928e-05,
      "loss": 0.4932,
      "step": 9413
    },
    {
      "epoch": 1.935245143385754,
      "grad_norm": 0.19213752448558807,
      "learning_rate": 2.65892438001011e-05,
      "loss": 0.5196,
      "step": 9414
    },
    {
      "epoch": 1.9354507143591326,
      "grad_norm": 0.19726723432540894,
      "learning_rate": 2.658013033518117e-05,
      "loss": 0.523,
      "step": 9415
    },
    {
      "epoch": 1.935656285332511,
      "grad_norm": 0.19998745620250702,
      "learning_rate": 2.657101777772214e-05,
      "loss": 0.5311,
      "step": 9416
    },
    {
      "epoch": 1.9358618563058896,
      "grad_norm": 0.2027643620967865,
      "learning_rate": 2.6561906128172917e-05,
      "loss": 0.5243,
      "step": 9417
    },
    {
      "epoch": 1.9360674272792682,
      "grad_norm": 0.20316363871097565,
      "learning_rate": 2.6552795386982405e-05,
      "loss": 0.5291,
      "step": 9418
    },
    {
      "epoch": 1.9362729982526468,
      "grad_norm": 0.20627467334270477,
      "learning_rate": 2.6543685554599437e-05,
      "loss": 0.4963,
      "step": 9419
    },
    {
      "epoch": 1.9364785692260251,
      "grad_norm": 0.19964690506458282,
      "learning_rate": 2.6534576631472806e-05,
      "loss": 0.5131,
      "step": 9420
    },
    {
      "epoch": 1.9366841401994037,
      "grad_norm": 0.19893944263458252,
      "learning_rate": 2.6525468618051296e-05,
      "loss": 0.5256,
      "step": 9421
    },
    {
      "epoch": 1.9368897111727823,
      "grad_norm": 0.17132525146007538,
      "learning_rate": 2.6516361514783592e-05,
      "loss": 0.5057,
      "step": 9422
    },
    {
      "epoch": 1.937095282146161,
      "grad_norm": 0.16164752840995789,
      "learning_rate": 2.6507255322118362e-05,
      "loss": 0.5165,
      "step": 9423
    },
    {
      "epoch": 1.9373008531195395,
      "grad_norm": 0.19539949297904968,
      "learning_rate": 2.6498150040504224e-05,
      "loss": 0.5299,
      "step": 9424
    },
    {
      "epoch": 1.937506424092918,
      "grad_norm": 0.1996447741985321,
      "learning_rate": 2.6489045670389765e-05,
      "loss": 0.5199,
      "step": 9425
    },
    {
      "epoch": 1.9377119950662967,
      "grad_norm": 0.20690996944904327,
      "learning_rate": 2.6479942212223494e-05,
      "loss": 0.4806,
      "step": 9426
    },
    {
      "epoch": 1.9379175660396752,
      "grad_norm": 0.19668295979499817,
      "learning_rate": 2.6470839666453906e-05,
      "loss": 0.5259,
      "step": 9427
    },
    {
      "epoch": 1.9381231370130538,
      "grad_norm": 0.200824573636055,
      "learning_rate": 2.6461738033529452e-05,
      "loss": 0.5383,
      "step": 9428
    },
    {
      "epoch": 1.9383287079864324,
      "grad_norm": 0.1936202496290207,
      "learning_rate": 2.6452637313898524e-05,
      "loss": 0.5199,
      "step": 9429
    },
    {
      "epoch": 1.938534278959811,
      "grad_norm": 0.1961507350206375,
      "learning_rate": 2.644353750800946e-05,
      "loss": 0.5261,
      "step": 9430
    },
    {
      "epoch": 1.9387398499331896,
      "grad_norm": 0.1959598958492279,
      "learning_rate": 2.643443861631057e-05,
      "loss": 0.5204,
      "step": 9431
    },
    {
      "epoch": 1.938945420906568,
      "grad_norm": 0.200755774974823,
      "learning_rate": 2.642534063925012e-05,
      "loss": 0.5277,
      "step": 9432
    },
    {
      "epoch": 1.9391509918799466,
      "grad_norm": 0.1906225085258484,
      "learning_rate": 2.6416243577276295e-05,
      "loss": 0.5116,
      "step": 9433
    },
    {
      "epoch": 1.9393565628533251,
      "grad_norm": 0.20025970041751862,
      "learning_rate": 2.6407147430837307e-05,
      "loss": 0.5146,
      "step": 9434
    },
    {
      "epoch": 1.9395621338267035,
      "grad_norm": 0.19855552911758423,
      "learning_rate": 2.6398052200381266e-05,
      "loss": 0.5334,
      "step": 9435
    },
    {
      "epoch": 1.939767704800082,
      "grad_norm": 0.19425593316555023,
      "learning_rate": 2.638895788635623e-05,
      "loss": 0.5349,
      "step": 9436
    },
    {
      "epoch": 1.9399732757734607,
      "grad_norm": 0.1955750733613968,
      "learning_rate": 2.637986448921027e-05,
      "loss": 0.5267,
      "step": 9437
    },
    {
      "epoch": 1.9401788467468393,
      "grad_norm": 0.19604718685150146,
      "learning_rate": 2.637077200939135e-05,
      "loss": 0.5303,
      "step": 9438
    },
    {
      "epoch": 1.9403844177202179,
      "grad_norm": 0.1608019322156906,
      "learning_rate": 2.6361680447347424e-05,
      "loss": 0.4905,
      "step": 9439
    },
    {
      "epoch": 1.9405899886935964,
      "grad_norm": 0.16983415186405182,
      "learning_rate": 2.635258980352637e-05,
      "loss": 0.533,
      "step": 9440
    },
    {
      "epoch": 1.940795559666975,
      "grad_norm": 0.2078002691268921,
      "learning_rate": 2.6343500078376077e-05,
      "loss": 0.5277,
      "step": 9441
    },
    {
      "epoch": 1.9410011306403536,
      "grad_norm": 0.20735982060432434,
      "learning_rate": 2.6334411272344328e-05,
      "loss": 0.5188,
      "step": 9442
    },
    {
      "epoch": 1.9412067016137322,
      "grad_norm": 0.1942051202058792,
      "learning_rate": 2.63253233858789e-05,
      "loss": 0.5166,
      "step": 9443
    },
    {
      "epoch": 1.9414122725871108,
      "grad_norm": 0.1942778080701828,
      "learning_rate": 2.6316236419427502e-05,
      "loss": 0.5302,
      "step": 9444
    },
    {
      "epoch": 1.9416178435604894,
      "grad_norm": 0.19624213874340057,
      "learning_rate": 2.6307150373437803e-05,
      "loss": 0.5376,
      "step": 9445
    },
    {
      "epoch": 1.941823414533868,
      "grad_norm": 0.19899539649486542,
      "learning_rate": 2.629806524835743e-05,
      "loss": 0.5369,
      "step": 9446
    },
    {
      "epoch": 1.9420289855072463,
      "grad_norm": 0.1954500824213028,
      "learning_rate": 2.628898104463397e-05,
      "loss": 0.5101,
      "step": 9447
    },
    {
      "epoch": 1.942234556480625,
      "grad_norm": 0.17353855073451996,
      "learning_rate": 2.627989776271496e-05,
      "loss": 0.5164,
      "step": 9448
    },
    {
      "epoch": 1.9424401274540035,
      "grad_norm": 0.16081948578357697,
      "learning_rate": 2.6270815403047906e-05,
      "loss": 0.5429,
      "step": 9449
    },
    {
      "epoch": 1.942645698427382,
      "grad_norm": 0.19543206691741943,
      "learning_rate": 2.626173396608023e-05,
      "loss": 0.5165,
      "step": 9450
    },
    {
      "epoch": 1.9428512694007605,
      "grad_norm": 0.20097336173057556,
      "learning_rate": 2.6252653452259336e-05,
      "loss": 0.5329,
      "step": 9451
    },
    {
      "epoch": 1.943056840374139,
      "grad_norm": 0.20384319126605988,
      "learning_rate": 2.6243573862032566e-05,
      "loss": 0.5314,
      "step": 9452
    },
    {
      "epoch": 1.9432624113475176,
      "grad_norm": 0.19779393076896667,
      "learning_rate": 2.6234495195847262e-05,
      "loss": 0.489,
      "step": 9453
    },
    {
      "epoch": 1.9434679823208962,
      "grad_norm": 0.16127046942710876,
      "learning_rate": 2.6225417454150668e-05,
      "loss": 0.5033,
      "step": 9454
    },
    {
      "epoch": 1.9436735532942748,
      "grad_norm": 0.1624097228050232,
      "learning_rate": 2.6216340637389987e-05,
      "loss": 0.532,
      "step": 9455
    },
    {
      "epoch": 1.9438791242676534,
      "grad_norm": 0.16971097886562347,
      "learning_rate": 2.620726474601243e-05,
      "loss": 0.5058,
      "step": 9456
    },
    {
      "epoch": 1.944084695241032,
      "grad_norm": 0.12277817726135254,
      "learning_rate": 2.619818978046509e-05,
      "loss": 0.4925,
      "step": 9457
    },
    {
      "epoch": 1.9442902662144106,
      "grad_norm": 0.16644692420959473,
      "learning_rate": 2.618911574119507e-05,
      "loss": 0.5217,
      "step": 9458
    },
    {
      "epoch": 1.9444958371877892,
      "grad_norm": 0.16563105583190918,
      "learning_rate": 2.61800426286494e-05,
      "loss": 0.5091,
      "step": 9459
    },
    {
      "epoch": 1.9447014081611678,
      "grad_norm": 0.1673881858587265,
      "learning_rate": 2.6170970443275054e-05,
      "loss": 0.5416,
      "step": 9460
    },
    {
      "epoch": 1.9449069791345464,
      "grad_norm": 0.20645494759082794,
      "learning_rate": 2.6161899185518977e-05,
      "loss": 0.5182,
      "step": 9461
    },
    {
      "epoch": 1.9451125501079247,
      "grad_norm": 0.19935904443264008,
      "learning_rate": 2.615282885582809e-05,
      "loss": 0.5234,
      "step": 9462
    },
    {
      "epoch": 1.9453181210813033,
      "grad_norm": 0.1983654797077179,
      "learning_rate": 2.614375945464924e-05,
      "loss": 0.5292,
      "step": 9463
    },
    {
      "epoch": 1.945523692054682,
      "grad_norm": 0.20159868896007538,
      "learning_rate": 2.6134690982429228e-05,
      "loss": 0.5162,
      "step": 9464
    },
    {
      "epoch": 1.9457292630280605,
      "grad_norm": 0.2034175992012024,
      "learning_rate": 2.612562343961481e-05,
      "loss": 0.5495,
      "step": 9465
    },
    {
      "epoch": 1.9459348340014389,
      "grad_norm": 0.16713906824588776,
      "learning_rate": 2.611655682665271e-05,
      "loss": 0.5031,
      "step": 9466
    },
    {
      "epoch": 1.9461404049748174,
      "grad_norm": 0.1686525195837021,
      "learning_rate": 2.6107491143989593e-05,
      "loss": 0.5444,
      "step": 9467
    },
    {
      "epoch": 1.946345975948196,
      "grad_norm": 0.19990558922290802,
      "learning_rate": 2.6098426392072068e-05,
      "loss": 0.5149,
      "step": 9468
    },
    {
      "epoch": 1.9465515469215746,
      "grad_norm": 0.1923760622739792,
      "learning_rate": 2.608936257134675e-05,
      "loss": 0.5353,
      "step": 9469
    },
    {
      "epoch": 1.9467571178949532,
      "grad_norm": 0.20133623480796814,
      "learning_rate": 2.6080299682260142e-05,
      "loss": 0.5079,
      "step": 9470
    },
    {
      "epoch": 1.9469626888683318,
      "grad_norm": 0.19276608526706696,
      "learning_rate": 2.6071237725258744e-05,
      "loss": 0.5221,
      "step": 9471
    },
    {
      "epoch": 1.9471682598417104,
      "grad_norm": 0.20174479484558105,
      "learning_rate": 2.6062176700788986e-05,
      "loss": 0.5174,
      "step": 9472
    },
    {
      "epoch": 1.947373830815089,
      "grad_norm": 0.2010992169380188,
      "learning_rate": 2.605311660929725e-05,
      "loss": 0.5325,
      "step": 9473
    },
    {
      "epoch": 1.9475794017884676,
      "grad_norm": 0.16488604247570038,
      "learning_rate": 2.604405745122992e-05,
      "loss": 0.4957,
      "step": 9474
    },
    {
      "epoch": 1.9477849727618461,
      "grad_norm": 0.12317883968353271,
      "learning_rate": 2.6034999227033278e-05,
      "loss": 0.527,
      "step": 9475
    },
    {
      "epoch": 1.9479905437352247,
      "grad_norm": 0.16236087679862976,
      "learning_rate": 2.602594193715357e-05,
      "loss": 0.5246,
      "step": 9476
    },
    {
      "epoch": 1.948196114708603,
      "grad_norm": 0.20070423185825348,
      "learning_rate": 2.6016885582037027e-05,
      "loss": 0.5258,
      "step": 9477
    },
    {
      "epoch": 1.9484016856819817,
      "grad_norm": 0.20194244384765625,
      "learning_rate": 2.6007830162129808e-05,
      "loss": 0.5142,
      "step": 9478
    },
    {
      "epoch": 1.9486072566553603,
      "grad_norm": 0.20240890979766846,
      "learning_rate": 2.599877567787803e-05,
      "loss": 0.5443,
      "step": 9479
    },
    {
      "epoch": 1.9488128276287389,
      "grad_norm": 0.19648049771785736,
      "learning_rate": 2.598972212972776e-05,
      "loss": 0.534,
      "step": 9480
    },
    {
      "epoch": 1.9490183986021172,
      "grad_norm": 0.2065919041633606,
      "learning_rate": 2.5980669518125028e-05,
      "loss": 0.5381,
      "step": 9481
    },
    {
      "epoch": 1.9492239695754958,
      "grad_norm": 0.20330984890460968,
      "learning_rate": 2.59716178435158e-05,
      "loss": 0.5169,
      "step": 9482
    },
    {
      "epoch": 1.9494295405488744,
      "grad_norm": 0.20162275433540344,
      "learning_rate": 2.5962567106346034e-05,
      "loss": 0.521,
      "step": 9483
    },
    {
      "epoch": 1.949635111522253,
      "grad_norm": 0.16646580398082733,
      "learning_rate": 2.5953517307061608e-05,
      "loss": 0.5098,
      "step": 9484
    },
    {
      "epoch": 1.9498406824956316,
      "grad_norm": 0.16191188991069794,
      "learning_rate": 2.594446844610836e-05,
      "loss": 0.5327,
      "step": 9485
    },
    {
      "epoch": 1.9500462534690102,
      "grad_norm": 0.1962418407201767,
      "learning_rate": 2.593542052393209e-05,
      "loss": 0.5035,
      "step": 9486
    },
    {
      "epoch": 1.9502518244423888,
      "grad_norm": 0.16942986845970154,
      "learning_rate": 2.5926373540978536e-05,
      "loss": 0.4919,
      "step": 9487
    },
    {
      "epoch": 1.9504573954157673,
      "grad_norm": 0.16431602835655212,
      "learning_rate": 2.5917327497693413e-05,
      "loss": 0.5368,
      "step": 9488
    },
    {
      "epoch": 1.950662966389146,
      "grad_norm": 0.1935006082057953,
      "learning_rate": 2.590828239452235e-05,
      "loss": 0.5189,
      "step": 9489
    },
    {
      "epoch": 1.9508685373625245,
      "grad_norm": 0.20640498399734497,
      "learning_rate": 2.5899238231911006e-05,
      "loss": 0.4955,
      "step": 9490
    },
    {
      "epoch": 1.9510741083359031,
      "grad_norm": 0.19904139637947083,
      "learning_rate": 2.5890195010304913e-05,
      "loss": 0.5199,
      "step": 9491
    },
    {
      "epoch": 1.9512796793092815,
      "grad_norm": 0.19545705616474152,
      "learning_rate": 2.5881152730149588e-05,
      "loss": 0.491,
      "step": 9492
    },
    {
      "epoch": 1.95148525028266,
      "grad_norm": 0.203142449259758,
      "learning_rate": 2.5872111391890512e-05,
      "loss": 0.5364,
      "step": 9493
    },
    {
      "epoch": 1.9516908212560387,
      "grad_norm": 0.2026265263557434,
      "learning_rate": 2.586307099597308e-05,
      "loss": 0.5117,
      "step": 9494
    },
    {
      "epoch": 1.9518963922294172,
      "grad_norm": 0.1932077556848526,
      "learning_rate": 2.585403154284272e-05,
      "loss": 0.4905,
      "step": 9495
    },
    {
      "epoch": 1.9521019632027956,
      "grad_norm": 0.19804999232292175,
      "learning_rate": 2.5844993032944735e-05,
      "loss": 0.5318,
      "step": 9496
    },
    {
      "epoch": 1.9523075341761742,
      "grad_norm": 0.19540899991989136,
      "learning_rate": 2.58359554667244e-05,
      "loss": 0.5114,
      "step": 9497
    },
    {
      "epoch": 1.9525131051495528,
      "grad_norm": 0.1968623399734497,
      "learning_rate": 2.5826918844626975e-05,
      "loss": 0.5148,
      "step": 9498
    },
    {
      "epoch": 1.9527186761229314,
      "grad_norm": 0.19433245062828064,
      "learning_rate": 2.5817883167097644e-05,
      "loss": 0.5073,
      "step": 9499
    },
    {
      "epoch": 1.95292424709631,
      "grad_norm": 0.2015180140733719,
      "learning_rate": 2.580884843458156e-05,
      "loss": 0.5178,
      "step": 9500
    },
    {
      "epoch": 1.9531298180696886,
      "grad_norm": 0.199843630194664,
      "learning_rate": 2.579981464752381e-05,
      "loss": 0.4852,
      "step": 9501
    },
    {
      "epoch": 1.9533353890430671,
      "grad_norm": 0.16600465774536133,
      "learning_rate": 2.5790781806369435e-05,
      "loss": 0.4928,
      "step": 9502
    },
    {
      "epoch": 1.9535409600164457,
      "grad_norm": 0.16455240547657013,
      "learning_rate": 2.578174991156347e-05,
      "loss": 0.5443,
      "step": 9503
    },
    {
      "epoch": 1.9537465309898243,
      "grad_norm": 0.16569803655147552,
      "learning_rate": 2.5772718963550868e-05,
      "loss": 0.5102,
      "step": 9504
    },
    {
      "epoch": 1.953952101963203,
      "grad_norm": 0.1648106575012207,
      "learning_rate": 2.5763688962776526e-05,
      "loss": 0.5247,
      "step": 9505
    },
    {
      "epoch": 1.9541576729365815,
      "grad_norm": 0.20244595408439636,
      "learning_rate": 2.5754659909685322e-05,
      "loss": 0.5192,
      "step": 9506
    },
    {
      "epoch": 1.95436324390996,
      "grad_norm": 0.20293334126472473,
      "learning_rate": 2.5745631804722077e-05,
      "loss": 0.5294,
      "step": 9507
    },
    {
      "epoch": 1.9545688148833384,
      "grad_norm": 0.19975414872169495,
      "learning_rate": 2.5736604648331552e-05,
      "loss": 0.5245,
      "step": 9508
    },
    {
      "epoch": 1.954774385856717,
      "grad_norm": 0.19464215636253357,
      "learning_rate": 2.5727578440958465e-05,
      "loss": 0.5115,
      "step": 9509
    },
    {
      "epoch": 1.9549799568300956,
      "grad_norm": 0.19542162120342255,
      "learning_rate": 2.571855318304753e-05,
      "loss": 0.5251,
      "step": 9510
    },
    {
      "epoch": 1.955185527803474,
      "grad_norm": 0.19343827664852142,
      "learning_rate": 2.570952887504335e-05,
      "loss": 0.5204,
      "step": 9511
    },
    {
      "epoch": 1.9553910987768526,
      "grad_norm": 0.17137175798416138,
      "learning_rate": 2.5700505517390526e-05,
      "loss": 0.5097,
      "step": 9512
    },
    {
      "epoch": 1.9555966697502312,
      "grad_norm": 0.11898645013570786,
      "learning_rate": 2.569148311053358e-05,
      "loss": 0.5127,
      "step": 9513
    },
    {
      "epoch": 1.9558022407236098,
      "grad_norm": 0.12215547263622284,
      "learning_rate": 2.5682461654917025e-05,
      "loss": 0.5129,
      "step": 9514
    },
    {
      "epoch": 1.9560078116969883,
      "grad_norm": 0.1605924665927887,
      "learning_rate": 2.5673441150985286e-05,
      "loss": 0.508,
      "step": 9515
    },
    {
      "epoch": 1.956213382670367,
      "grad_norm": 0.16376885771751404,
      "learning_rate": 2.5664421599182757e-05,
      "loss": 0.4932,
      "step": 9516
    },
    {
      "epoch": 1.9564189536437455,
      "grad_norm": 0.15705506503582,
      "learning_rate": 2.5655402999953816e-05,
      "loss": 0.5217,
      "step": 9517
    },
    {
      "epoch": 1.956624524617124,
      "grad_norm": 0.19561244547367096,
      "learning_rate": 2.5646385353742732e-05,
      "loss": 0.5084,
      "step": 9518
    },
    {
      "epoch": 1.9568300955905027,
      "grad_norm": 0.19796496629714966,
      "learning_rate": 2.563736866099381e-05,
      "loss": 0.5076,
      "step": 9519
    },
    {
      "epoch": 1.9570356665638813,
      "grad_norm": 0.20186658203601837,
      "learning_rate": 2.562835292215123e-05,
      "loss": 0.5518,
      "step": 9520
    },
    {
      "epoch": 1.9572412375372599,
      "grad_norm": 0.16234740614891052,
      "learning_rate": 2.5619338137659155e-05,
      "loss": 0.4855,
      "step": 9521
    },
    {
      "epoch": 1.9574468085106385,
      "grad_norm": 0.1610114425420761,
      "learning_rate": 2.5610324307961708e-05,
      "loss": 0.5212,
      "step": 9522
    },
    {
      "epoch": 1.9576523794840168,
      "grad_norm": 0.19542771577835083,
      "learning_rate": 2.560131143350294e-05,
      "loss": 0.5029,
      "step": 9523
    },
    {
      "epoch": 1.9578579504573954,
      "grad_norm": 0.20270508527755737,
      "learning_rate": 2.55922995147269e-05,
      "loss": 0.5271,
      "step": 9524
    },
    {
      "epoch": 1.958063521430774,
      "grad_norm": 0.18990576267242432,
      "learning_rate": 2.5583288552077552e-05,
      "loss": 0.5104,
      "step": 9525
    },
    {
      "epoch": 1.9582690924041524,
      "grad_norm": 0.15766002237796783,
      "learning_rate": 2.5574278545998827e-05,
      "loss": 0.4951,
      "step": 9526
    },
    {
      "epoch": 1.958474663377531,
      "grad_norm": 0.16321411728858948,
      "learning_rate": 2.5565269496934602e-05,
      "loss": 0.5406,
      "step": 9527
    },
    {
      "epoch": 1.9586802343509095,
      "grad_norm": 0.2016243040561676,
      "learning_rate": 2.5556261405328712e-05,
      "loss": 0.5152,
      "step": 9528
    },
    {
      "epoch": 1.9588858053242881,
      "grad_norm": 0.17193591594696045,
      "learning_rate": 2.554725427162494e-05,
      "loss": 0.5029,
      "step": 9529
    },
    {
      "epoch": 1.9590913762976667,
      "grad_norm": 0.16781920194625854,
      "learning_rate": 2.553824809626701e-05,
      "loss": 0.5322,
      "step": 9530
    },
    {
      "epoch": 1.9592969472710453,
      "grad_norm": 0.17390578985214233,
      "learning_rate": 2.5529242879698655e-05,
      "loss": 0.5052,
      "step": 9531
    },
    {
      "epoch": 1.959502518244424,
      "grad_norm": 0.12395156174898148,
      "learning_rate": 2.552023862236349e-05,
      "loss": 0.4978,
      "step": 9532
    },
    {
      "epoch": 1.9597080892178025,
      "grad_norm": 0.16266000270843506,
      "learning_rate": 2.5511235324705127e-05,
      "loss": 0.5345,
      "step": 9533
    },
    {
      "epoch": 1.959913660191181,
      "grad_norm": 0.2078227996826172,
      "learning_rate": 2.5502232987167103e-05,
      "loss": 0.5167,
      "step": 9534
    },
    {
      "epoch": 1.9601192311645597,
      "grad_norm": 0.20280295610427856,
      "learning_rate": 2.549323161019293e-05,
      "loss": 0.5316,
      "step": 9535
    },
    {
      "epoch": 1.9603248021379382,
      "grad_norm": 0.20383380353450775,
      "learning_rate": 2.5484231194226058e-05,
      "loss": 0.5124,
      "step": 9536
    },
    {
      "epoch": 1.9605303731113168,
      "grad_norm": 0.19895561039447784,
      "learning_rate": 2.547523173970989e-05,
      "loss": 0.5198,
      "step": 9537
    },
    {
      "epoch": 1.9607359440846952,
      "grad_norm": 0.20123358070850372,
      "learning_rate": 2.546623324708781e-05,
      "loss": 0.5255,
      "step": 9538
    },
    {
      "epoch": 1.9609415150580738,
      "grad_norm": 0.2038145512342453,
      "learning_rate": 2.5457235716803115e-05,
      "loss": 0.5309,
      "step": 9539
    },
    {
      "epoch": 1.9611470860314524,
      "grad_norm": 0.20116189122200012,
      "learning_rate": 2.5448239149299055e-05,
      "loss": 0.5075,
      "step": 9540
    },
    {
      "epoch": 1.961352657004831,
      "grad_norm": 0.2058117836713791,
      "learning_rate": 2.5439243545018884e-05,
      "loss": 0.533,
      "step": 9541
    },
    {
      "epoch": 1.9615582279782093,
      "grad_norm": 0.2008356899023056,
      "learning_rate": 2.543024890440576e-05,
      "loss": 0.5321,
      "step": 9542
    },
    {
      "epoch": 1.961763798951588,
      "grad_norm": 0.19685760140419006,
      "learning_rate": 2.5421255227902804e-05,
      "loss": 0.4969,
      "step": 9543
    },
    {
      "epoch": 1.9619693699249665,
      "grad_norm": 0.1951378434896469,
      "learning_rate": 2.541226251595307e-05,
      "loss": 0.4999,
      "step": 9544
    },
    {
      "epoch": 1.962174940898345,
      "grad_norm": 0.19807179272174835,
      "learning_rate": 2.5403270768999633e-05,
      "loss": 0.5146,
      "step": 9545
    },
    {
      "epoch": 1.9623805118717237,
      "grad_norm": 0.19595085084438324,
      "learning_rate": 2.539427998748544e-05,
      "loss": 0.5223,
      "step": 9546
    },
    {
      "epoch": 1.9625860828451023,
      "grad_norm": 0.19711394608020782,
      "learning_rate": 2.5385290171853446e-05,
      "loss": 0.5196,
      "step": 9547
    },
    {
      "epoch": 1.9627916538184809,
      "grad_norm": 0.20173287391662598,
      "learning_rate": 2.5376301322546523e-05,
      "loss": 0.5277,
      "step": 9548
    },
    {
      "epoch": 1.9629972247918595,
      "grad_norm": 0.20318298041820526,
      "learning_rate": 2.5367313440007513e-05,
      "loss": 0.5174,
      "step": 9549
    },
    {
      "epoch": 1.963202795765238,
      "grad_norm": 0.20232440531253815,
      "learning_rate": 2.5358326524679206e-05,
      "loss": 0.5325,
      "step": 9550
    },
    {
      "epoch": 1.9634083667386166,
      "grad_norm": 0.2035774439573288,
      "learning_rate": 2.534934057700433e-05,
      "loss": 0.516,
      "step": 9551
    },
    {
      "epoch": 1.9636139377119952,
      "grad_norm": 0.20142172276973724,
      "learning_rate": 2.534035559742561e-05,
      "loss": 0.5189,
      "step": 9552
    },
    {
      "epoch": 1.9638195086853736,
      "grad_norm": 0.2012597769498825,
      "learning_rate": 2.5331371586385683e-05,
      "loss": 0.5166,
      "step": 9553
    },
    {
      "epoch": 1.9640250796587522,
      "grad_norm": 0.1986485868692398,
      "learning_rate": 2.532238854432715e-05,
      "loss": 0.5134,
      "step": 9554
    },
    {
      "epoch": 1.9642306506321308,
      "grad_norm": 0.20366504788398743,
      "learning_rate": 2.531340647169256e-05,
      "loss": 0.5146,
      "step": 9555
    },
    {
      "epoch": 1.9644362216055093,
      "grad_norm": 0.19817805290222168,
      "learning_rate": 2.530442536892442e-05,
      "loss": 0.4911,
      "step": 9556
    },
    {
      "epoch": 1.9646417925788877,
      "grad_norm": 0.20008954405784607,
      "learning_rate": 2.529544523646518e-05,
      "loss": 0.574,
      "step": 9557
    },
    {
      "epoch": 1.9648473635522663,
      "grad_norm": 0.2054361253976822,
      "learning_rate": 2.5286466074757237e-05,
      "loss": 0.5204,
      "step": 9558
    },
    {
      "epoch": 1.965052934525645,
      "grad_norm": 0.19738180935382843,
      "learning_rate": 2.527748788424299e-05,
      "loss": 0.5198,
      "step": 9559
    },
    {
      "epoch": 1.9652585054990235,
      "grad_norm": 0.20528697967529297,
      "learning_rate": 2.526851066536473e-05,
      "loss": 0.5439,
      "step": 9560
    },
    {
      "epoch": 1.965464076472402,
      "grad_norm": 0.21813803911209106,
      "learning_rate": 2.5259534418564713e-05,
      "loss": 0.5442,
      "step": 9561
    },
    {
      "epoch": 1.9656696474457807,
      "grad_norm": 0.20172588527202606,
      "learning_rate": 2.5250559144285174e-05,
      "loss": 0.5133,
      "step": 9562
    },
    {
      "epoch": 1.9658752184191592,
      "grad_norm": 0.19807198643684387,
      "learning_rate": 2.5241584842968285e-05,
      "loss": 0.5051,
      "step": 9563
    },
    {
      "epoch": 1.9660807893925378,
      "grad_norm": 0.2078738957643509,
      "learning_rate": 2.5232611515056168e-05,
      "loss": 0.5716,
      "step": 9564
    },
    {
      "epoch": 1.9662863603659164,
      "grad_norm": 0.19806239008903503,
      "learning_rate": 2.522363916099086e-05,
      "loss": 0.5293,
      "step": 9565
    },
    {
      "epoch": 1.966491931339295,
      "grad_norm": 0.20789627730846405,
      "learning_rate": 2.5214667781214436e-05,
      "loss": 0.5446,
      "step": 9566
    },
    {
      "epoch": 1.9666975023126736,
      "grad_norm": 0.20237933099269867,
      "learning_rate": 2.5205697376168853e-05,
      "loss": 0.5286,
      "step": 9567
    },
    {
      "epoch": 1.966903073286052,
      "grad_norm": 0.2071990966796875,
      "learning_rate": 2.5196727946296043e-05,
      "loss": 0.5321,
      "step": 9568
    },
    {
      "epoch": 1.9671086442594305,
      "grad_norm": 0.19845061004161835,
      "learning_rate": 2.518775949203789e-05,
      "loss": 0.5272,
      "step": 9569
    },
    {
      "epoch": 1.9673142152328091,
      "grad_norm": 0.2033272236585617,
      "learning_rate": 2.5178792013836224e-05,
      "loss": 0.513,
      "step": 9570
    },
    {
      "epoch": 1.9675197862061877,
      "grad_norm": 0.20528094470500946,
      "learning_rate": 2.5169825512132833e-05,
      "loss": 0.5322,
      "step": 9571
    },
    {
      "epoch": 1.967725357179566,
      "grad_norm": 0.19687287509441376,
      "learning_rate": 2.516085998736943e-05,
      "loss": 0.5129,
      "step": 9572
    },
    {
      "epoch": 1.9679309281529447,
      "grad_norm": 0.16771896183490753,
      "learning_rate": 2.5151895439987746e-05,
      "loss": 0.5116,
      "step": 9573
    },
    {
      "epoch": 1.9681364991263233,
      "grad_norm": 0.16580241918563843,
      "learning_rate": 2.5142931870429404e-05,
      "loss": 0.527,
      "step": 9574
    },
    {
      "epoch": 1.9683420700997019,
      "grad_norm": 0.20436574518680573,
      "learning_rate": 2.5133969279136e-05,
      "loss": 0.521,
      "step": 9575
    },
    {
      "epoch": 1.9685476410730804,
      "grad_norm": 0.1928415149450302,
      "learning_rate": 2.5125007666549074e-05,
      "loss": 0.5062,
      "step": 9576
    },
    {
      "epoch": 1.968753212046459,
      "grad_norm": 0.19831101596355438,
      "learning_rate": 2.5116047033110125e-05,
      "loss": 0.5124,
      "step": 9577
    },
    {
      "epoch": 1.9689587830198376,
      "grad_norm": 0.1986418068408966,
      "learning_rate": 2.510708737926058e-05,
      "loss": 0.547,
      "step": 9578
    },
    {
      "epoch": 1.9691643539932162,
      "grad_norm": 0.19999928772449493,
      "learning_rate": 2.509812870544189e-05,
      "loss": 0.5286,
      "step": 9579
    },
    {
      "epoch": 1.9693699249665948,
      "grad_norm": 0.1935226321220398,
      "learning_rate": 2.5089171012095367e-05,
      "loss": 0.5285,
      "step": 9580
    },
    {
      "epoch": 1.9695754959399734,
      "grad_norm": 0.18808215856552124,
      "learning_rate": 2.5080214299662322e-05,
      "loss": 0.5051,
      "step": 9581
    },
    {
      "epoch": 1.969781066913352,
      "grad_norm": 0.20196162164211273,
      "learning_rate": 2.507125856858401e-05,
      "loss": 0.5222,
      "step": 9582
    },
    {
      "epoch": 1.9699866378867303,
      "grad_norm": 0.20096677541732788,
      "learning_rate": 2.5062303819301645e-05,
      "loss": 0.5405,
      "step": 9583
    },
    {
      "epoch": 1.970192208860109,
      "grad_norm": 0.20000407099723816,
      "learning_rate": 2.5053350052256393e-05,
      "loss": 0.5173,
      "step": 9584
    },
    {
      "epoch": 1.9703977798334875,
      "grad_norm": 0.19387024641036987,
      "learning_rate": 2.5044397267889327e-05,
      "loss": 0.4956,
      "step": 9585
    },
    {
      "epoch": 1.970603350806866,
      "grad_norm": 0.16809746623039246,
      "learning_rate": 2.5035445466641558e-05,
      "loss": 0.5046,
      "step": 9586
    },
    {
      "epoch": 1.9708089217802445,
      "grad_norm": 0.16820058226585388,
      "learning_rate": 2.502649464895408e-05,
      "loss": 0.5309,
      "step": 9587
    },
    {
      "epoch": 1.971014492753623,
      "grad_norm": 0.21059322357177734,
      "learning_rate": 2.501754481526785e-05,
      "loss": 0.5047,
      "step": 9588
    },
    {
      "epoch": 1.9712200637270016,
      "grad_norm": 0.20109686255455017,
      "learning_rate": 2.5008595966023786e-05,
      "loss": 0.5069,
      "step": 9589
    },
    {
      "epoch": 1.9714256347003802,
      "grad_norm": 0.20082977414131165,
      "learning_rate": 2.4999648101662763e-05,
      "loss": 0.5329,
      "step": 9590
    },
    {
      "epoch": 1.9716312056737588,
      "grad_norm": 0.1989169418811798,
      "learning_rate": 2.4990701222625602e-05,
      "loss": 0.5102,
      "step": 9591
    },
    {
      "epoch": 1.9718367766471374,
      "grad_norm": 0.19520479440689087,
      "learning_rate": 2.4981755329353043e-05,
      "loss": 0.5116,
      "step": 9592
    },
    {
      "epoch": 1.972042347620516,
      "grad_norm": 0.17147661745548248,
      "learning_rate": 2.4972810422285853e-05,
      "loss": 0.4902,
      "step": 9593
    },
    {
      "epoch": 1.9722479185938946,
      "grad_norm": 0.16414588689804077,
      "learning_rate": 2.496386650186469e-05,
      "loss": 0.5109,
      "step": 9594
    },
    {
      "epoch": 1.9724534895672732,
      "grad_norm": 0.20732592046260834,
      "learning_rate": 2.4954923568530175e-05,
      "loss": 0.5128,
      "step": 9595
    },
    {
      "epoch": 1.9726590605406518,
      "grad_norm": 0.19795072078704834,
      "learning_rate": 2.4945981622722878e-05,
      "loss": 0.5122,
      "step": 9596
    },
    {
      "epoch": 1.9728646315140304,
      "grad_norm": 0.2000289112329483,
      "learning_rate": 2.493704066488334e-05,
      "loss": 0.5282,
      "step": 9597
    },
    {
      "epoch": 1.973070202487409,
      "grad_norm": 0.1769014447927475,
      "learning_rate": 2.4928100695452037e-05,
      "loss": 0.4991,
      "step": 9598
    },
    {
      "epoch": 1.9732757734607873,
      "grad_norm": 0.16739298403263092,
      "learning_rate": 2.4919161714869377e-05,
      "loss": 0.507,
      "step": 9599
    },
    {
      "epoch": 1.973481344434166,
      "grad_norm": 0.199861079454422,
      "learning_rate": 2.4910223723575778e-05,
      "loss": 0.5178,
      "step": 9600
    },
    {
      "epoch": 1.9736869154075445,
      "grad_norm": 0.16744980216026306,
      "learning_rate": 2.490128672201156e-05,
      "loss": 0.4671,
      "step": 9601
    },
    {
      "epoch": 1.9738924863809229,
      "grad_norm": 0.16180412471294403,
      "learning_rate": 2.4892350710617003e-05,
      "loss": 0.5274,
      "step": 9602
    },
    {
      "epoch": 1.9740980573543014,
      "grad_norm": 0.2564503848552704,
      "learning_rate": 2.488341568983232e-05,
      "loss": 0.5285,
      "step": 9603
    },
    {
      "epoch": 1.97430362832768,
      "grad_norm": 0.16161498427391052,
      "learning_rate": 2.4874481660097748e-05,
      "loss": 0.4968,
      "step": 9604
    },
    {
      "epoch": 1.9745091993010586,
      "grad_norm": 0.11919713020324707,
      "learning_rate": 2.4865548621853394e-05,
      "loss": 0.5128,
      "step": 9605
    },
    {
      "epoch": 1.9747147702744372,
      "grad_norm": 0.16267365217208862,
      "learning_rate": 2.4856616575539334e-05,
      "loss": 0.5247,
      "step": 9606
    },
    {
      "epoch": 1.9749203412478158,
      "grad_norm": 0.16840054094791412,
      "learning_rate": 2.4847685521595643e-05,
      "loss": 0.4839,
      "step": 9607
    },
    {
      "epoch": 1.9751259122211944,
      "grad_norm": 0.17324216663837433,
      "learning_rate": 2.48387554604623e-05,
      "loss": 0.5092,
      "step": 9608
    },
    {
      "epoch": 1.975331483194573,
      "grad_norm": 0.16955405473709106,
      "learning_rate": 2.4829826392579227e-05,
      "loss": 0.4955,
      "step": 9609
    },
    {
      "epoch": 1.9755370541679516,
      "grad_norm": 0.16968326270580292,
      "learning_rate": 2.4820898318386345e-05,
      "loss": 0.5285,
      "step": 9610
    },
    {
      "epoch": 1.9757426251413301,
      "grad_norm": 0.2073184996843338,
      "learning_rate": 2.481197123832348e-05,
      "loss": 0.5258,
      "step": 9611
    },
    {
      "epoch": 1.9759481961147087,
      "grad_norm": 0.2012372761964798,
      "learning_rate": 2.4803045152830442e-05,
      "loss": 0.5157,
      "step": 9612
    },
    {
      "epoch": 1.9761537670880873,
      "grad_norm": 0.1959368884563446,
      "learning_rate": 2.4794120062346946e-05,
      "loss": 0.5346,
      "step": 9613
    },
    {
      "epoch": 1.9763593380614657,
      "grad_norm": 0.19632303714752197,
      "learning_rate": 2.478519596731273e-05,
      "loss": 0.5138,
      "step": 9614
    },
    {
      "epoch": 1.9765649090348443,
      "grad_norm": 0.19955292344093323,
      "learning_rate": 2.4776272868167424e-05,
      "loss": 0.535,
      "step": 9615
    },
    {
      "epoch": 1.9767704800082229,
      "grad_norm": 0.19841422140598297,
      "learning_rate": 2.476735076535063e-05,
      "loss": 0.5054,
      "step": 9616
    },
    {
      "epoch": 1.9769760509816015,
      "grad_norm": 0.19676409661769867,
      "learning_rate": 2.4758429659301894e-05,
      "loss": 0.5238,
      "step": 9617
    },
    {
      "epoch": 1.9771816219549798,
      "grad_norm": 0.19223178923130035,
      "learning_rate": 2.4749509550460724e-05,
      "loss": 0.5013,
      "step": 9618
    },
    {
      "epoch": 1.9773871929283584,
      "grad_norm": 0.20213696360588074,
      "learning_rate": 2.474059043926656e-05,
      "loss": 0.5086,
      "step": 9619
    },
    {
      "epoch": 1.977592763901737,
      "grad_norm": 0.2001548409461975,
      "learning_rate": 2.4731672326158804e-05,
      "loss": 0.4985,
      "step": 9620
    },
    {
      "epoch": 1.9777983348751156,
      "grad_norm": 0.20245525240898132,
      "learning_rate": 2.4722755211576836e-05,
      "loss": 0.5327,
      "step": 9621
    },
    {
      "epoch": 1.9780039058484942,
      "grad_norm": 0.2233567237854004,
      "learning_rate": 2.4713839095959936e-05,
      "loss": 0.5095,
      "step": 9622
    },
    {
      "epoch": 1.9782094768218728,
      "grad_norm": 0.19729016721248627,
      "learning_rate": 2.470492397974737e-05,
      "loss": 0.4831,
      "step": 9623
    },
    {
      "epoch": 1.9784150477952513,
      "grad_norm": 0.20027440786361694,
      "learning_rate": 2.4696009863378342e-05,
      "loss": 0.5315,
      "step": 9624
    },
    {
      "epoch": 1.97862061876863,
      "grad_norm": 0.20336763560771942,
      "learning_rate": 2.4687096747291987e-05,
      "loss": 0.5019,
      "step": 9625
    },
    {
      "epoch": 1.9788261897420085,
      "grad_norm": 0.16322872042655945,
      "learning_rate": 2.4678184631927453e-05,
      "loss": 0.4873,
      "step": 9626
    },
    {
      "epoch": 1.9790317607153871,
      "grad_norm": 0.1632460653781891,
      "learning_rate": 2.4669273517723777e-05,
      "loss": 0.524,
      "step": 9627
    },
    {
      "epoch": 1.9792373316887657,
      "grad_norm": 0.19479408860206604,
      "learning_rate": 2.466036340511995e-05,
      "loss": 0.5186,
      "step": 9628
    },
    {
      "epoch": 1.979442902662144,
      "grad_norm": 0.19414758682250977,
      "learning_rate": 2.4651454294554972e-05,
      "loss": 0.5153,
      "step": 9629
    },
    {
      "epoch": 1.9796484736355227,
      "grad_norm": 0.1960826814174652,
      "learning_rate": 2.464254618646773e-05,
      "loss": 0.5356,
      "step": 9630
    },
    {
      "epoch": 1.9798540446089012,
      "grad_norm": 0.19612587988376617,
      "learning_rate": 2.4633639081297088e-05,
      "loss": 0.5033,
      "step": 9631
    },
    {
      "epoch": 1.9800596155822798,
      "grad_norm": 1.9576839208602905,
      "learning_rate": 2.462473297948186e-05,
      "loss": 0.5465,
      "step": 9632
    },
    {
      "epoch": 1.9802651865556582,
      "grad_norm": 0.2153571993112564,
      "learning_rate": 2.4615827881460797e-05,
      "loss": 0.531,
      "step": 9633
    },
    {
      "epoch": 1.9804707575290368,
      "grad_norm": 0.20636354386806488,
      "learning_rate": 2.4606923787672607e-05,
      "loss": 0.5394,
      "step": 9634
    },
    {
      "epoch": 1.9806763285024154,
      "grad_norm": 0.19910024106502533,
      "learning_rate": 2.4598020698555975e-05,
      "loss": 0.5212,
      "step": 9635
    },
    {
      "epoch": 1.980881899475794,
      "grad_norm": 0.19475533068180084,
      "learning_rate": 2.458911861454951e-05,
      "loss": 0.5175,
      "step": 9636
    },
    {
      "epoch": 1.9810874704491725,
      "grad_norm": 0.20673874020576477,
      "learning_rate": 2.4580217536091772e-05,
      "loss": 0.5258,
      "step": 9637
    },
    {
      "epoch": 1.9812930414225511,
      "grad_norm": 0.20791196823120117,
      "learning_rate": 2.4571317463621278e-05,
      "loss": 0.5278,
      "step": 9638
    },
    {
      "epoch": 1.9814986123959297,
      "grad_norm": 0.20311853289604187,
      "learning_rate": 2.4562418397576482e-05,
      "loss": 0.5103,
      "step": 9639
    },
    {
      "epoch": 1.9817041833693083,
      "grad_norm": 0.18043197691440582,
      "learning_rate": 2.4553520338395808e-05,
      "loss": 0.5009,
      "step": 9640
    },
    {
      "epoch": 1.981909754342687,
      "grad_norm": 0.16400253772735596,
      "learning_rate": 2.45446232865176e-05,
      "loss": 0.5219,
      "step": 9641
    },
    {
      "epoch": 1.9821153253160655,
      "grad_norm": 0.20592088997364044,
      "learning_rate": 2.453572724238022e-05,
      "loss": 0.5247,
      "step": 9642
    },
    {
      "epoch": 1.982320896289444,
      "grad_norm": 0.22053800523281097,
      "learning_rate": 2.45268322064219e-05,
      "loss": 0.5272,
      "step": 9643
    },
    {
      "epoch": 1.9825264672628224,
      "grad_norm": 0.21963202953338623,
      "learning_rate": 2.451793817908087e-05,
      "loss": 0.51,
      "step": 9644
    },
    {
      "epoch": 1.982732038236201,
      "grad_norm": 0.21020135283470154,
      "learning_rate": 2.4509045160795295e-05,
      "loss": 0.5338,
      "step": 9645
    },
    {
      "epoch": 1.9829376092095796,
      "grad_norm": 0.17611977458000183,
      "learning_rate": 2.450015315200327e-05,
      "loss": 0.5083,
      "step": 9646
    },
    {
      "epoch": 1.9831431801829582,
      "grad_norm": 0.16838988661766052,
      "learning_rate": 2.44912621531429e-05,
      "loss": 0.5075,
      "step": 9647
    },
    {
      "epoch": 1.9833487511563366,
      "grad_norm": 0.20639371871948242,
      "learning_rate": 2.448237216465219e-05,
      "loss": 0.5329,
      "step": 9648
    },
    {
      "epoch": 1.9835543221297152,
      "grad_norm": 0.20562691986560822,
      "learning_rate": 2.4473483186969085e-05,
      "loss": 0.5001,
      "step": 9649
    },
    {
      "epoch": 1.9837598931030938,
      "grad_norm": 0.20028932392597198,
      "learning_rate": 2.4464595220531542e-05,
      "loss": 0.5145,
      "step": 9650
    },
    {
      "epoch": 1.9839654640764723,
      "grad_norm": 0.205689936876297,
      "learning_rate": 2.4455708265777406e-05,
      "loss": 0.5347,
      "step": 9651
    },
    {
      "epoch": 1.984171035049851,
      "grad_norm": 0.20499835908412933,
      "learning_rate": 2.4446822323144497e-05,
      "loss": 0.5239,
      "step": 9652
    },
    {
      "epoch": 1.9843766060232295,
      "grad_norm": 0.20297472178936005,
      "learning_rate": 2.4437937393070596e-05,
      "loss": 0.5307,
      "step": 9653
    },
    {
      "epoch": 1.984582176996608,
      "grad_norm": 0.1985624134540558,
      "learning_rate": 2.442905347599339e-05,
      "loss": 0.5076,
      "step": 9654
    },
    {
      "epoch": 1.9847877479699867,
      "grad_norm": 0.20252910256385803,
      "learning_rate": 2.442017057235059e-05,
      "loss": 0.528,
      "step": 9655
    },
    {
      "epoch": 1.9849933189433653,
      "grad_norm": 0.2101006656885147,
      "learning_rate": 2.441128868257979e-05,
      "loss": 0.5188,
      "step": 9656
    },
    {
      "epoch": 1.9851988899167439,
      "grad_norm": 0.1986953169107437,
      "learning_rate": 2.4402407807118577e-05,
      "loss": 0.5267,
      "step": 9657
    },
    {
      "epoch": 1.9854044608901225,
      "grad_norm": 0.20518286526203156,
      "learning_rate": 2.4393527946404447e-05,
      "loss": 0.5362,
      "step": 9658
    },
    {
      "epoch": 1.9856100318635008,
      "grad_norm": 0.20495247840881348,
      "learning_rate": 2.438464910087489e-05,
      "loss": 0.5306,
      "step": 9659
    },
    {
      "epoch": 1.9858156028368794,
      "grad_norm": 0.20301851630210876,
      "learning_rate": 2.437577127096731e-05,
      "loss": 0.5106,
      "step": 9660
    },
    {
      "epoch": 1.986021173810258,
      "grad_norm": 0.17709769308567047,
      "learning_rate": 2.4366894457119066e-05,
      "loss": 0.4954,
      "step": 9661
    },
    {
      "epoch": 1.9862267447836366,
      "grad_norm": 0.1601599156856537,
      "learning_rate": 2.4358018659767514e-05,
      "loss": 0.4917,
      "step": 9662
    },
    {
      "epoch": 1.986432315757015,
      "grad_norm": 0.19886882603168488,
      "learning_rate": 2.4349143879349898e-05,
      "loss": 0.5363,
      "step": 9663
    },
    {
      "epoch": 1.9866378867303935,
      "grad_norm": 0.16597384214401245,
      "learning_rate": 2.434027011630344e-05,
      "loss": 0.5121,
      "step": 9664
    },
    {
      "epoch": 1.9868434577037721,
      "grad_norm": 0.163084477186203,
      "learning_rate": 2.4331397371065314e-05,
      "loss": 0.5358,
      "step": 9665
    },
    {
      "epoch": 1.9870490286771507,
      "grad_norm": 0.19397611916065216,
      "learning_rate": 2.4322525644072636e-05,
      "loss": 0.4968,
      "step": 9666
    },
    {
      "epoch": 1.9872545996505293,
      "grad_norm": 0.19655869901180267,
      "learning_rate": 2.4313654935762452e-05,
      "loss": 0.5081,
      "step": 9667
    },
    {
      "epoch": 1.987460170623908,
      "grad_norm": 0.20611554384231567,
      "learning_rate": 2.4304785246571817e-05,
      "loss": 0.5285,
      "step": 9668
    },
    {
      "epoch": 1.9876657415972865,
      "grad_norm": 0.20290662348270416,
      "learning_rate": 2.4295916576937687e-05,
      "loss": 0.5153,
      "step": 9669
    },
    {
      "epoch": 1.987871312570665,
      "grad_norm": 0.20132143795490265,
      "learning_rate": 2.428704892729696e-05,
      "loss": 0.5342,
      "step": 9670
    },
    {
      "epoch": 1.9880768835440437,
      "grad_norm": 0.20216117799282074,
      "learning_rate": 2.4278182298086535e-05,
      "loss": 0.517,
      "step": 9671
    },
    {
      "epoch": 1.9882824545174222,
      "grad_norm": 0.19936327636241913,
      "learning_rate": 2.426931668974322e-05,
      "loss": 0.5252,
      "step": 9672
    },
    {
      "epoch": 1.9884880254908008,
      "grad_norm": 0.33940476179122925,
      "learning_rate": 2.426045210270377e-05,
      "loss": 0.5247,
      "step": 9673
    },
    {
      "epoch": 1.9886935964641792,
      "grad_norm": 0.20160600543022156,
      "learning_rate": 2.4251588537404913e-05,
      "loss": 0.5223,
      "step": 9674
    },
    {
      "epoch": 1.9888991674375578,
      "grad_norm": 0.2030128389596939,
      "learning_rate": 2.4242725994283292e-05,
      "loss": 0.5135,
      "step": 9675
    },
    {
      "epoch": 1.9891047384109364,
      "grad_norm": 0.22344298660755157,
      "learning_rate": 2.4233864473775556e-05,
      "loss": 0.5226,
      "step": 9676
    },
    {
      "epoch": 1.989310309384315,
      "grad_norm": 0.20270341634750366,
      "learning_rate": 2.422500397631826e-05,
      "loss": 0.5173,
      "step": 9677
    },
    {
      "epoch": 1.9895158803576933,
      "grad_norm": 0.17036183178424835,
      "learning_rate": 2.421614450234792e-05,
      "loss": 0.4997,
      "step": 9678
    },
    {
      "epoch": 1.989721451331072,
      "grad_norm": 0.16131217777729034,
      "learning_rate": 2.420728605230099e-05,
      "loss": 0.5233,
      "step": 9679
    },
    {
      "epoch": 1.9899270223044505,
      "grad_norm": 0.19689194858074188,
      "learning_rate": 2.4198428626613895e-05,
      "loss": 0.5235,
      "step": 9680
    },
    {
      "epoch": 1.990132593277829,
      "grad_norm": 0.1997881680727005,
      "learning_rate": 2.418957222572299e-05,
      "loss": 0.5469,
      "step": 9681
    },
    {
      "epoch": 1.9903381642512077,
      "grad_norm": 0.1594388335943222,
      "learning_rate": 2.4180716850064584e-05,
      "loss": 0.4688,
      "step": 9682
    },
    {
      "epoch": 1.9905437352245863,
      "grad_norm": 0.1173081025481224,
      "learning_rate": 2.4171862500074968e-05,
      "loss": 0.5151,
      "step": 9683
    },
    {
      "epoch": 1.9907493061979649,
      "grad_norm": 0.16193978488445282,
      "learning_rate": 2.416300917619033e-05,
      "loss": 0.507,
      "step": 9684
    },
    {
      "epoch": 1.9909548771713435,
      "grad_norm": 0.19650469720363617,
      "learning_rate": 2.415415687884684e-05,
      "loss": 0.512,
      "step": 9685
    },
    {
      "epoch": 1.991160448144722,
      "grad_norm": 0.19806897640228271,
      "learning_rate": 2.414530560848061e-05,
      "loss": 0.5165,
      "step": 9686
    },
    {
      "epoch": 1.9913660191181006,
      "grad_norm": 0.20564566552639008,
      "learning_rate": 2.4136455365527692e-05,
      "loss": 0.5088,
      "step": 9687
    },
    {
      "epoch": 1.9915715900914792,
      "grad_norm": 0.20067964494228363,
      "learning_rate": 2.412760615042411e-05,
      "loss": 0.5163,
      "step": 9688
    },
    {
      "epoch": 1.9917771610648578,
      "grad_norm": 0.20195259153842926,
      "learning_rate": 2.4118757963605788e-05,
      "loss": 0.5013,
      "step": 9689
    },
    {
      "epoch": 1.9919827320382362,
      "grad_norm": 0.2007036656141281,
      "learning_rate": 2.410991080550869e-05,
      "loss": 0.5301,
      "step": 9690
    },
    {
      "epoch": 1.9921883030116148,
      "grad_norm": 0.16521452367305756,
      "learning_rate": 2.4101064676568624e-05,
      "loss": 0.4947,
      "step": 9691
    },
    {
      "epoch": 1.9923938739849933,
      "grad_norm": 0.16318975389003754,
      "learning_rate": 2.4092219577221435e-05,
      "loss": 0.5409,
      "step": 9692
    },
    {
      "epoch": 1.9925994449583717,
      "grad_norm": 0.20644515752792358,
      "learning_rate": 2.4083375507902872e-05,
      "loss": 0.5451,
      "step": 9693
    },
    {
      "epoch": 1.9928050159317503,
      "grad_norm": 0.19570566713809967,
      "learning_rate": 2.407453246904863e-05,
      "loss": 0.5005,
      "step": 9694
    },
    {
      "epoch": 1.993010586905129,
      "grad_norm": 0.19532164931297302,
      "learning_rate": 2.4065690461094367e-05,
      "loss": 0.5377,
      "step": 9695
    },
    {
      "epoch": 1.9932161578785075,
      "grad_norm": 0.20121091604232788,
      "learning_rate": 2.405684948447567e-05,
      "loss": 0.5096,
      "step": 9696
    },
    {
      "epoch": 1.993421728851886,
      "grad_norm": 0.1667921096086502,
      "learning_rate": 2.4048009539628128e-05,
      "loss": 0.5165,
      "step": 9697
    },
    {
      "epoch": 1.9936272998252647,
      "grad_norm": 0.12459738552570343,
      "learning_rate": 2.403917062698723e-05,
      "loss": 0.5162,
      "step": 9698
    },
    {
      "epoch": 1.9938328707986432,
      "grad_norm": 0.16275346279144287,
      "learning_rate": 2.4030332746988426e-05,
      "loss": 0.513,
      "step": 9699
    },
    {
      "epoch": 1.9940384417720218,
      "grad_norm": 0.2167256772518158,
      "learning_rate": 2.4021495900067113e-05,
      "loss": 0.5138,
      "step": 9700
    },
    {
      "epoch": 1.9942440127454004,
      "grad_norm": 0.20247885584831238,
      "learning_rate": 2.4012660086658642e-05,
      "loss": 0.5086,
      "step": 9701
    },
    {
      "epoch": 1.994449583718779,
      "grad_norm": 0.20237302780151367,
      "learning_rate": 2.400382530719832e-05,
      "loss": 0.4994,
      "step": 9702
    },
    {
      "epoch": 1.9946551546921576,
      "grad_norm": 0.193708136677742,
      "learning_rate": 2.3994991562121362e-05,
      "loss": 0.5112,
      "step": 9703
    },
    {
      "epoch": 1.9948607256655362,
      "grad_norm": 0.20271430909633636,
      "learning_rate": 2.3986158851863016e-05,
      "loss": 0.5148,
      "step": 9704
    },
    {
      "epoch": 1.9950662966389145,
      "grad_norm": 0.16858288645744324,
      "learning_rate": 2.39773271768584e-05,
      "loss": 0.502,
      "step": 9705
    },
    {
      "epoch": 1.9952718676122931,
      "grad_norm": 0.1224452555179596,
      "learning_rate": 2.3968496537542624e-05,
      "loss": 0.5069,
      "step": 9706
    },
    {
      "epoch": 1.9954774385856717,
      "grad_norm": 0.1615760177373886,
      "learning_rate": 2.3959666934350715e-05,
      "loss": 0.5327,
      "step": 9707
    },
    {
      "epoch": 1.9956830095590503,
      "grad_norm": 0.19293002784252167,
      "learning_rate": 2.3950838367717675e-05,
      "loss": 0.5051,
      "step": 9708
    },
    {
      "epoch": 1.9958885805324287,
      "grad_norm": 0.20506036281585693,
      "learning_rate": 2.394201083807845e-05,
      "loss": 0.5306,
      "step": 9709
    },
    {
      "epoch": 1.9960941515058073,
      "grad_norm": 0.19566957652568817,
      "learning_rate": 2.3933184345867902e-05,
      "loss": 0.5146,
      "step": 9710
    },
    {
      "epoch": 1.9962997224791859,
      "grad_norm": 0.19693787395954132,
      "learning_rate": 2.3924358891520916e-05,
      "loss": 0.5098,
      "step": 9711
    },
    {
      "epoch": 1.9965052934525644,
      "grad_norm": 0.20601771771907806,
      "learning_rate": 2.391553447547226e-05,
      "loss": 0.5345,
      "step": 9712
    },
    {
      "epoch": 1.996710864425943,
      "grad_norm": 0.19721956551074982,
      "learning_rate": 2.3906711098156654e-05,
      "loss": 0.5034,
      "step": 9713
    },
    {
      "epoch": 1.9969164353993216,
      "grad_norm": 0.19830164313316345,
      "learning_rate": 2.389788876000882e-05,
      "loss": 0.5055,
      "step": 9714
    },
    {
      "epoch": 1.9971220063727002,
      "grad_norm": 0.19704151153564453,
      "learning_rate": 2.3889067461463375e-05,
      "loss": 0.4994,
      "step": 9715
    },
    {
      "epoch": 1.9973275773460788,
      "grad_norm": 0.2041328102350235,
      "learning_rate": 2.3880247202954906e-05,
      "loss": 0.5322,
      "step": 9716
    },
    {
      "epoch": 1.9975331483194574,
      "grad_norm": 0.20206472277641296,
      "learning_rate": 2.387142798491792e-05,
      "loss": 0.5115,
      "step": 9717
    },
    {
      "epoch": 1.997738719292836,
      "grad_norm": 0.20135797560214996,
      "learning_rate": 2.386260980778695e-05,
      "loss": 0.5294,
      "step": 9718
    },
    {
      "epoch": 1.9979442902662146,
      "grad_norm": 0.19181190431118011,
      "learning_rate": 2.3853792671996394e-05,
      "loss": 0.5249,
      "step": 9719
    },
    {
      "epoch": 1.998149861239593,
      "grad_norm": 0.199905663728714,
      "learning_rate": 2.3844976577980637e-05,
      "loss": 0.5133,
      "step": 9720
    },
    {
      "epoch": 1.9983554322129715,
      "grad_norm": 0.19756287336349487,
      "learning_rate": 2.3836161526173998e-05,
      "loss": 0.491,
      "step": 9721
    },
    {
      "epoch": 1.99856100318635,
      "grad_norm": 0.16492635011672974,
      "learning_rate": 2.382734751701077e-05,
      "loss": 0.4839,
      "step": 9722
    },
    {
      "epoch": 1.9987665741597287,
      "grad_norm": 0.16064047813415527,
      "learning_rate": 2.3818534550925166e-05,
      "loss": 0.525,
      "step": 9723
    },
    {
      "epoch": 1.998972145133107,
      "grad_norm": 0.1621170938014984,
      "learning_rate": 2.3809722628351345e-05,
      "loss": 0.5041,
      "step": 9724
    },
    {
      "epoch": 1.9991777161064856,
      "grad_norm": 0.1653175801038742,
      "learning_rate": 2.3800911749723466e-05,
      "loss": 0.5125,
      "step": 9725
    },
    {
      "epoch": 1.9993832870798642,
      "grad_norm": 0.16732336580753326,
      "learning_rate": 2.3792101915475583e-05,
      "loss": 0.5047,
      "step": 9726
    },
    {
      "epoch": 1.9995888580532428,
      "grad_norm": 0.1221918985247612,
      "learning_rate": 2.378329312604171e-05,
      "loss": 0.5094,
      "step": 9727
    },
    {
      "epoch": 1.9997944290266214,
      "grad_norm": 0.15841197967529297,
      "learning_rate": 2.3774485381855812e-05,
      "loss": 0.5167,
      "step": 9728
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.17381541430950165,
      "learning_rate": 2.3765678683351824e-05,
      "loss": 0.5104,
      "step": 9729
    },
    {
      "epoch": 2.0002055709733786,
      "grad_norm": 0.35462313890457153,
      "learning_rate": 2.375687303096359e-05,
      "loss": 0.4014,
      "step": 9730
    },
    {
      "epoch": 2.000411141946757,
      "grad_norm": 0.3547631800174713,
      "learning_rate": 2.3748068425124914e-05,
      "loss": 0.4388,
      "step": 9731
    },
    {
      "epoch": 2.0006167129201358,
      "grad_norm": 0.28014928102493286,
      "learning_rate": 2.373926486626959e-05,
      "loss": 0.4034,
      "step": 9732
    },
    {
      "epoch": 2.0008222838935144,
      "grad_norm": 0.23848789930343628,
      "learning_rate": 2.3730462354831326e-05,
      "loss": 0.4079,
      "step": 9733
    },
    {
      "epoch": 2.001027854866893,
      "grad_norm": 0.22783653438091278,
      "learning_rate": 2.3721660891243738e-05,
      "loss": 0.4117,
      "step": 9734
    },
    {
      "epoch": 2.0012334258402715,
      "grad_norm": 0.27238190174102783,
      "learning_rate": 2.371286047594049e-05,
      "loss": 0.3957,
      "step": 9735
    },
    {
      "epoch": 2.00143899681365,
      "grad_norm": 0.35664230585098267,
      "learning_rate": 2.3704061109355107e-05,
      "loss": 0.4034,
      "step": 9736
    },
    {
      "epoch": 2.0016445677870283,
      "grad_norm": 0.3655121326446533,
      "learning_rate": 2.369526279192108e-05,
      "loss": 0.3992,
      "step": 9737
    },
    {
      "epoch": 2.001850138760407,
      "grad_norm": 0.31957703828811646,
      "learning_rate": 2.3686465524071887e-05,
      "loss": 0.4125,
      "step": 9738
    },
    {
      "epoch": 2.0020557097337854,
      "grad_norm": 0.2534150183200836,
      "learning_rate": 2.3677669306240927e-05,
      "loss": 0.3829,
      "step": 9739
    },
    {
      "epoch": 2.002261280707164,
      "grad_norm": 0.23875583708286285,
      "learning_rate": 2.3668874138861533e-05,
      "loss": 0.4053,
      "step": 9740
    },
    {
      "epoch": 2.0024668516805426,
      "grad_norm": 0.24184350669384003,
      "learning_rate": 2.366008002236702e-05,
      "loss": 0.4061,
      "step": 9741
    },
    {
      "epoch": 2.002672422653921,
      "grad_norm": 0.232225701212883,
      "learning_rate": 2.3651286957190612e-05,
      "loss": 0.4083,
      "step": 9742
    },
    {
      "epoch": 2.0028779936273,
      "grad_norm": 0.1986769735813141,
      "learning_rate": 2.3642494943765516e-05,
      "loss": 0.4456,
      "step": 9743
    },
    {
      "epoch": 2.0030835646006784,
      "grad_norm": 0.1628189980983734,
      "learning_rate": 2.363370398252485e-05,
      "loss": 0.4615,
      "step": 9744
    },
    {
      "epoch": 2.003289135574057,
      "grad_norm": 0.13968214392662048,
      "learning_rate": 2.362491407390174e-05,
      "loss": 0.4571,
      "step": 9745
    },
    {
      "epoch": 2.0034947065474356,
      "grad_norm": 0.2863624095916748,
      "learning_rate": 2.3616125218329208e-05,
      "loss": 0.3981,
      "step": 9746
    },
    {
      "epoch": 2.003700277520814,
      "grad_norm": 0.27160152792930603,
      "learning_rate": 2.360733741624024e-05,
      "loss": 0.3855,
      "step": 9747
    },
    {
      "epoch": 2.0039058484941927,
      "grad_norm": 0.24677185714244843,
      "learning_rate": 2.3598550668067765e-05,
      "loss": 0.373,
      "step": 9748
    },
    {
      "epoch": 2.0041114194675713,
      "grad_norm": 0.23863226175308228,
      "learning_rate": 2.358976497424467e-05,
      "loss": 0.4122,
      "step": 9749
    },
    {
      "epoch": 2.00431699044095,
      "grad_norm": 0.23597677052021027,
      "learning_rate": 2.3580980335203787e-05,
      "loss": 0.4114,
      "step": 9750
    },
    {
      "epoch": 2.0045225614143285,
      "grad_norm": 0.24519526958465576,
      "learning_rate": 2.357219675137787e-05,
      "loss": 0.4,
      "step": 9751
    },
    {
      "epoch": 2.0047281323877066,
      "grad_norm": 0.26484453678131104,
      "learning_rate": 2.356341422319968e-05,
      "loss": 0.4106,
      "step": 9752
    },
    {
      "epoch": 2.0049337033610852,
      "grad_norm": 0.2829241156578064,
      "learning_rate": 2.3554632751101882e-05,
      "loss": 0.4101,
      "step": 9753
    },
    {
      "epoch": 2.005139274334464,
      "grad_norm": 0.2894810438156128,
      "learning_rate": 2.354585233551709e-05,
      "loss": 0.4072,
      "step": 9754
    },
    {
      "epoch": 2.0053448453078424,
      "grad_norm": 0.26924699544906616,
      "learning_rate": 2.3537072976877862e-05,
      "loss": 0.3941,
      "step": 9755
    },
    {
      "epoch": 2.005550416281221,
      "grad_norm": 0.26175355911254883,
      "learning_rate": 2.352829467561675e-05,
      "loss": 0.4,
      "step": 9756
    },
    {
      "epoch": 2.0057559872545996,
      "grad_norm": 0.15815532207489014,
      "learning_rate": 2.3519517432166195e-05,
      "loss": 0.4488,
      "step": 9757
    },
    {
      "epoch": 2.005961558227978,
      "grad_norm": 0.1532447189092636,
      "learning_rate": 2.3510741246958602e-05,
      "loss": 0.4491,
      "step": 9758
    },
    {
      "epoch": 2.0061671292013568,
      "grad_norm": 0.2337024062871933,
      "learning_rate": 2.3501966120426364e-05,
      "loss": 0.4038,
      "step": 9759
    },
    {
      "epoch": 2.0063727001747353,
      "grad_norm": 0.2317887842655182,
      "learning_rate": 2.3493192053001774e-05,
      "loss": 0.4058,
      "step": 9760
    },
    {
      "epoch": 2.006578271148114,
      "grad_norm": 0.22884777188301086,
      "learning_rate": 2.3484419045117088e-05,
      "loss": 0.3987,
      "step": 9761
    },
    {
      "epoch": 2.0067838421214925,
      "grad_norm": 0.2271248698234558,
      "learning_rate": 2.3475647097204513e-05,
      "loss": 0.3916,
      "step": 9762
    },
    {
      "epoch": 2.006989413094871,
      "grad_norm": 0.2272649109363556,
      "learning_rate": 2.3466876209696204e-05,
      "loss": 0.4061,
      "step": 9763
    },
    {
      "epoch": 2.0071949840682497,
      "grad_norm": 0.22100196778774261,
      "learning_rate": 2.345810638302425e-05,
      "loss": 0.4063,
      "step": 9764
    },
    {
      "epoch": 2.0074005550416283,
      "grad_norm": 0.22727227210998535,
      "learning_rate": 2.3449337617620705e-05,
      "loss": 0.3931,
      "step": 9765
    },
    {
      "epoch": 2.007606126015007,
      "grad_norm": 0.24030756950378418,
      "learning_rate": 2.344056991391757e-05,
      "loss": 0.4071,
      "step": 9766
    },
    {
      "epoch": 2.007811696988385,
      "grad_norm": 0.2378872036933899,
      "learning_rate": 2.3431803272346795e-05,
      "loss": 0.4167,
      "step": 9767
    },
    {
      "epoch": 2.0080172679617636,
      "grad_norm": 0.23873169720172882,
      "learning_rate": 2.3423037693340263e-05,
      "loss": 0.4025,
      "step": 9768
    },
    {
      "epoch": 2.008222838935142,
      "grad_norm": 0.16406850516796112,
      "learning_rate": 2.341427317732981e-05,
      "loss": 0.4482,
      "step": 9769
    },
    {
      "epoch": 2.008428409908521,
      "grad_norm": 0.24554254114627838,
      "learning_rate": 2.340550972474723e-05,
      "loss": 0.4149,
      "step": 9770
    },
    {
      "epoch": 2.0086339808818994,
      "grad_norm": 0.24509701132774353,
      "learning_rate": 2.339674733602425e-05,
      "loss": 0.3931,
      "step": 9771
    },
    {
      "epoch": 2.008839551855278,
      "grad_norm": 0.2255314290523529,
      "learning_rate": 2.3387986011592542e-05,
      "loss": 0.4023,
      "step": 9772
    },
    {
      "epoch": 2.0090451228286565,
      "grad_norm": 0.22587113082408905,
      "learning_rate": 2.3379225751883768e-05,
      "loss": 0.403,
      "step": 9773
    },
    {
      "epoch": 2.009250693802035,
      "grad_norm": 0.13071568310260773,
      "learning_rate": 2.337046655732948e-05,
      "loss": 0.4701,
      "step": 9774
    },
    {
      "epoch": 2.0094562647754137,
      "grad_norm": 0.2212098240852356,
      "learning_rate": 2.336170842836121e-05,
      "loss": 0.394,
      "step": 9775
    },
    {
      "epoch": 2.0096618357487923,
      "grad_norm": 0.23073311150074005,
      "learning_rate": 2.3352951365410414e-05,
      "loss": 0.421,
      "step": 9776
    },
    {
      "epoch": 2.009867406722171,
      "grad_norm": 0.21537451446056366,
      "learning_rate": 2.334419536890854e-05,
      "loss": 0.3929,
      "step": 9777
    },
    {
      "epoch": 2.0100729776955495,
      "grad_norm": 0.21932470798492432,
      "learning_rate": 2.3335440439286943e-05,
      "loss": 0.3989,
      "step": 9778
    },
    {
      "epoch": 2.010278548668928,
      "grad_norm": 0.2174750566482544,
      "learning_rate": 2.332668657697692e-05,
      "loss": 0.3909,
      "step": 9779
    },
    {
      "epoch": 2.0104841196423067,
      "grad_norm": 0.21708469092845917,
      "learning_rate": 2.3317933782409764e-05,
      "loss": 0.3854,
      "step": 9780
    },
    {
      "epoch": 2.0106896906156853,
      "grad_norm": 0.22329485416412354,
      "learning_rate": 2.330918205601667e-05,
      "loss": 0.4068,
      "step": 9781
    },
    {
      "epoch": 2.010895261589064,
      "grad_norm": 0.22749973833560944,
      "learning_rate": 2.3300431398228786e-05,
      "loss": 0.4065,
      "step": 9782
    },
    {
      "epoch": 2.011100832562442,
      "grad_norm": 0.2266959398984909,
      "learning_rate": 2.3291681809477235e-05,
      "loss": 0.4044,
      "step": 9783
    },
    {
      "epoch": 2.0113064035358206,
      "grad_norm": 0.22487907111644745,
      "learning_rate": 2.3282933290193048e-05,
      "loss": 0.3902,
      "step": 9784
    },
    {
      "epoch": 2.011511974509199,
      "grad_norm": 0.22450290620326996,
      "learning_rate": 2.327418584080724e-05,
      "loss": 0.4144,
      "step": 9785
    },
    {
      "epoch": 2.0117175454825778,
      "grad_norm": 0.13316728174686432,
      "learning_rate": 2.3265439461750727e-05,
      "loss": 0.4369,
      "step": 9786
    },
    {
      "epoch": 2.0119231164559563,
      "grad_norm": 0.23068048059940338,
      "learning_rate": 2.3256694153454446e-05,
      "loss": 0.4071,
      "step": 9787
    },
    {
      "epoch": 2.012128687429335,
      "grad_norm": 0.22546036541461945,
      "learning_rate": 2.324794991634921e-05,
      "loss": 0.392,
      "step": 9788
    },
    {
      "epoch": 2.0123342584027135,
      "grad_norm": 0.2214207649230957,
      "learning_rate": 2.3239206750865813e-05,
      "loss": 0.3871,
      "step": 9789
    },
    {
      "epoch": 2.012539829376092,
      "grad_norm": 0.12996140122413635,
      "learning_rate": 2.3230464657434995e-05,
      "loss": 0.4446,
      "step": 9790
    },
    {
      "epoch": 2.0127454003494707,
      "grad_norm": 0.126758873462677,
      "learning_rate": 2.322172363648743e-05,
      "loss": 0.4344,
      "step": 9791
    },
    {
      "epoch": 2.0129509713228493,
      "grad_norm": 0.21626314520835876,
      "learning_rate": 2.3212983688453753e-05,
      "loss": 0.4197,
      "step": 9792
    },
    {
      "epoch": 2.013156542296228,
      "grad_norm": 0.11778894811868668,
      "learning_rate": 2.3204244813764516e-05,
      "loss": 0.4603,
      "step": 9793
    },
    {
      "epoch": 2.0133621132696065,
      "grad_norm": 0.13116705417633057,
      "learning_rate": 2.3195507012850284e-05,
      "loss": 0.4376,
      "step": 9794
    },
    {
      "epoch": 2.013567684242985,
      "grad_norm": 0.21736088395118713,
      "learning_rate": 2.3186770286141507e-05,
      "loss": 0.3973,
      "step": 9795
    },
    {
      "epoch": 2.0137732552163636,
      "grad_norm": 0.2278052121400833,
      "learning_rate": 2.31780346340686e-05,
      "loss": 0.4055,
      "step": 9796
    },
    {
      "epoch": 2.013978826189742,
      "grad_norm": 0.2270914614200592,
      "learning_rate": 2.3169300057061935e-05,
      "loss": 0.3941,
      "step": 9797
    },
    {
      "epoch": 2.0141843971631204,
      "grad_norm": 0.22449646890163422,
      "learning_rate": 2.31605665555518e-05,
      "loss": 0.3728,
      "step": 9798
    },
    {
      "epoch": 2.014389968136499,
      "grad_norm": 0.12993952631950378,
      "learning_rate": 2.3151834129968495e-05,
      "loss": 0.4542,
      "step": 9799
    },
    {
      "epoch": 2.0145955391098775,
      "grad_norm": 0.21774081885814667,
      "learning_rate": 2.3143102780742185e-05,
      "loss": 0.3867,
      "step": 9800
    },
    {
      "epoch": 2.014801110083256,
      "grad_norm": 0.13234397768974304,
      "learning_rate": 2.3134372508303055e-05,
      "loss": 0.4441,
      "step": 9801
    },
    {
      "epoch": 2.0150066810566347,
      "grad_norm": 0.22552914917469025,
      "learning_rate": 2.3125643313081194e-05,
      "loss": 0.3967,
      "step": 9802
    },
    {
      "epoch": 2.0152122520300133,
      "grad_norm": 0.22355657815933228,
      "learning_rate": 2.311691519550665e-05,
      "loss": 0.3994,
      "step": 9803
    },
    {
      "epoch": 2.015417823003392,
      "grad_norm": 0.22515852749347687,
      "learning_rate": 2.3108188156009412e-05,
      "loss": 0.3941,
      "step": 9804
    },
    {
      "epoch": 2.0156233939767705,
      "grad_norm": 0.2237560749053955,
      "learning_rate": 2.3099462195019416e-05,
      "loss": 0.4045,
      "step": 9805
    },
    {
      "epoch": 2.015828964950149,
      "grad_norm": 0.1298869103193283,
      "learning_rate": 2.309073731296656e-05,
      "loss": 0.4567,
      "step": 9806
    },
    {
      "epoch": 2.0160345359235277,
      "grad_norm": 0.22776378691196442,
      "learning_rate": 2.3082013510280656e-05,
      "loss": 0.4082,
      "step": 9807
    },
    {
      "epoch": 2.0162401068969062,
      "grad_norm": 0.22463855147361755,
      "learning_rate": 2.307329078739152e-05,
      "loss": 0.4022,
      "step": 9808
    },
    {
      "epoch": 2.016445677870285,
      "grad_norm": 0.22342638671398163,
      "learning_rate": 2.3064569144728855e-05,
      "loss": 0.4131,
      "step": 9809
    },
    {
      "epoch": 2.0166512488436634,
      "grad_norm": 0.22417156398296356,
      "learning_rate": 2.3055848582722352e-05,
      "loss": 0.3981,
      "step": 9810
    },
    {
      "epoch": 2.016856819817042,
      "grad_norm": 0.2322673499584198,
      "learning_rate": 2.3047129101801618e-05,
      "loss": 0.4035,
      "step": 9811
    },
    {
      "epoch": 2.0170623907904206,
      "grad_norm": 0.2153014838695526,
      "learning_rate": 2.303841070239622e-05,
      "loss": 0.3957,
      "step": 9812
    },
    {
      "epoch": 2.0172679617637987,
      "grad_norm": 0.22393642365932465,
      "learning_rate": 2.302969338493567e-05,
      "loss": 0.3947,
      "step": 9813
    },
    {
      "epoch": 2.0174735327371773,
      "grad_norm": 0.23003719747066498,
      "learning_rate": 2.302097714984945e-05,
      "loss": 0.3909,
      "step": 9814
    },
    {
      "epoch": 2.017679103710556,
      "grad_norm": 0.22402851283550262,
      "learning_rate": 2.301226199756696e-05,
      "loss": 0.3974,
      "step": 9815
    },
    {
      "epoch": 2.0178846746839345,
      "grad_norm": 0.2208302617073059,
      "learning_rate": 2.3003547928517547e-05,
      "loss": 0.3763,
      "step": 9816
    },
    {
      "epoch": 2.018090245657313,
      "grad_norm": 0.1260402798652649,
      "learning_rate": 2.299483494313052e-05,
      "loss": 0.4457,
      "step": 9817
    },
    {
      "epoch": 2.0182958166306917,
      "grad_norm": 0.226173534989357,
      "learning_rate": 2.298612304183512e-05,
      "loss": 0.4093,
      "step": 9818
    },
    {
      "epoch": 2.0185013876040703,
      "grad_norm": 0.12185024470090866,
      "learning_rate": 2.297741222506053e-05,
      "loss": 0.4517,
      "step": 9819
    },
    {
      "epoch": 2.018706958577449,
      "grad_norm": 0.2621656358242035,
      "learning_rate": 2.2968702493235923e-05,
      "loss": 0.4059,
      "step": 9820
    },
    {
      "epoch": 2.0189125295508275,
      "grad_norm": 0.2253510057926178,
      "learning_rate": 2.2959993846790372e-05,
      "loss": 0.4052,
      "step": 9821
    },
    {
      "epoch": 2.019118100524206,
      "grad_norm": 0.12481515854597092,
      "learning_rate": 2.2951286286152893e-05,
      "loss": 0.4528,
      "step": 9822
    },
    {
      "epoch": 2.0193236714975846,
      "grad_norm": 0.21684333682060242,
      "learning_rate": 2.2942579811752496e-05,
      "loss": 0.3872,
      "step": 9823
    },
    {
      "epoch": 2.019529242470963,
      "grad_norm": 0.13086971640586853,
      "learning_rate": 2.2933874424018093e-05,
      "loss": 0.4632,
      "step": 9824
    },
    {
      "epoch": 2.019734813444342,
      "grad_norm": 0.21728526055812836,
      "learning_rate": 2.292517012337857e-05,
      "loss": 0.3812,
      "step": 9825
    },
    {
      "epoch": 2.0199403844177204,
      "grad_norm": 0.23790940642356873,
      "learning_rate": 2.291646691026273e-05,
      "loss": 0.4011,
      "step": 9826
    },
    {
      "epoch": 2.020145955391099,
      "grad_norm": 0.12328074872493744,
      "learning_rate": 2.290776478509933e-05,
      "loss": 0.4421,
      "step": 9827
    },
    {
      "epoch": 2.020351526364477,
      "grad_norm": 0.23319554328918457,
      "learning_rate": 2.2899063748317123e-05,
      "loss": 0.3795,
      "step": 9828
    },
    {
      "epoch": 2.0205570973378557,
      "grad_norm": 0.21926866471767426,
      "learning_rate": 2.2890363800344744e-05,
      "loss": 0.3943,
      "step": 9829
    },
    {
      "epoch": 2.0207626683112343,
      "grad_norm": 0.2243729531764984,
      "learning_rate": 2.2881664941610796e-05,
      "loss": 0.4123,
      "step": 9830
    },
    {
      "epoch": 2.020968239284613,
      "grad_norm": 0.12581419944763184,
      "learning_rate": 2.2872967172543843e-05,
      "loss": 0.4629,
      "step": 9831
    },
    {
      "epoch": 2.0211738102579915,
      "grad_norm": 0.12486526370048523,
      "learning_rate": 2.286427049357237e-05,
      "loss": 0.4541,
      "step": 9832
    },
    {
      "epoch": 2.02137938123137,
      "grad_norm": 0.2228085696697235,
      "learning_rate": 2.2855574905124826e-05,
      "loss": 0.4007,
      "step": 9833
    },
    {
      "epoch": 2.0215849522047487,
      "grad_norm": 0.1320047229528427,
      "learning_rate": 2.284688040762959e-05,
      "loss": 0.4513,
      "step": 9834
    },
    {
      "epoch": 2.0217905231781272,
      "grad_norm": 0.22697174549102783,
      "learning_rate": 2.283818700151503e-05,
      "loss": 0.3806,
      "step": 9835
    },
    {
      "epoch": 2.021996094151506,
      "grad_norm": 0.12552069127559662,
      "learning_rate": 2.2829494687209413e-05,
      "loss": 0.4545,
      "step": 9836
    },
    {
      "epoch": 2.0222016651248844,
      "grad_norm": 0.22603359818458557,
      "learning_rate": 2.282080346514097e-05,
      "loss": 0.3866,
      "step": 9837
    },
    {
      "epoch": 2.022407236098263,
      "grad_norm": 0.22030943632125854,
      "learning_rate": 2.2812113335737867e-05,
      "loss": 0.3983,
      "step": 9838
    },
    {
      "epoch": 2.0226128070716416,
      "grad_norm": 0.23014822602272034,
      "learning_rate": 2.280342429942824e-05,
      "loss": 0.4008,
      "step": 9839
    },
    {
      "epoch": 2.02281837804502,
      "grad_norm": 0.2164926677942276,
      "learning_rate": 2.279473635664013e-05,
      "loss": 0.4113,
      "step": 9840
    },
    {
      "epoch": 2.0230239490183988,
      "grad_norm": 0.23505493998527527,
      "learning_rate": 2.2786049507801594e-05,
      "loss": 0.4073,
      "step": 9841
    },
    {
      "epoch": 2.0232295199917774,
      "grad_norm": 0.21695363521575928,
      "learning_rate": 2.277736375334057e-05,
      "loss": 0.3937,
      "step": 9842
    },
    {
      "epoch": 2.0234350909651555,
      "grad_norm": 0.21634046733379364,
      "learning_rate": 2.2768679093684948e-05,
      "loss": 0.4001,
      "step": 9843
    },
    {
      "epoch": 2.023640661938534,
      "grad_norm": 0.22589579224586487,
      "learning_rate": 2.2759995529262617e-05,
      "loss": 0.3816,
      "step": 9844
    },
    {
      "epoch": 2.0238462329119127,
      "grad_norm": 0.22080455720424652,
      "learning_rate": 2.2751313060501353e-05,
      "loss": 0.3994,
      "step": 9845
    },
    {
      "epoch": 2.0240518038852913,
      "grad_norm": 0.23890239000320435,
      "learning_rate": 2.2742631687828906e-05,
      "loss": 0.4072,
      "step": 9846
    },
    {
      "epoch": 2.02425737485867,
      "grad_norm": 0.2339673787355423,
      "learning_rate": 2.2733951411672963e-05,
      "loss": 0.4084,
      "step": 9847
    },
    {
      "epoch": 2.0244629458320484,
      "grad_norm": 0.22778230905532837,
      "learning_rate": 2.272527223246115e-05,
      "loss": 0.3973,
      "step": 9848
    },
    {
      "epoch": 2.024668516805427,
      "grad_norm": 0.22321897745132446,
      "learning_rate": 2.271659415062108e-05,
      "loss": 0.4052,
      "step": 9849
    },
    {
      "epoch": 2.0248740877788056,
      "grad_norm": 0.13747207820415497,
      "learning_rate": 2.270791716658026e-05,
      "loss": 0.4596,
      "step": 9850
    },
    {
      "epoch": 2.025079658752184,
      "grad_norm": 0.22815275192260742,
      "learning_rate": 2.2699241280766174e-05,
      "loss": 0.3894,
      "step": 9851
    },
    {
      "epoch": 2.025285229725563,
      "grad_norm": 0.219502255320549,
      "learning_rate": 2.269056649360623e-05,
      "loss": 0.3969,
      "step": 9852
    },
    {
      "epoch": 2.0254908006989414,
      "grad_norm": 0.229275181889534,
      "learning_rate": 2.26818928055278e-05,
      "loss": 0.4055,
      "step": 9853
    },
    {
      "epoch": 2.02569637167232,
      "grad_norm": 0.21822713315486908,
      "learning_rate": 2.2673220216958206e-05,
      "loss": 0.3896,
      "step": 9854
    },
    {
      "epoch": 2.0259019426456986,
      "grad_norm": 0.218753844499588,
      "learning_rate": 2.266454872832467e-05,
      "loss": 0.3951,
      "step": 9855
    },
    {
      "epoch": 2.026107513619077,
      "grad_norm": 0.2237304002046585,
      "learning_rate": 2.2655878340054446e-05,
      "loss": 0.4035,
      "step": 9856
    },
    {
      "epoch": 2.0263130845924557,
      "grad_norm": 0.2183140218257904,
      "learning_rate": 2.2647209052574658e-05,
      "loss": 0.3968,
      "step": 9857
    },
    {
      "epoch": 2.026518655565834,
      "grad_norm": 0.22163569927215576,
      "learning_rate": 2.26385408663124e-05,
      "loss": 0.3805,
      "step": 9858
    },
    {
      "epoch": 2.0267242265392125,
      "grad_norm": 0.22751082479953766,
      "learning_rate": 2.2629873781694717e-05,
      "loss": 0.3994,
      "step": 9859
    },
    {
      "epoch": 2.026929797512591,
      "grad_norm": 0.21998751163482666,
      "learning_rate": 2.2621207799148598e-05,
      "loss": 0.3864,
      "step": 9860
    },
    {
      "epoch": 2.0271353684859696,
      "grad_norm": 0.1337684839963913,
      "learning_rate": 2.2612542919100973e-05,
      "loss": 0.444,
      "step": 9861
    },
    {
      "epoch": 2.0273409394593482,
      "grad_norm": 0.23163475096225739,
      "learning_rate": 2.2603879141978702e-05,
      "loss": 0.4133,
      "step": 9862
    },
    {
      "epoch": 2.027546510432727,
      "grad_norm": 0.1254424899816513,
      "learning_rate": 2.2595216468208643e-05,
      "loss": 0.4527,
      "step": 9863
    },
    {
      "epoch": 2.0277520814061054,
      "grad_norm": 0.23382841050624847,
      "learning_rate": 2.258655489821753e-05,
      "loss": 0.4075,
      "step": 9864
    },
    {
      "epoch": 2.027957652379484,
      "grad_norm": 0.2241084724664688,
      "learning_rate": 2.2577894432432115e-05,
      "loss": 0.4089,
      "step": 9865
    },
    {
      "epoch": 2.0281632233528626,
      "grad_norm": 0.12018263339996338,
      "learning_rate": 2.2569235071279042e-05,
      "loss": 0.4616,
      "step": 9866
    },
    {
      "epoch": 2.028368794326241,
      "grad_norm": 0.21912699937820435,
      "learning_rate": 2.256057681518491e-05,
      "loss": 0.4057,
      "step": 9867
    },
    {
      "epoch": 2.0285743652996198,
      "grad_norm": 0.12558940052986145,
      "learning_rate": 2.255191966457629e-05,
      "loss": 0.437,
      "step": 9868
    },
    {
      "epoch": 2.0287799362729984,
      "grad_norm": 0.22069305181503296,
      "learning_rate": 2.254326361987964e-05,
      "loss": 0.3903,
      "step": 9869
    },
    {
      "epoch": 2.028985507246377,
      "grad_norm": 0.12789428234100342,
      "learning_rate": 2.2534608681521443e-05,
      "loss": 0.4686,
      "step": 9870
    },
    {
      "epoch": 2.0291910782197555,
      "grad_norm": 0.22064268589019775,
      "learning_rate": 2.252595484992808e-05,
      "loss": 0.3867,
      "step": 9871
    },
    {
      "epoch": 2.029396649193134,
      "grad_norm": 0.1297440379858017,
      "learning_rate": 2.251730212552587e-05,
      "loss": 0.471,
      "step": 9872
    },
    {
      "epoch": 2.0296022201665127,
      "grad_norm": 0.227555051445961,
      "learning_rate": 2.2508650508741107e-05,
      "loss": 0.4138,
      "step": 9873
    },
    {
      "epoch": 2.029807791139891,
      "grad_norm": 0.2229832112789154,
      "learning_rate": 2.250000000000001e-05,
      "loss": 0.3846,
      "step": 9874
    },
    {
      "epoch": 2.0300133621132694,
      "grad_norm": 0.12331897765398026,
      "learning_rate": 2.2491350599728745e-05,
      "loss": 0.4309,
      "step": 9875
    },
    {
      "epoch": 2.030218933086648,
      "grad_norm": 0.12525731325149536,
      "learning_rate": 2.2482702308353416e-05,
      "loss": 0.4642,
      "step": 9876
    },
    {
      "epoch": 2.0304245040600266,
      "grad_norm": 0.22697319090366364,
      "learning_rate": 2.2474055126300116e-05,
      "loss": 0.3967,
      "step": 9877
    },
    {
      "epoch": 2.030630075033405,
      "grad_norm": 0.21771733462810516,
      "learning_rate": 2.2465409053994835e-05,
      "loss": 0.396,
      "step": 9878
    },
    {
      "epoch": 2.030835646006784,
      "grad_norm": 0.21557028591632843,
      "learning_rate": 2.2456764091863518e-05,
      "loss": 0.3904,
      "step": 9879
    },
    {
      "epoch": 2.0310412169801624,
      "grad_norm": 0.22535440325737,
      "learning_rate": 2.244812024033207e-05,
      "loss": 0.4019,
      "step": 9880
    },
    {
      "epoch": 2.031246787953541,
      "grad_norm": 0.22445163130760193,
      "learning_rate": 2.243947749982633e-05,
      "loss": 0.3986,
      "step": 9881
    },
    {
      "epoch": 2.0314523589269196,
      "grad_norm": 0.21911373734474182,
      "learning_rate": 2.243083587077209e-05,
      "loss": 0.3931,
      "step": 9882
    },
    {
      "epoch": 2.031657929900298,
      "grad_norm": 0.21471014618873596,
      "learning_rate": 2.2422195353595056e-05,
      "loss": 0.3839,
      "step": 9883
    },
    {
      "epoch": 2.0318635008736767,
      "grad_norm": 0.2156352996826172,
      "learning_rate": 2.2413555948720952e-05,
      "loss": 0.3843,
      "step": 9884
    },
    {
      "epoch": 2.0320690718470553,
      "grad_norm": 0.22156722843647003,
      "learning_rate": 2.240491765657537e-05,
      "loss": 0.4147,
      "step": 9885
    },
    {
      "epoch": 2.032274642820434,
      "grad_norm": 0.22945941984653473,
      "learning_rate": 2.2396280477583874e-05,
      "loss": 0.4038,
      "step": 9886
    },
    {
      "epoch": 2.0324802137938125,
      "grad_norm": 0.217056542634964,
      "learning_rate": 2.2387644412172005e-05,
      "loss": 0.3978,
      "step": 9887
    },
    {
      "epoch": 2.032685784767191,
      "grad_norm": 0.22490544617176056,
      "learning_rate": 2.2379009460765203e-05,
      "loss": 0.3874,
      "step": 9888
    },
    {
      "epoch": 2.0328913557405692,
      "grad_norm": 0.224374920129776,
      "learning_rate": 2.2370375623788862e-05,
      "loss": 0.4149,
      "step": 9889
    },
    {
      "epoch": 2.033096926713948,
      "grad_norm": 0.13248522579669952,
      "learning_rate": 2.236174290166836e-05,
      "loss": 0.4294,
      "step": 9890
    },
    {
      "epoch": 2.0333024976873264,
      "grad_norm": 0.23234902322292328,
      "learning_rate": 2.235311129482897e-05,
      "loss": 0.395,
      "step": 9891
    },
    {
      "epoch": 2.033508068660705,
      "grad_norm": 0.2269185483455658,
      "learning_rate": 2.234448080369594e-05,
      "loss": 0.3915,
      "step": 9892
    },
    {
      "epoch": 2.0337136396340836,
      "grad_norm": 0.22412073612213135,
      "learning_rate": 2.2335851428694447e-05,
      "loss": 0.3766,
      "step": 9893
    },
    {
      "epoch": 2.033919210607462,
      "grad_norm": 0.22921979427337646,
      "learning_rate": 2.2327223170249626e-05,
      "loss": 0.4075,
      "step": 9894
    },
    {
      "epoch": 2.0341247815808408,
      "grad_norm": 0.12206049263477325,
      "learning_rate": 2.2318596028786543e-05,
      "loss": 0.4533,
      "step": 9895
    },
    {
      "epoch": 2.0343303525542193,
      "grad_norm": 0.22003917396068573,
      "learning_rate": 2.2309970004730204e-05,
      "loss": 0.3874,
      "step": 9896
    },
    {
      "epoch": 2.034535923527598,
      "grad_norm": 0.22223718464374542,
      "learning_rate": 2.2301345098505608e-05,
      "loss": 0.4057,
      "step": 9897
    },
    {
      "epoch": 2.0347414945009765,
      "grad_norm": 0.2259814292192459,
      "learning_rate": 2.2292721310537645e-05,
      "loss": 0.3888,
      "step": 9898
    },
    {
      "epoch": 2.034947065474355,
      "grad_norm": 0.21883010864257812,
      "learning_rate": 2.2284098641251172e-05,
      "loss": 0.4222,
      "step": 9899
    },
    {
      "epoch": 2.0351526364477337,
      "grad_norm": 0.12656092643737793,
      "learning_rate": 2.227547709107098e-05,
      "loss": 0.4542,
      "step": 9900
    },
    {
      "epoch": 2.0353582074211123,
      "grad_norm": 0.22307392954826355,
      "learning_rate": 2.2266856660421823e-05,
      "loss": 0.4201,
      "step": 9901
    },
    {
      "epoch": 2.035563778394491,
      "grad_norm": 0.2214750051498413,
      "learning_rate": 2.2258237349728382e-05,
      "loss": 0.3773,
      "step": 9902
    },
    {
      "epoch": 2.0357693493678695,
      "grad_norm": 0.22282758355140686,
      "learning_rate": 2.2249619159415273e-05,
      "loss": 0.4047,
      "step": 9903
    },
    {
      "epoch": 2.0359749203412476,
      "grad_norm": 0.12212900072336197,
      "learning_rate": 2.2241002089907114e-05,
      "loss": 0.4447,
      "step": 9904
    },
    {
      "epoch": 2.036180491314626,
      "grad_norm": 0.2368995100259781,
      "learning_rate": 2.2232386141628407e-05,
      "loss": 0.3965,
      "step": 9905
    },
    {
      "epoch": 2.036386062288005,
      "grad_norm": 0.12619584798812866,
      "learning_rate": 2.222377131500361e-05,
      "loss": 0.453,
      "step": 9906
    },
    {
      "epoch": 2.0365916332613834,
      "grad_norm": 0.1249145120382309,
      "learning_rate": 2.221515761045714e-05,
      "loss": 0.4397,
      "step": 9907
    },
    {
      "epoch": 2.036797204234762,
      "grad_norm": 0.22991523146629333,
      "learning_rate": 2.220654502841337e-05,
      "loss": 0.3909,
      "step": 9908
    },
    {
      "epoch": 2.0370027752081405,
      "grad_norm": 0.2213556170463562,
      "learning_rate": 2.2197933569296587e-05,
      "loss": 0.3813,
      "step": 9909
    },
    {
      "epoch": 2.037208346181519,
      "grad_norm": 0.24218927323818207,
      "learning_rate": 2.218932323353103e-05,
      "loss": 0.4047,
      "step": 9910
    },
    {
      "epoch": 2.0374139171548977,
      "grad_norm": 0.21407100558280945,
      "learning_rate": 2.2180714021540913e-05,
      "loss": 0.3848,
      "step": 9911
    },
    {
      "epoch": 2.0376194881282763,
      "grad_norm": 0.12527808547019958,
      "learning_rate": 2.217210593375036e-05,
      "loss": 0.4478,
      "step": 9912
    },
    {
      "epoch": 2.037825059101655,
      "grad_norm": 0.22188331186771393,
      "learning_rate": 2.216349897058345e-05,
      "loss": 0.3957,
      "step": 9913
    },
    {
      "epoch": 2.0380306300750335,
      "grad_norm": 0.23192797601222992,
      "learning_rate": 2.2154893132464207e-05,
      "loss": 0.3889,
      "step": 9914
    },
    {
      "epoch": 2.038236201048412,
      "grad_norm": 0.2198922336101532,
      "learning_rate": 2.21462884198166e-05,
      "loss": 0.3865,
      "step": 9915
    },
    {
      "epoch": 2.0384417720217907,
      "grad_norm": 0.1324300318956375,
      "learning_rate": 2.213768483306455e-05,
      "loss": 0.4286,
      "step": 9916
    },
    {
      "epoch": 2.0386473429951693,
      "grad_norm": 0.22883708775043488,
      "learning_rate": 2.212908237263189e-05,
      "loss": 0.3945,
      "step": 9917
    },
    {
      "epoch": 2.038852913968548,
      "grad_norm": 0.2269202619791031,
      "learning_rate": 2.212048103894246e-05,
      "loss": 0.4058,
      "step": 9918
    },
    {
      "epoch": 2.039058484941926,
      "grad_norm": 0.23420077562332153,
      "learning_rate": 2.2111880832419995e-05,
      "loss": 0.4064,
      "step": 9919
    },
    {
      "epoch": 2.0392640559153046,
      "grad_norm": 0.12553973495960236,
      "learning_rate": 2.210328175348818e-05,
      "loss": 0.4317,
      "step": 9920
    },
    {
      "epoch": 2.039469626888683,
      "grad_norm": 0.22346656024456024,
      "learning_rate": 2.209468380257065e-05,
      "loss": 0.3767,
      "step": 9921
    },
    {
      "epoch": 2.0396751978620618,
      "grad_norm": 0.2343178391456604,
      "learning_rate": 2.208608698009099e-05,
      "loss": 0.3972,
      "step": 9922
    },
    {
      "epoch": 2.0398807688354403,
      "grad_norm": 0.21974226832389832,
      "learning_rate": 2.207749128647273e-05,
      "loss": 0.3937,
      "step": 9923
    },
    {
      "epoch": 2.040086339808819,
      "grad_norm": 0.12265095114707947,
      "learning_rate": 2.206889672213932e-05,
      "loss": 0.4691,
      "step": 9924
    },
    {
      "epoch": 2.0402919107821975,
      "grad_norm": 0.13189628720283508,
      "learning_rate": 2.2060303287514198e-05,
      "loss": 0.4569,
      "step": 9925
    },
    {
      "epoch": 2.040497481755576,
      "grad_norm": 0.22592967748641968,
      "learning_rate": 2.2051710983020714e-05,
      "loss": 0.411,
      "step": 9926
    },
    {
      "epoch": 2.0407030527289547,
      "grad_norm": 0.22591936588287354,
      "learning_rate": 2.2043119809082176e-05,
      "loss": 0.4061,
      "step": 9927
    },
    {
      "epoch": 2.0409086237023333,
      "grad_norm": 0.22242794930934906,
      "learning_rate": 2.2034529766121802e-05,
      "loss": 0.398,
      "step": 9928
    },
    {
      "epoch": 2.041114194675712,
      "grad_norm": 0.21335627138614655,
      "learning_rate": 2.2025940854562824e-05,
      "loss": 0.4109,
      "step": 9929
    },
    {
      "epoch": 2.0413197656490905,
      "grad_norm": 0.2250111699104309,
      "learning_rate": 2.2017353074828363e-05,
      "loss": 0.3865,
      "step": 9930
    },
    {
      "epoch": 2.041525336622469,
      "grad_norm": 0.2197580337524414,
      "learning_rate": 2.2008766427341477e-05,
      "loss": 0.379,
      "step": 9931
    },
    {
      "epoch": 2.0417309075958476,
      "grad_norm": 0.23078951239585876,
      "learning_rate": 2.2000180912525225e-05,
      "loss": 0.4046,
      "step": 9932
    },
    {
      "epoch": 2.041936478569226,
      "grad_norm": 0.22051231563091278,
      "learning_rate": 2.1991596530802558e-05,
      "loss": 0.3925,
      "step": 9933
    },
    {
      "epoch": 2.042142049542605,
      "grad_norm": 0.23026688396930695,
      "learning_rate": 2.198301328259639e-05,
      "loss": 0.3941,
      "step": 9934
    },
    {
      "epoch": 2.042347620515983,
      "grad_norm": 0.23431305587291718,
      "learning_rate": 2.197443116832958e-05,
      "loss": 0.3928,
      "step": 9935
    },
    {
      "epoch": 2.0425531914893615,
      "grad_norm": 0.22884812951087952,
      "learning_rate": 2.1965850188424914e-05,
      "loss": 0.3915,
      "step": 9936
    },
    {
      "epoch": 2.04275876246274,
      "grad_norm": 0.22704505920410156,
      "learning_rate": 2.195727034330516e-05,
      "loss": 0.4077,
      "step": 9937
    },
    {
      "epoch": 2.0429643334361187,
      "grad_norm": 0.13225297629833221,
      "learning_rate": 2.194869163339297e-05,
      "loss": 0.4588,
      "step": 9938
    },
    {
      "epoch": 2.0431699044094973,
      "grad_norm": 0.220863476395607,
      "learning_rate": 2.194011405911102e-05,
      "loss": 0.3732,
      "step": 9939
    },
    {
      "epoch": 2.043375475382876,
      "grad_norm": 0.22376231849193573,
      "learning_rate": 2.193153762088187e-05,
      "loss": 0.4105,
      "step": 9940
    },
    {
      "epoch": 2.0435810463562545,
      "grad_norm": 0.22367540001869202,
      "learning_rate": 2.192296231912804e-05,
      "loss": 0.393,
      "step": 9941
    },
    {
      "epoch": 2.043786617329633,
      "grad_norm": 0.22071625292301178,
      "learning_rate": 2.1914388154271993e-05,
      "loss": 0.3973,
      "step": 9942
    },
    {
      "epoch": 2.0439921883030117,
      "grad_norm": 0.22081826627254486,
      "learning_rate": 2.1905815126736143e-05,
      "loss": 0.4125,
      "step": 9943
    },
    {
      "epoch": 2.0441977592763902,
      "grad_norm": 0.22600281238555908,
      "learning_rate": 2.1897243236942836e-05,
      "loss": 0.3986,
      "step": 9944
    },
    {
      "epoch": 2.044403330249769,
      "grad_norm": 0.2240431308746338,
      "learning_rate": 2.1888672485314357e-05,
      "loss": 0.4019,
      "step": 9945
    },
    {
      "epoch": 2.0446089012231474,
      "grad_norm": 0.22377148270606995,
      "learning_rate": 2.188010287227298e-05,
      "loss": 0.4098,
      "step": 9946
    },
    {
      "epoch": 2.044814472196526,
      "grad_norm": 0.2262306958436966,
      "learning_rate": 2.1871534398240877e-05,
      "loss": 0.3999,
      "step": 9947
    },
    {
      "epoch": 2.0450200431699046,
      "grad_norm": 0.22286969423294067,
      "learning_rate": 2.1862967063640164e-05,
      "loss": 0.3974,
      "step": 9948
    },
    {
      "epoch": 2.045225614143283,
      "grad_norm": 0.1264716535806656,
      "learning_rate": 2.1854400868892905e-05,
      "loss": 0.4572,
      "step": 9949
    },
    {
      "epoch": 2.0454311851166613,
      "grad_norm": 0.22342973947525024,
      "learning_rate": 2.1845835814421155e-05,
      "loss": 0.3999,
      "step": 9950
    },
    {
      "epoch": 2.04563675609004,
      "grad_norm": 0.22479073703289032,
      "learning_rate": 2.1837271900646852e-05,
      "loss": 0.3997,
      "step": 9951
    },
    {
      "epoch": 2.0458423270634185,
      "grad_norm": 0.22151948511600494,
      "learning_rate": 2.1828709127991884e-05,
      "loss": 0.3914,
      "step": 9952
    },
    {
      "epoch": 2.046047898036797,
      "grad_norm": 0.1296972632408142,
      "learning_rate": 2.1820147496878126e-05,
      "loss": 0.4305,
      "step": 9953
    },
    {
      "epoch": 2.0462534690101757,
      "grad_norm": 0.25065821409225464,
      "learning_rate": 2.181158700772736e-05,
      "loss": 0.3911,
      "step": 9954
    },
    {
      "epoch": 2.0464590399835543,
      "grad_norm": 0.2304956465959549,
      "learning_rate": 2.180302766096132e-05,
      "loss": 0.3961,
      "step": 9955
    },
    {
      "epoch": 2.046664610956933,
      "grad_norm": 0.22731968760490417,
      "learning_rate": 2.179446945700169e-05,
      "loss": 0.3846,
      "step": 9956
    },
    {
      "epoch": 2.0468701819303114,
      "grad_norm": 0.23249146342277527,
      "learning_rate": 2.1785912396270084e-05,
      "loss": 0.4109,
      "step": 9957
    },
    {
      "epoch": 2.04707575290369,
      "grad_norm": 0.22886785864830017,
      "learning_rate": 2.177735647918807e-05,
      "loss": 0.3894,
      "step": 9958
    },
    {
      "epoch": 2.0472813238770686,
      "grad_norm": 0.22079876065254211,
      "learning_rate": 2.176880170617715e-05,
      "loss": 0.4036,
      "step": 9959
    },
    {
      "epoch": 2.047486894850447,
      "grad_norm": 0.21782319247722626,
      "learning_rate": 2.1760248077658796e-05,
      "loss": 0.3954,
      "step": 9960
    },
    {
      "epoch": 2.047692465823826,
      "grad_norm": 0.22487705945968628,
      "learning_rate": 2.1751695594054398e-05,
      "loss": 0.4007,
      "step": 9961
    },
    {
      "epoch": 2.0478980367972044,
      "grad_norm": 0.22865137457847595,
      "learning_rate": 2.1743144255785294e-05,
      "loss": 0.3998,
      "step": 9962
    },
    {
      "epoch": 2.048103607770583,
      "grad_norm": 0.2298915535211563,
      "learning_rate": 2.173459406327278e-05,
      "loss": 0.4107,
      "step": 9963
    },
    {
      "epoch": 2.0483091787439616,
      "grad_norm": 0.2230944037437439,
      "learning_rate": 2.1726045016938065e-05,
      "loss": 0.3866,
      "step": 9964
    },
    {
      "epoch": 2.0485147497173397,
      "grad_norm": 0.23378700017929077,
      "learning_rate": 2.1717497117202314e-05,
      "loss": 0.4049,
      "step": 9965
    },
    {
      "epoch": 2.0487203206907183,
      "grad_norm": 0.22423069179058075,
      "learning_rate": 2.170895036448668e-05,
      "loss": 0.3989,
      "step": 9966
    },
    {
      "epoch": 2.048925891664097,
      "grad_norm": 0.2279648631811142,
      "learning_rate": 2.17004047592122e-05,
      "loss": 0.4052,
      "step": 9967
    },
    {
      "epoch": 2.0491314626374755,
      "grad_norm": 0.2262582629919052,
      "learning_rate": 2.1691860301799867e-05,
      "loss": 0.391,
      "step": 9968
    },
    {
      "epoch": 2.049337033610854,
      "grad_norm": 0.2182939350605011,
      "learning_rate": 2.1683316992670644e-05,
      "loss": 0.3879,
      "step": 9969
    },
    {
      "epoch": 2.0495426045842327,
      "grad_norm": 0.21680088341236115,
      "learning_rate": 2.1674774832245406e-05,
      "loss": 0.3804,
      "step": 9970
    },
    {
      "epoch": 2.0497481755576112,
      "grad_norm": 0.22588318586349487,
      "learning_rate": 2.166623382094497e-05,
      "loss": 0.4107,
      "step": 9971
    },
    {
      "epoch": 2.04995374653099,
      "grad_norm": 0.22498705983161926,
      "learning_rate": 2.165769395919015e-05,
      "loss": 0.3904,
      "step": 9972
    },
    {
      "epoch": 2.0501593175043684,
      "grad_norm": 0.1259543001651764,
      "learning_rate": 2.1649155247401637e-05,
      "loss": 0.4644,
      "step": 9973
    },
    {
      "epoch": 2.050364888477747,
      "grad_norm": 0.22000350058078766,
      "learning_rate": 2.1640617686000116e-05,
      "loss": 0.3917,
      "step": 9974
    },
    {
      "epoch": 2.0505704594511256,
      "grad_norm": 0.23319876194000244,
      "learning_rate": 2.163208127540618e-05,
      "loss": 0.4,
      "step": 9975
    },
    {
      "epoch": 2.050776030424504,
      "grad_norm": 0.22796432673931122,
      "learning_rate": 2.1623546016040378e-05,
      "loss": 0.4044,
      "step": 9976
    },
    {
      "epoch": 2.0509816013978828,
      "grad_norm": 0.2386104017496109,
      "learning_rate": 2.16150119083232e-05,
      "loss": 0.4046,
      "step": 9977
    },
    {
      "epoch": 2.0511871723712614,
      "grad_norm": 0.22699424624443054,
      "learning_rate": 2.160647895267509e-05,
      "loss": 0.3846,
      "step": 9978
    },
    {
      "epoch": 2.05139274334464,
      "grad_norm": 0.22776249051094055,
      "learning_rate": 2.1597947149516403e-05,
      "loss": 0.4042,
      "step": 9979
    },
    {
      "epoch": 2.051598314318018,
      "grad_norm": 0.22444364428520203,
      "learning_rate": 2.1589416499267495e-05,
      "loss": 0.4076,
      "step": 9980
    },
    {
      "epoch": 2.0518038852913967,
      "grad_norm": 0.21514415740966797,
      "learning_rate": 2.158088700234861e-05,
      "loss": 0.391,
      "step": 9981
    },
    {
      "epoch": 2.0520094562647753,
      "grad_norm": 0.12512782216072083,
      "learning_rate": 2.1572358659179968e-05,
      "loss": 0.4546,
      "step": 9982
    },
    {
      "epoch": 2.052215027238154,
      "grad_norm": 0.217271625995636,
      "learning_rate": 2.1563831470181714e-05,
      "loss": 0.392,
      "step": 9983
    },
    {
      "epoch": 2.0524205982115324,
      "grad_norm": 0.12956684827804565,
      "learning_rate": 2.155530543577394e-05,
      "loss": 0.4561,
      "step": 9984
    },
    {
      "epoch": 2.052626169184911,
      "grad_norm": 0.2247815728187561,
      "learning_rate": 2.1546780556376692e-05,
      "loss": 0.401,
      "step": 9985
    },
    {
      "epoch": 2.0528317401582896,
      "grad_norm": 0.22784893214702606,
      "learning_rate": 2.1538256832409923e-05,
      "loss": 0.3878,
      "step": 9986
    },
    {
      "epoch": 2.053037311131668,
      "grad_norm": 0.22039231657981873,
      "learning_rate": 2.1529734264293597e-05,
      "loss": 0.4089,
      "step": 9987
    },
    {
      "epoch": 2.053242882105047,
      "grad_norm": 0.22087042033672333,
      "learning_rate": 2.152121285244757e-05,
      "loss": 0.4153,
      "step": 9988
    },
    {
      "epoch": 2.0534484530784254,
      "grad_norm": 0.15735208988189697,
      "learning_rate": 2.1512692597291642e-05,
      "loss": 0.4635,
      "step": 9989
    },
    {
      "epoch": 2.053654024051804,
      "grad_norm": 0.22379711270332336,
      "learning_rate": 2.1504173499245572e-05,
      "loss": 0.4056,
      "step": 9990
    },
    {
      "epoch": 2.0538595950251826,
      "grad_norm": 0.22105872631072998,
      "learning_rate": 2.1495655558729053e-05,
      "loss": 0.407,
      "step": 9991
    },
    {
      "epoch": 2.054065165998561,
      "grad_norm": 0.2312091439962387,
      "learning_rate": 2.1487138776161708e-05,
      "loss": 0.3885,
      "step": 9992
    },
    {
      "epoch": 2.0542707369719397,
      "grad_norm": 0.22999829053878784,
      "learning_rate": 2.1478623151963156e-05,
      "loss": 0.3916,
      "step": 9993
    },
    {
      "epoch": 2.0544763079453183,
      "grad_norm": 0.2265433371067047,
      "learning_rate": 2.14701086865529e-05,
      "loss": 0.3997,
      "step": 9994
    },
    {
      "epoch": 2.0546818789186965,
      "grad_norm": 0.21633121371269226,
      "learning_rate": 2.1461595380350395e-05,
      "loss": 0.3746,
      "step": 9995
    },
    {
      "epoch": 2.054887449892075,
      "grad_norm": 0.22249945998191833,
      "learning_rate": 2.1453083233775083e-05,
      "loss": 0.3946,
      "step": 9996
    },
    {
      "epoch": 2.0550930208654536,
      "grad_norm": 0.22257232666015625,
      "learning_rate": 2.1444572247246306e-05,
      "loss": 0.4039,
      "step": 9997
    },
    {
      "epoch": 2.0552985918388322,
      "grad_norm": 0.1395193338394165,
      "learning_rate": 2.143606242118335e-05,
      "loss": 0.4434,
      "step": 9998
    },
    {
      "epoch": 2.055504162812211,
      "grad_norm": 0.22854886949062347,
      "learning_rate": 2.1427553756005467e-05,
      "loss": 0.409,
      "step": 9999
    },
    {
      "epoch": 2.0557097337855894,
      "grad_norm": 0.23623695969581604,
      "learning_rate": 2.1419046252131813e-05,
      "loss": 0.3945,
      "step": 10000
    },
    {
      "epoch": 2.055915304758968,
      "grad_norm": 0.22533413767814636,
      "learning_rate": 2.1410539909981554e-05,
      "loss": 0.4078,
      "step": 10001
    },
    {
      "epoch": 2.0561208757323466,
      "grad_norm": 0.21484293043613434,
      "learning_rate": 2.1402034729973735e-05,
      "loss": 0.3971,
      "step": 10002
    },
    {
      "epoch": 2.056326446705725,
      "grad_norm": 0.12295730412006378,
      "learning_rate": 2.1393530712527364e-05,
      "loss": 0.4583,
      "step": 10003
    },
    {
      "epoch": 2.0565320176791038,
      "grad_norm": 0.21692106127738953,
      "learning_rate": 2.1385027858061404e-05,
      "loss": 0.3951,
      "step": 10004
    },
    {
      "epoch": 2.0567375886524824,
      "grad_norm": 0.23760221898555756,
      "learning_rate": 2.137652616699474e-05,
      "loss": 0.4146,
      "step": 10005
    },
    {
      "epoch": 2.056943159625861,
      "grad_norm": 0.2326803058385849,
      "learning_rate": 2.1368025639746222e-05,
      "loss": 0.3751,
      "step": 10006
    },
    {
      "epoch": 2.0571487305992395,
      "grad_norm": 0.12141763418912888,
      "learning_rate": 2.13595262767346e-05,
      "loss": 0.4688,
      "step": 10007
    },
    {
      "epoch": 2.057354301572618,
      "grad_norm": 0.1330864131450653,
      "learning_rate": 2.135102807837865e-05,
      "loss": 0.463,
      "step": 10008
    },
    {
      "epoch": 2.0575598725459967,
      "grad_norm": 0.12697000801563263,
      "learning_rate": 2.1342531045097006e-05,
      "loss": 0.4498,
      "step": 10009
    },
    {
      "epoch": 2.057765443519375,
      "grad_norm": 0.12423637509346008,
      "learning_rate": 2.1334035177308284e-05,
      "loss": 0.4417,
      "step": 10010
    },
    {
      "epoch": 2.0579710144927534,
      "grad_norm": 0.23774953186511993,
      "learning_rate": 2.1325540475431032e-05,
      "loss": 0.4171,
      "step": 10011
    },
    {
      "epoch": 2.058176585466132,
      "grad_norm": 0.12215947359800339,
      "learning_rate": 2.131704693988375e-05,
      "loss": 0.4431,
      "step": 10012
    },
    {
      "epoch": 2.0583821564395106,
      "grad_norm": 0.22526676952838898,
      "learning_rate": 2.130855457108485e-05,
      "loss": 0.4086,
      "step": 10013
    },
    {
      "epoch": 2.058587727412889,
      "grad_norm": 0.2246025949716568,
      "learning_rate": 2.1300063369452754e-05,
      "loss": 0.3882,
      "step": 10014
    },
    {
      "epoch": 2.058793298386268,
      "grad_norm": 0.22365763783454895,
      "learning_rate": 2.1291573335405763e-05,
      "loss": 0.3854,
      "step": 10015
    },
    {
      "epoch": 2.0589988693596464,
      "grad_norm": 0.2273135632276535,
      "learning_rate": 2.1283084469362117e-05,
      "loss": 0.4483,
      "step": 10016
    },
    {
      "epoch": 2.059204440333025,
      "grad_norm": 0.2241649329662323,
      "learning_rate": 2.1274596771740074e-05,
      "loss": 0.4028,
      "step": 10017
    },
    {
      "epoch": 2.0594100113064036,
      "grad_norm": 0.1520613133907318,
      "learning_rate": 2.1266110242957747e-05,
      "loss": 0.4413,
      "step": 10018
    },
    {
      "epoch": 2.059615582279782,
      "grad_norm": 0.22100979089736938,
      "learning_rate": 2.125762488343324e-05,
      "loss": 0.4095,
      "step": 10019
    },
    {
      "epoch": 2.0598211532531607,
      "grad_norm": 0.22822456061840057,
      "learning_rate": 2.1249140693584583e-05,
      "loss": 0.4182,
      "step": 10020
    },
    {
      "epoch": 2.0600267242265393,
      "grad_norm": 0.22433196008205414,
      "learning_rate": 2.1240657673829736e-05,
      "loss": 0.3938,
      "step": 10021
    },
    {
      "epoch": 2.060232295199918,
      "grad_norm": 0.2217511087656021,
      "learning_rate": 2.1232175824586653e-05,
      "loss": 0.3991,
      "step": 10022
    },
    {
      "epoch": 2.0604378661732965,
      "grad_norm": 0.2158900797367096,
      "learning_rate": 2.1223695146273172e-05,
      "loss": 0.3928,
      "step": 10023
    },
    {
      "epoch": 2.060643437146675,
      "grad_norm": 0.22462232410907745,
      "learning_rate": 2.1215215639307106e-05,
      "loss": 0.4,
      "step": 10024
    },
    {
      "epoch": 2.0608490081200532,
      "grad_norm": 0.235184445977211,
      "learning_rate": 2.1206737304106196e-05,
      "loss": 0.4223,
      "step": 10025
    },
    {
      "epoch": 2.061054579093432,
      "grad_norm": 0.23646195232868195,
      "learning_rate": 2.1198260141088127e-05,
      "loss": 0.377,
      "step": 10026
    },
    {
      "epoch": 2.0612601500668104,
      "grad_norm": 0.23219510912895203,
      "learning_rate": 2.1189784150670534e-05,
      "loss": 0.4182,
      "step": 10027
    },
    {
      "epoch": 2.061465721040189,
      "grad_norm": 0.22460506856441498,
      "learning_rate": 2.1181309333270966e-05,
      "loss": 0.3969,
      "step": 10028
    },
    {
      "epoch": 2.0616712920135676,
      "grad_norm": 0.2338314950466156,
      "learning_rate": 2.1172835689306973e-05,
      "loss": 0.3975,
      "step": 10029
    },
    {
      "epoch": 2.061876862986946,
      "grad_norm": 0.22709804773330688,
      "learning_rate": 2.116436321919601e-05,
      "loss": 0.4034,
      "step": 10030
    },
    {
      "epoch": 2.0620824339603248,
      "grad_norm": 0.2227647751569748,
      "learning_rate": 2.115589192335545e-05,
      "loss": 0.3797,
      "step": 10031
    },
    {
      "epoch": 2.0622880049337033,
      "grad_norm": 0.2209719717502594,
      "learning_rate": 2.1147421802202655e-05,
      "loss": 0.3913,
      "step": 10032
    },
    {
      "epoch": 2.062493575907082,
      "grad_norm": 0.2267482727766037,
      "learning_rate": 2.1138952856154907e-05,
      "loss": 0.4176,
      "step": 10033
    },
    {
      "epoch": 2.0626991468804605,
      "grad_norm": 0.22682222723960876,
      "learning_rate": 2.1130485085629413e-05,
      "loss": 0.4015,
      "step": 10034
    },
    {
      "epoch": 2.062904717853839,
      "grad_norm": 0.23114748299121857,
      "learning_rate": 2.1122018491043344e-05,
      "loss": 0.3889,
      "step": 10035
    },
    {
      "epoch": 2.0631102888272177,
      "grad_norm": 0.22637394070625305,
      "learning_rate": 2.1113553072813834e-05,
      "loss": 0.4254,
      "step": 10036
    },
    {
      "epoch": 2.0633158598005963,
      "grad_norm": 0.2336263358592987,
      "learning_rate": 2.1105088831357904e-05,
      "loss": 0.4082,
      "step": 10037
    },
    {
      "epoch": 2.063521430773975,
      "grad_norm": 0.6490523815155029,
      "learning_rate": 2.1096625767092575e-05,
      "loss": 0.438,
      "step": 10038
    },
    {
      "epoch": 2.0637270017473535,
      "grad_norm": 0.22613218426704407,
      "learning_rate": 2.108816388043477e-05,
      "loss": 0.3998,
      "step": 10039
    },
    {
      "epoch": 2.0639325727207316,
      "grad_norm": 0.23520736396312714,
      "learning_rate": 2.1079703171801374e-05,
      "loss": 0.4229,
      "step": 10040
    },
    {
      "epoch": 2.06413814369411,
      "grad_norm": 0.22257588803768158,
      "learning_rate": 2.1071243641609196e-05,
      "loss": 0.3859,
      "step": 10041
    },
    {
      "epoch": 2.064343714667489,
      "grad_norm": 0.22676822543144226,
      "learning_rate": 2.106278529027498e-05,
      "loss": 0.3839,
      "step": 10042
    },
    {
      "epoch": 2.0645492856408674,
      "grad_norm": 0.22315295040607452,
      "learning_rate": 2.1054328118215475e-05,
      "loss": 0.3921,
      "step": 10043
    },
    {
      "epoch": 2.064754856614246,
      "grad_norm": 0.22379836440086365,
      "learning_rate": 2.1045872125847298e-05,
      "loss": 0.3746,
      "step": 10044
    },
    {
      "epoch": 2.0649604275876245,
      "grad_norm": 0.21513979136943817,
      "learning_rate": 2.103741731358704e-05,
      "loss": 0.393,
      "step": 10045
    },
    {
      "epoch": 2.065165998561003,
      "grad_norm": 0.24278521537780762,
      "learning_rate": 2.102896368185123e-05,
      "loss": 0.4002,
      "step": 10046
    },
    {
      "epoch": 2.0653715695343817,
      "grad_norm": 0.1328233927488327,
      "learning_rate": 2.1020511231056337e-05,
      "loss": 0.4638,
      "step": 10047
    },
    {
      "epoch": 2.0655771405077603,
      "grad_norm": 0.23675784468650818,
      "learning_rate": 2.101205996161876e-05,
      "loss": 0.3953,
      "step": 10048
    },
    {
      "epoch": 2.065782711481139,
      "grad_norm": 0.22523106634616852,
      "learning_rate": 2.1003609873954888e-05,
      "loss": 0.4019,
      "step": 10049
    },
    {
      "epoch": 2.0659882824545175,
      "grad_norm": 0.12683424353599548,
      "learning_rate": 2.0995160968480998e-05,
      "loss": 0.4565,
      "step": 10050
    },
    {
      "epoch": 2.066193853427896,
      "grad_norm": 0.22555489838123322,
      "learning_rate": 2.098671324561333e-05,
      "loss": 0.4062,
      "step": 10051
    },
    {
      "epoch": 2.0663994244012747,
      "grad_norm": 0.23419348895549774,
      "learning_rate": 2.0978266705768064e-05,
      "loss": 0.4253,
      "step": 10052
    },
    {
      "epoch": 2.0666049953746533,
      "grad_norm": 0.2320510447025299,
      "learning_rate": 2.0969821349361312e-05,
      "loss": 0.4052,
      "step": 10053
    },
    {
      "epoch": 2.066810566348032,
      "grad_norm": 0.2119479775428772,
      "learning_rate": 2.0961377176809152e-05,
      "loss": 0.3983,
      "step": 10054
    },
    {
      "epoch": 2.0670161373214104,
      "grad_norm": 0.21941865980625153,
      "learning_rate": 2.0952934188527566e-05,
      "loss": 0.3949,
      "step": 10055
    },
    {
      "epoch": 2.0672217082947886,
      "grad_norm": 0.1271030455827713,
      "learning_rate": 2.094449238493253e-05,
      "loss": 0.4436,
      "step": 10056
    },
    {
      "epoch": 2.067427279268167,
      "grad_norm": 0.22050043940544128,
      "learning_rate": 2.093605176643992e-05,
      "loss": 0.4041,
      "step": 10057
    },
    {
      "epoch": 2.0676328502415457,
      "grad_norm": 0.22902661561965942,
      "learning_rate": 2.0927612333465567e-05,
      "loss": 0.4003,
      "step": 10058
    },
    {
      "epoch": 2.0678384212149243,
      "grad_norm": 0.2170822024345398,
      "learning_rate": 2.091917408642522e-05,
      "loss": 0.391,
      "step": 10059
    },
    {
      "epoch": 2.068043992188303,
      "grad_norm": 0.2229936420917511,
      "learning_rate": 2.0910737025734634e-05,
      "loss": 0.403,
      "step": 10060
    },
    {
      "epoch": 2.0682495631616815,
      "grad_norm": 0.2259387969970703,
      "learning_rate": 2.090230115180944e-05,
      "loss": 0.3887,
      "step": 10061
    },
    {
      "epoch": 2.06845513413506,
      "grad_norm": 0.22917728126049042,
      "learning_rate": 2.0893866465065215e-05,
      "loss": 0.4047,
      "step": 10062
    },
    {
      "epoch": 2.0686607051084387,
      "grad_norm": 0.22916476428508759,
      "learning_rate": 2.088543296591754e-05,
      "loss": 0.3906,
      "step": 10063
    },
    {
      "epoch": 2.0688662760818173,
      "grad_norm": 0.22529999911785126,
      "learning_rate": 2.087700065478187e-05,
      "loss": 0.4009,
      "step": 10064
    },
    {
      "epoch": 2.069071847055196,
      "grad_norm": 0.22376291453838348,
      "learning_rate": 2.0868569532073623e-05,
      "loss": 0.4003,
      "step": 10065
    },
    {
      "epoch": 2.0692774180285745,
      "grad_norm": 0.21545644104480743,
      "learning_rate": 2.0860139598208166e-05,
      "loss": 0.4031,
      "step": 10066
    },
    {
      "epoch": 2.069482989001953,
      "grad_norm": 0.1322476714849472,
      "learning_rate": 2.0851710853600806e-05,
      "loss": 0.4664,
      "step": 10067
    },
    {
      "epoch": 2.0696885599753316,
      "grad_norm": 0.22991631925106049,
      "learning_rate": 2.0843283298666783e-05,
      "loss": 0.4024,
      "step": 10068
    },
    {
      "epoch": 2.06989413094871,
      "grad_norm": 0.22085146605968475,
      "learning_rate": 2.0834856933821267e-05,
      "loss": 0.3827,
      "step": 10069
    },
    {
      "epoch": 2.070099701922089,
      "grad_norm": 0.1257437914609909,
      "learning_rate": 2.0826431759479416e-05,
      "loss": 0.4524,
      "step": 10070
    },
    {
      "epoch": 2.070305272895467,
      "grad_norm": 0.1249329000711441,
      "learning_rate": 2.081800777605628e-05,
      "loss": 0.4446,
      "step": 10071
    },
    {
      "epoch": 2.0705108438688455,
      "grad_norm": 0.12916463613510132,
      "learning_rate": 2.0809584983966886e-05,
      "loss": 0.4477,
      "step": 10072
    },
    {
      "epoch": 2.070716414842224,
      "grad_norm": 0.22638201713562012,
      "learning_rate": 2.080116338362617e-05,
      "loss": 0.3862,
      "step": 10073
    },
    {
      "epoch": 2.0709219858156027,
      "grad_norm": 0.21907664835453033,
      "learning_rate": 2.0792742975449027e-05,
      "loss": 0.3962,
      "step": 10074
    },
    {
      "epoch": 2.0711275567889813,
      "grad_norm": 0.12063062191009521,
      "learning_rate": 2.0784323759850295e-05,
      "loss": 0.4442,
      "step": 10075
    },
    {
      "epoch": 2.07133312776236,
      "grad_norm": 0.22785618901252747,
      "learning_rate": 2.0775905737244727e-05,
      "loss": 0.4005,
      "step": 10076
    },
    {
      "epoch": 2.0715386987357385,
      "grad_norm": 0.2289772778749466,
      "learning_rate": 2.076748890804708e-05,
      "loss": 0.4268,
      "step": 10077
    },
    {
      "epoch": 2.071744269709117,
      "grad_norm": 0.22283616662025452,
      "learning_rate": 2.0759073272671997e-05,
      "loss": 0.4003,
      "step": 10078
    },
    {
      "epoch": 2.0719498406824957,
      "grad_norm": 0.23021160066127777,
      "learning_rate": 2.0750658831534067e-05,
      "loss": 0.3948,
      "step": 10079
    },
    {
      "epoch": 2.0721554116558742,
      "grad_norm": 0.22141693532466888,
      "learning_rate": 2.0742245585047817e-05,
      "loss": 0.4089,
      "step": 10080
    },
    {
      "epoch": 2.072360982629253,
      "grad_norm": 0.2241126000881195,
      "learning_rate": 2.0733833533627767e-05,
      "loss": 0.3935,
      "step": 10081
    },
    {
      "epoch": 2.0725665536026314,
      "grad_norm": 0.1273168921470642,
      "learning_rate": 2.0725422677688313e-05,
      "loss": 0.4539,
      "step": 10082
    },
    {
      "epoch": 2.07277212457601,
      "grad_norm": 0.2204464226961136,
      "learning_rate": 2.0717013017643815e-05,
      "loss": 0.4002,
      "step": 10083
    },
    {
      "epoch": 2.0729776955493886,
      "grad_norm": 0.22708940505981445,
      "learning_rate": 2.0708604553908598e-05,
      "loss": 0.4088,
      "step": 10084
    },
    {
      "epoch": 2.073183266522767,
      "grad_norm": 0.23681271076202393,
      "learning_rate": 2.07001972868969e-05,
      "loss": 0.4163,
      "step": 10085
    },
    {
      "epoch": 2.0733888374961453,
      "grad_norm": 0.22358982264995575,
      "learning_rate": 2.0691791217022905e-05,
      "loss": 0.4071,
      "step": 10086
    },
    {
      "epoch": 2.073594408469524,
      "grad_norm": 0.2268630564212799,
      "learning_rate": 2.068338634470074e-05,
      "loss": 0.4045,
      "step": 10087
    },
    {
      "epoch": 2.0737999794429025,
      "grad_norm": 0.22552597522735596,
      "learning_rate": 2.0674982670344475e-05,
      "loss": 0.4144,
      "step": 10088
    },
    {
      "epoch": 2.074005550416281,
      "grad_norm": 0.22645661234855652,
      "learning_rate": 2.0666580194368117e-05,
      "loss": 0.4017,
      "step": 10089
    },
    {
      "epoch": 2.0742111213896597,
      "grad_norm": 0.2267918735742569,
      "learning_rate": 2.0658178917185603e-05,
      "loss": 0.3807,
      "step": 10090
    },
    {
      "epoch": 2.0744166923630383,
      "grad_norm": 0.2314879298210144,
      "learning_rate": 2.0649778839210855e-05,
      "loss": 0.405,
      "step": 10091
    },
    {
      "epoch": 2.074622263336417,
      "grad_norm": 0.22707362473011017,
      "learning_rate": 2.0641379960857693e-05,
      "loss": 0.4071,
      "step": 10092
    },
    {
      "epoch": 2.0748278343097954,
      "grad_norm": 0.22233855724334717,
      "learning_rate": 2.0632982282539892e-05,
      "loss": 0.409,
      "step": 10093
    },
    {
      "epoch": 2.075033405283174,
      "grad_norm": 0.2284967601299286,
      "learning_rate": 2.0624585804671157e-05,
      "loss": 0.3873,
      "step": 10094
    },
    {
      "epoch": 2.0752389762565526,
      "grad_norm": 0.22250832617282867,
      "learning_rate": 2.0616190527665155e-05,
      "loss": 0.4054,
      "step": 10095
    },
    {
      "epoch": 2.075444547229931,
      "grad_norm": 0.2331288605928421,
      "learning_rate": 2.0607796451935468e-05,
      "loss": 0.3975,
      "step": 10096
    },
    {
      "epoch": 2.07565011820331,
      "grad_norm": 0.2304941862821579,
      "learning_rate": 2.059940357789563e-05,
      "loss": 0.3924,
      "step": 10097
    },
    {
      "epoch": 2.0758556891766884,
      "grad_norm": 0.2210913896560669,
      "learning_rate": 2.0591011905959142e-05,
      "loss": 0.383,
      "step": 10098
    },
    {
      "epoch": 2.076061260150067,
      "grad_norm": 0.22776024043560028,
      "learning_rate": 2.0582621436539415e-05,
      "loss": 0.4058,
      "step": 10099
    },
    {
      "epoch": 2.0762668311234456,
      "grad_norm": 0.21400035917758942,
      "learning_rate": 2.0574232170049804e-05,
      "loss": 0.3827,
      "step": 10100
    },
    {
      "epoch": 2.076472402096824,
      "grad_norm": 0.2280118465423584,
      "learning_rate": 2.0565844106903584e-05,
      "loss": 0.4127,
      "step": 10101
    },
    {
      "epoch": 2.0766779730702023,
      "grad_norm": 0.2156902402639389,
      "learning_rate": 2.0557457247514045e-05,
      "loss": 0.4023,
      "step": 10102
    },
    {
      "epoch": 2.076883544043581,
      "grad_norm": 0.22840487957000732,
      "learning_rate": 2.0549071592294338e-05,
      "loss": 0.411,
      "step": 10103
    },
    {
      "epoch": 2.0770891150169595,
      "grad_norm": 0.22176077961921692,
      "learning_rate": 2.0540687141657576e-05,
      "loss": 0.3836,
      "step": 10104
    },
    {
      "epoch": 2.077294685990338,
      "grad_norm": 0.2274215966463089,
      "learning_rate": 2.053230389601685e-05,
      "loss": 0.4141,
      "step": 10105
    },
    {
      "epoch": 2.0775002569637167,
      "grad_norm": 0.2207675725221634,
      "learning_rate": 2.052392185578515e-05,
      "loss": 0.3992,
      "step": 10106
    },
    {
      "epoch": 2.0777058279370952,
      "grad_norm": 0.23283138871192932,
      "learning_rate": 2.051554102137542e-05,
      "loss": 0.3971,
      "step": 10107
    },
    {
      "epoch": 2.077911398910474,
      "grad_norm": 0.24435223639011383,
      "learning_rate": 2.0507161393200547e-05,
      "loss": 0.3989,
      "step": 10108
    },
    {
      "epoch": 2.0781169698838524,
      "grad_norm": 0.22710062563419342,
      "learning_rate": 2.0498782971673353e-05,
      "loss": 0.3999,
      "step": 10109
    },
    {
      "epoch": 2.078322540857231,
      "grad_norm": 0.22904515266418457,
      "learning_rate": 2.0490405757206597e-05,
      "loss": 0.3923,
      "step": 10110
    },
    {
      "epoch": 2.0785281118306096,
      "grad_norm": 0.13042299449443817,
      "learning_rate": 2.0482029750212982e-05,
      "loss": 0.4425,
      "step": 10111
    },
    {
      "epoch": 2.078733682803988,
      "grad_norm": 0.13699179887771606,
      "learning_rate": 2.0473654951105176e-05,
      "loss": 0.4472,
      "step": 10112
    },
    {
      "epoch": 2.0789392537773668,
      "grad_norm": 0.228811115026474,
      "learning_rate": 2.046528136029576e-05,
      "loss": 0.4027,
      "step": 10113
    },
    {
      "epoch": 2.0791448247507454,
      "grad_norm": 0.21991683542728424,
      "learning_rate": 2.0456908978197252e-05,
      "loss": 0.3894,
      "step": 10114
    },
    {
      "epoch": 2.079350395724124,
      "grad_norm": 0.2570091485977173,
      "learning_rate": 2.0448537805222124e-05,
      "loss": 0.3982,
      "step": 10115
    },
    {
      "epoch": 2.0795559666975025,
      "grad_norm": 0.22256134450435638,
      "learning_rate": 2.0440167841782787e-05,
      "loss": 0.387,
      "step": 10116
    },
    {
      "epoch": 2.0797615376708807,
      "grad_norm": 0.22056585550308228,
      "learning_rate": 2.0431799088291588e-05,
      "loss": 0.3988,
      "step": 10117
    },
    {
      "epoch": 2.0799671086442593,
      "grad_norm": 0.21491390466690063,
      "learning_rate": 2.04234315451608e-05,
      "loss": 0.4163,
      "step": 10118
    },
    {
      "epoch": 2.080172679617638,
      "grad_norm": 0.21639686822891235,
      "learning_rate": 2.0415065212802687e-05,
      "loss": 0.3965,
      "step": 10119
    },
    {
      "epoch": 2.0803782505910164,
      "grad_norm": 0.2295675426721573,
      "learning_rate": 2.04067000916294e-05,
      "loss": 0.3914,
      "step": 10120
    },
    {
      "epoch": 2.080583821564395,
      "grad_norm": 0.12334294617176056,
      "learning_rate": 2.039833618205305e-05,
      "loss": 0.4585,
      "step": 10121
    },
    {
      "epoch": 2.0807893925377736,
      "grad_norm": 0.221688911318779,
      "learning_rate": 2.0389973484485674e-05,
      "loss": 0.3932,
      "step": 10122
    },
    {
      "epoch": 2.080994963511152,
      "grad_norm": 0.22646862268447876,
      "learning_rate": 2.0381611999339288e-05,
      "loss": 0.3961,
      "step": 10123
    },
    {
      "epoch": 2.081200534484531,
      "grad_norm": 0.12576346099376678,
      "learning_rate": 2.037325172702582e-05,
      "loss": 0.4689,
      "step": 10124
    },
    {
      "epoch": 2.0814061054579094,
      "grad_norm": 0.2239895462989807,
      "learning_rate": 2.0364892667957114e-05,
      "loss": 0.3882,
      "step": 10125
    },
    {
      "epoch": 2.081611676431288,
      "grad_norm": 0.2263174057006836,
      "learning_rate": 2.035653482254502e-05,
      "loss": 0.4017,
      "step": 10126
    },
    {
      "epoch": 2.0818172474046666,
      "grad_norm": 0.22486351430416107,
      "learning_rate": 2.034817819120127e-05,
      "loss": 0.3867,
      "step": 10127
    },
    {
      "epoch": 2.082022818378045,
      "grad_norm": 0.12152829766273499,
      "learning_rate": 2.0339822774337562e-05,
      "loss": 0.467,
      "step": 10128
    },
    {
      "epoch": 2.0822283893514237,
      "grad_norm": 0.2354230135679245,
      "learning_rate": 2.0331468572365525e-05,
      "loss": 0.4021,
      "step": 10129
    },
    {
      "epoch": 2.0824339603248023,
      "grad_norm": 0.23212236166000366,
      "learning_rate": 2.0323115585696726e-05,
      "loss": 0.3827,
      "step": 10130
    },
    {
      "epoch": 2.082639531298181,
      "grad_norm": 0.2421758621931076,
      "learning_rate": 2.031476381474267e-05,
      "loss": 0.3984,
      "step": 10131
    },
    {
      "epoch": 2.082845102271559,
      "grad_norm": 0.21930502355098724,
      "learning_rate": 2.0306413259914836e-05,
      "loss": 0.3948,
      "step": 10132
    },
    {
      "epoch": 2.0830506732449376,
      "grad_norm": 0.2258896678686142,
      "learning_rate": 2.0298063921624603e-05,
      "loss": 0.3935,
      "step": 10133
    },
    {
      "epoch": 2.0832562442183162,
      "grad_norm": 0.2229209989309311,
      "learning_rate": 2.0289715800283306e-05,
      "loss": 0.3999,
      "step": 10134
    },
    {
      "epoch": 2.083461815191695,
      "grad_norm": 0.22103843092918396,
      "learning_rate": 2.0281368896302212e-05,
      "loss": 0.3988,
      "step": 10135
    },
    {
      "epoch": 2.0836673861650734,
      "grad_norm": 0.22075578570365906,
      "learning_rate": 2.0273023210092543e-05,
      "loss": 0.394,
      "step": 10136
    },
    {
      "epoch": 2.083872957138452,
      "grad_norm": 0.22386351227760315,
      "learning_rate": 2.026467874206545e-05,
      "loss": 0.3929,
      "step": 10137
    },
    {
      "epoch": 2.0840785281118306,
      "grad_norm": 0.22971957921981812,
      "learning_rate": 2.0256335492631997e-05,
      "loss": 0.402,
      "step": 10138
    },
    {
      "epoch": 2.084284099085209,
      "grad_norm": 0.2303125262260437,
      "learning_rate": 2.024799346220326e-05,
      "loss": 0.3955,
      "step": 10139
    },
    {
      "epoch": 2.0844896700585878,
      "grad_norm": 0.23009240627288818,
      "learning_rate": 2.0239652651190203e-05,
      "loss": 0.3969,
      "step": 10140
    },
    {
      "epoch": 2.0846952410319664,
      "grad_norm": 0.22021248936653137,
      "learning_rate": 2.0231313060003725e-05,
      "loss": 0.4248,
      "step": 10141
    },
    {
      "epoch": 2.084900812005345,
      "grad_norm": 0.2278214991092682,
      "learning_rate": 2.0222974689054684e-05,
      "loss": 0.4051,
      "step": 10142
    },
    {
      "epoch": 2.0851063829787235,
      "grad_norm": 0.2289620041847229,
      "learning_rate": 2.0214637538753872e-05,
      "loss": 0.3883,
      "step": 10143
    },
    {
      "epoch": 2.085311953952102,
      "grad_norm": 0.21833720803260803,
      "learning_rate": 2.0206301609512006e-05,
      "loss": 0.3837,
      "step": 10144
    },
    {
      "epoch": 2.0855175249254807,
      "grad_norm": 0.225063756108284,
      "learning_rate": 2.0197966901739792e-05,
      "loss": 0.4063,
      "step": 10145
    },
    {
      "epoch": 2.0857230958988593,
      "grad_norm": 0.22253869473934174,
      "learning_rate": 2.0189633415847808e-05,
      "loss": 0.3882,
      "step": 10146
    },
    {
      "epoch": 2.0859286668722374,
      "grad_norm": 0.22410309314727783,
      "learning_rate": 2.0181301152246636e-05,
      "loss": 0.4163,
      "step": 10147
    },
    {
      "epoch": 2.086134237845616,
      "grad_norm": 0.2274530529975891,
      "learning_rate": 2.0172970111346756e-05,
      "loss": 0.405,
      "step": 10148
    },
    {
      "epoch": 2.0863398088189946,
      "grad_norm": 0.12682540714740753,
      "learning_rate": 2.01646402935586e-05,
      "loss": 0.4591,
      "step": 10149
    },
    {
      "epoch": 2.086545379792373,
      "grad_norm": 0.22507302463054657,
      "learning_rate": 2.015631169929253e-05,
      "loss": 0.4096,
      "step": 10150
    },
    {
      "epoch": 2.086750950765752,
      "grad_norm": 0.2234722524881363,
      "learning_rate": 2.014798432895887e-05,
      "loss": 0.3815,
      "step": 10151
    },
    {
      "epoch": 2.0869565217391304,
      "grad_norm": 0.231471449136734,
      "learning_rate": 2.0139658182967842e-05,
      "loss": 0.4016,
      "step": 10152
    },
    {
      "epoch": 2.087162092712509,
      "grad_norm": 0.2210719883441925,
      "learning_rate": 2.0131333261729683e-05,
      "loss": 0.3896,
      "step": 10153
    },
    {
      "epoch": 2.0873676636858876,
      "grad_norm": 0.22725726664066315,
      "learning_rate": 2.012300956565449e-05,
      "loss": 0.3893,
      "step": 10154
    },
    {
      "epoch": 2.087573234659266,
      "grad_norm": 0.21838733553886414,
      "learning_rate": 2.011468709515234e-05,
      "loss": 0.3981,
      "step": 10155
    },
    {
      "epoch": 2.0877788056326447,
      "grad_norm": 0.22779439389705658,
      "learning_rate": 2.010636585063325e-05,
      "loss": 0.4055,
      "step": 10156
    },
    {
      "epoch": 2.0879843766060233,
      "grad_norm": 0.2215360850095749,
      "learning_rate": 2.009804583250716e-05,
      "loss": 0.3861,
      "step": 10157
    },
    {
      "epoch": 2.088189947579402,
      "grad_norm": 0.22047077119350433,
      "learning_rate": 2.008972704118396e-05,
      "loss": 0.3813,
      "step": 10158
    },
    {
      "epoch": 2.0883955185527805,
      "grad_norm": 0.22012098133563995,
      "learning_rate": 2.008140947707346e-05,
      "loss": 0.4157,
      "step": 10159
    },
    {
      "epoch": 2.088601089526159,
      "grad_norm": 0.22172100841999054,
      "learning_rate": 2.0073093140585463e-05,
      "loss": 0.4031,
      "step": 10160
    },
    {
      "epoch": 2.0888066604995377,
      "grad_norm": 0.2272823601961136,
      "learning_rate": 2.0064778032129662e-05,
      "loss": 0.4071,
      "step": 10161
    },
    {
      "epoch": 2.089012231472916,
      "grad_norm": 0.22149771451950073,
      "learning_rate": 2.0056464152115694e-05,
      "loss": 0.3809,
      "step": 10162
    },
    {
      "epoch": 2.0892178024462944,
      "grad_norm": 0.1271590292453766,
      "learning_rate": 2.004815150095316e-05,
      "loss": 0.4552,
      "step": 10163
    },
    {
      "epoch": 2.089423373419673,
      "grad_norm": 0.21896865963935852,
      "learning_rate": 2.003984007905157e-05,
      "loss": 0.3918,
      "step": 10164
    },
    {
      "epoch": 2.0896289443930516,
      "grad_norm": 0.1302296221256256,
      "learning_rate": 2.003152988682038e-05,
      "loss": 0.4527,
      "step": 10165
    },
    {
      "epoch": 2.08983451536643,
      "grad_norm": 0.2259882539510727,
      "learning_rate": 2.002322092466903e-05,
      "loss": 0.3874,
      "step": 10166
    },
    {
      "epoch": 2.0900400863398088,
      "grad_norm": 0.22086426615715027,
      "learning_rate": 2.001491319300684e-05,
      "loss": 0.3821,
      "step": 10167
    },
    {
      "epoch": 2.0902456573131873,
      "grad_norm": 0.1255428045988083,
      "learning_rate": 2.0006606692243083e-05,
      "loss": 0.4736,
      "step": 10168
    },
    {
      "epoch": 2.090451228286566,
      "grad_norm": 0.21999509632587433,
      "learning_rate": 1.9998301422787013e-05,
      "loss": 0.3945,
      "step": 10169
    },
    {
      "epoch": 2.0906567992599445,
      "grad_norm": 0.22573313117027283,
      "learning_rate": 1.9989997385047776e-05,
      "loss": 0.4072,
      "step": 10170
    },
    {
      "epoch": 2.090862370233323,
      "grad_norm": 0.13236026465892792,
      "learning_rate": 1.9981694579434462e-05,
      "loss": 0.4539,
      "step": 10171
    },
    {
      "epoch": 2.0910679412067017,
      "grad_norm": 0.22156447172164917,
      "learning_rate": 1.997339300635613e-05,
      "loss": 0.3903,
      "step": 10172
    },
    {
      "epoch": 2.0912735121800803,
      "grad_norm": 0.22315345704555511,
      "learning_rate": 1.996509266622173e-05,
      "loss": 0.4011,
      "step": 10173
    },
    {
      "epoch": 2.091479083153459,
      "grad_norm": 0.210943341255188,
      "learning_rate": 1.9956793559440223e-05,
      "loss": 0.4072,
      "step": 10174
    },
    {
      "epoch": 2.0916846541268375,
      "grad_norm": 0.21411919593811035,
      "learning_rate": 1.994849568642044e-05,
      "loss": 0.3907,
      "step": 10175
    },
    {
      "epoch": 2.091890225100216,
      "grad_norm": 0.21381069719791412,
      "learning_rate": 1.9940199047571183e-05,
      "loss": 0.3825,
      "step": 10176
    },
    {
      "epoch": 2.092095796073594,
      "grad_norm": 0.22450023889541626,
      "learning_rate": 1.9931903643301194e-05,
      "loss": 0.4092,
      "step": 10177
    },
    {
      "epoch": 2.092301367046973,
      "grad_norm": 0.22319452464580536,
      "learning_rate": 1.9923609474019144e-05,
      "loss": 0.3992,
      "step": 10178
    },
    {
      "epoch": 2.0925069380203514,
      "grad_norm": 0.22775287926197052,
      "learning_rate": 1.9915316540133648e-05,
      "loss": 0.4082,
      "step": 10179
    },
    {
      "epoch": 2.09271250899373,
      "grad_norm": 0.22660957276821136,
      "learning_rate": 1.990702484205324e-05,
      "loss": 0.4158,
      "step": 10180
    },
    {
      "epoch": 2.0929180799671085,
      "grad_norm": 0.21837587654590607,
      "learning_rate": 1.9898734380186455e-05,
      "loss": 0.4005,
      "step": 10181
    },
    {
      "epoch": 2.093123650940487,
      "grad_norm": 0.22248844802379608,
      "learning_rate": 1.98904451549417e-05,
      "loss": 0.4156,
      "step": 10182
    },
    {
      "epoch": 2.0933292219138657,
      "grad_norm": 0.2321203351020813,
      "learning_rate": 1.988215716672736e-05,
      "loss": 0.3997,
      "step": 10183
    },
    {
      "epoch": 2.0935347928872443,
      "grad_norm": 0.22257383167743683,
      "learning_rate": 1.9873870415951728e-05,
      "loss": 0.4017,
      "step": 10184
    },
    {
      "epoch": 2.093740363860623,
      "grad_norm": 0.22243481874465942,
      "learning_rate": 1.986558490302306e-05,
      "loss": 0.3935,
      "step": 10185
    },
    {
      "epoch": 2.0939459348340015,
      "grad_norm": 0.2248910516500473,
      "learning_rate": 1.9857300628349532e-05,
      "loss": 0.3968,
      "step": 10186
    },
    {
      "epoch": 2.09415150580738,
      "grad_norm": 0.22491000592708588,
      "learning_rate": 1.98490175923393e-05,
      "loss": 0.3925,
      "step": 10187
    },
    {
      "epoch": 2.0943570767807587,
      "grad_norm": 0.22509317100048065,
      "learning_rate": 1.9840735795400418e-05,
      "loss": 0.4006,
      "step": 10188
    },
    {
      "epoch": 2.0945626477541373,
      "grad_norm": 0.2187567800283432,
      "learning_rate": 1.9832455237940873e-05,
      "loss": 0.4097,
      "step": 10189
    },
    {
      "epoch": 2.094768218727516,
      "grad_norm": 0.12614451348781586,
      "learning_rate": 1.9824175920368644e-05,
      "loss": 0.4585,
      "step": 10190
    },
    {
      "epoch": 2.0949737897008944,
      "grad_norm": 0.2270839512348175,
      "learning_rate": 1.981589784309159e-05,
      "loss": 0.4005,
      "step": 10191
    },
    {
      "epoch": 2.0951793606742726,
      "grad_norm": 0.22762618958950043,
      "learning_rate": 1.9807621006517543e-05,
      "loss": 0.386,
      "step": 10192
    },
    {
      "epoch": 2.095384931647651,
      "grad_norm": 0.23058810830116272,
      "learning_rate": 1.9799345411054263e-05,
      "loss": 0.3889,
      "step": 10193
    },
    {
      "epoch": 2.0955905026210297,
      "grad_norm": 0.22418002784252167,
      "learning_rate": 1.9791071057109426e-05,
      "loss": 0.3864,
      "step": 10194
    },
    {
      "epoch": 2.0957960735944083,
      "grad_norm": 0.23092950880527496,
      "learning_rate": 1.9782797945090707e-05,
      "loss": 0.4238,
      "step": 10195
    },
    {
      "epoch": 2.096001644567787,
      "grad_norm": 0.2287166863679886,
      "learning_rate": 1.977452607540567e-05,
      "loss": 0.3985,
      "step": 10196
    },
    {
      "epoch": 2.0962072155411655,
      "grad_norm": 0.22596527636051178,
      "learning_rate": 1.9766255448461836e-05,
      "loss": 0.4052,
      "step": 10197
    },
    {
      "epoch": 2.096412786514544,
      "grad_norm": 0.1205587163567543,
      "learning_rate": 1.9757986064666647e-05,
      "loss": 0.4629,
      "step": 10198
    },
    {
      "epoch": 2.0966183574879227,
      "grad_norm": 0.2247573435306549,
      "learning_rate": 1.9749717924427508e-05,
      "loss": 0.389,
      "step": 10199
    },
    {
      "epoch": 2.0968239284613013,
      "grad_norm": 0.12539906799793243,
      "learning_rate": 1.9741451028151723e-05,
      "loss": 0.4471,
      "step": 10200
    },
    {
      "epoch": 2.09702949943468,
      "grad_norm": 0.22345465421676636,
      "learning_rate": 1.9733185376246612e-05,
      "loss": 0.3977,
      "step": 10201
    },
    {
      "epoch": 2.0972350704080585,
      "grad_norm": 0.21945199370384216,
      "learning_rate": 1.9724920969119356e-05,
      "loss": 0.3732,
      "step": 10202
    },
    {
      "epoch": 2.097440641381437,
      "grad_norm": 0.2249259501695633,
      "learning_rate": 1.9716657807177112e-05,
      "loss": 0.3822,
      "step": 10203
    },
    {
      "epoch": 2.0976462123548156,
      "grad_norm": 0.22166243195533752,
      "learning_rate": 1.9708395890826962e-05,
      "loss": 0.3932,
      "step": 10204
    },
    {
      "epoch": 2.097851783328194,
      "grad_norm": 0.22853392362594604,
      "learning_rate": 1.9700135220475934e-05,
      "loss": 0.4078,
      "step": 10205
    },
    {
      "epoch": 2.098057354301573,
      "grad_norm": 0.22204367816448212,
      "learning_rate": 1.969187579653099e-05,
      "loss": 0.3897,
      "step": 10206
    },
    {
      "epoch": 2.098262925274951,
      "grad_norm": 0.21821551024913788,
      "learning_rate": 1.968361761939902e-05,
      "loss": 0.4099,
      "step": 10207
    },
    {
      "epoch": 2.0984684962483295,
      "grad_norm": 0.21198779344558716,
      "learning_rate": 1.96753606894869e-05,
      "loss": 0.4046,
      "step": 10208
    },
    {
      "epoch": 2.098674067221708,
      "grad_norm": 0.22482189536094666,
      "learning_rate": 1.966710500720139e-05,
      "loss": 0.4052,
      "step": 10209
    },
    {
      "epoch": 2.0988796381950867,
      "grad_norm": 0.22468796372413635,
      "learning_rate": 1.9658850572949195e-05,
      "loss": 0.3828,
      "step": 10210
    },
    {
      "epoch": 2.0990852091684653,
      "grad_norm": 0.1260218471288681,
      "learning_rate": 1.9650597387137008e-05,
      "loss": 0.4485,
      "step": 10211
    },
    {
      "epoch": 2.099290780141844,
      "grad_norm": 0.23379628360271454,
      "learning_rate": 1.96423454501714e-05,
      "loss": 0.4066,
      "step": 10212
    },
    {
      "epoch": 2.0994963511152225,
      "grad_norm": 0.22664855420589447,
      "learning_rate": 1.9634094762458916e-05,
      "loss": 0.4069,
      "step": 10213
    },
    {
      "epoch": 2.099701922088601,
      "grad_norm": 0.23156146705150604,
      "learning_rate": 1.9625845324406e-05,
      "loss": 0.4082,
      "step": 10214
    },
    {
      "epoch": 2.0999074930619797,
      "grad_norm": 0.12826383113861084,
      "learning_rate": 1.9617597136419107e-05,
      "loss": 0.4626,
      "step": 10215
    },
    {
      "epoch": 2.1001130640353582,
      "grad_norm": 0.2237342894077301,
      "learning_rate": 1.960935019890456e-05,
      "loss": 0.4013,
      "step": 10216
    },
    {
      "epoch": 2.100318635008737,
      "grad_norm": 0.1258496642112732,
      "learning_rate": 1.960110451226866e-05,
      "loss": 0.4512,
      "step": 10217
    },
    {
      "epoch": 2.1005242059821154,
      "grad_norm": 0.23888415098190308,
      "learning_rate": 1.9592860076917626e-05,
      "loss": 0.4139,
      "step": 10218
    },
    {
      "epoch": 2.100729776955494,
      "grad_norm": 0.12545999884605408,
      "learning_rate": 1.9584616893257618e-05,
      "loss": 0.4433,
      "step": 10219
    },
    {
      "epoch": 2.1009353479288726,
      "grad_norm": 0.22233633697032928,
      "learning_rate": 1.9576374961694747e-05,
      "loss": 0.4026,
      "step": 10220
    },
    {
      "epoch": 2.101140918902251,
      "grad_norm": 0.218837171792984,
      "learning_rate": 1.956813428263504e-05,
      "loss": 0.3964,
      "step": 10221
    },
    {
      "epoch": 2.1013464898756298,
      "grad_norm": 0.22407136857509613,
      "learning_rate": 1.9559894856484503e-05,
      "loss": 0.3996,
      "step": 10222
    },
    {
      "epoch": 2.101552060849008,
      "grad_norm": 0.22463653981685638,
      "learning_rate": 1.9551656683649034e-05,
      "loss": 0.3896,
      "step": 10223
    },
    {
      "epoch": 2.1017576318223865,
      "grad_norm": 0.22171586751937866,
      "learning_rate": 1.95434197645345e-05,
      "loss": 0.3992,
      "step": 10224
    },
    {
      "epoch": 2.101963202795765,
      "grad_norm": 0.2200179100036621,
      "learning_rate": 1.9535184099546695e-05,
      "loss": 0.4082,
      "step": 10225
    },
    {
      "epoch": 2.1021687737691437,
      "grad_norm": 0.11994064599275589,
      "learning_rate": 1.952694968909134e-05,
      "loss": 0.4613,
      "step": 10226
    },
    {
      "epoch": 2.1023743447425223,
      "grad_norm": 0.23581825196743011,
      "learning_rate": 1.9518716533574114e-05,
      "loss": 0.4014,
      "step": 10227
    },
    {
      "epoch": 2.102579915715901,
      "grad_norm": 0.2292327582836151,
      "learning_rate": 1.9510484633400608e-05,
      "loss": 0.3876,
      "step": 10228
    },
    {
      "epoch": 2.1027854866892794,
      "grad_norm": 0.22429661452770233,
      "learning_rate": 1.9502253988976407e-05,
      "loss": 0.3974,
      "step": 10229
    },
    {
      "epoch": 2.102991057662658,
      "grad_norm": 0.1279507577419281,
      "learning_rate": 1.9494024600706973e-05,
      "loss": 0.458,
      "step": 10230
    },
    {
      "epoch": 2.1031966286360366,
      "grad_norm": 0.14293161034584045,
      "learning_rate": 1.9485796468997733e-05,
      "loss": 0.4781,
      "step": 10231
    },
    {
      "epoch": 2.103402199609415,
      "grad_norm": 2.012324571609497,
      "learning_rate": 1.947756959425403e-05,
      "loss": 0.417,
      "step": 10232
    },
    {
      "epoch": 2.103607770582794,
      "grad_norm": 0.2304982990026474,
      "learning_rate": 1.94693439768812e-05,
      "loss": 0.4055,
      "step": 10233
    },
    {
      "epoch": 2.1038133415561724,
      "grad_norm": 0.23225271701812744,
      "learning_rate": 1.946111961728446e-05,
      "loss": 0.4127,
      "step": 10234
    },
    {
      "epoch": 2.104018912529551,
      "grad_norm": 0.2252538651227951,
      "learning_rate": 1.9452896515868974e-05,
      "loss": 0.3986,
      "step": 10235
    },
    {
      "epoch": 2.1042244835029296,
      "grad_norm": 0.2263312041759491,
      "learning_rate": 1.9444674673039884e-05,
      "loss": 0.3912,
      "step": 10236
    },
    {
      "epoch": 2.104430054476308,
      "grad_norm": 0.13533739745616913,
      "learning_rate": 1.9436454089202226e-05,
      "loss": 0.4608,
      "step": 10237
    },
    {
      "epoch": 2.1046356254496863,
      "grad_norm": 0.22458425164222717,
      "learning_rate": 1.9428234764760997e-05,
      "loss": 0.4091,
      "step": 10238
    },
    {
      "epoch": 2.104841196423065,
      "grad_norm": 0.23281507194042206,
      "learning_rate": 1.9420016700121114e-05,
      "loss": 0.4005,
      "step": 10239
    },
    {
      "epoch": 2.1050467673964435,
      "grad_norm": 0.13586680591106415,
      "learning_rate": 1.941179989568745e-05,
      "loss": 0.4477,
      "step": 10240
    },
    {
      "epoch": 2.105252338369822,
      "grad_norm": 0.23734326660633087,
      "learning_rate": 1.9403584351864806e-05,
      "loss": 0.4007,
      "step": 10241
    },
    {
      "epoch": 2.1054579093432007,
      "grad_norm": 0.14735311269760132,
      "learning_rate": 1.9395370069057907e-05,
      "loss": 0.437,
      "step": 10242
    },
    {
      "epoch": 2.1056634803165792,
      "grad_norm": 0.22844909131526947,
      "learning_rate": 1.9387157047671467e-05,
      "loss": 0.3974,
      "step": 10243
    },
    {
      "epoch": 2.105869051289958,
      "grad_norm": 0.23056820034980774,
      "learning_rate": 1.9378945288110086e-05,
      "loss": 0.4101,
      "step": 10244
    },
    {
      "epoch": 2.1060746222633364,
      "grad_norm": 0.3185328543186188,
      "learning_rate": 1.937073479077831e-05,
      "loss": 0.4083,
      "step": 10245
    },
    {
      "epoch": 2.106280193236715,
      "grad_norm": 0.22201012074947357,
      "learning_rate": 1.9362525556080648e-05,
      "loss": 0.3922,
      "step": 10246
    },
    {
      "epoch": 2.1064857642100936,
      "grad_norm": 0.2239248901605606,
      "learning_rate": 1.935431758442152e-05,
      "loss": 0.3834,
      "step": 10247
    },
    {
      "epoch": 2.106691335183472,
      "grad_norm": 0.23866835236549377,
      "learning_rate": 1.93461108762053e-05,
      "loss": 0.4183,
      "step": 10248
    },
    {
      "epoch": 2.1068969061568508,
      "grad_norm": 0.22566145658493042,
      "learning_rate": 1.933790543183627e-05,
      "loss": 0.3999,
      "step": 10249
    },
    {
      "epoch": 2.1071024771302294,
      "grad_norm": 0.12605851888656616,
      "learning_rate": 1.9329701251718715e-05,
      "loss": 0.4435,
      "step": 10250
    },
    {
      "epoch": 2.107308048103608,
      "grad_norm": 0.2505359947681427,
      "learning_rate": 1.9321498336256792e-05,
      "loss": 0.3997,
      "step": 10251
    },
    {
      "epoch": 2.1075136190769865,
      "grad_norm": 0.22146162390708923,
      "learning_rate": 1.9313296685854628e-05,
      "loss": 0.3939,
      "step": 10252
    },
    {
      "epoch": 2.1077191900503647,
      "grad_norm": 0.23050841689109802,
      "learning_rate": 1.9305096300916266e-05,
      "loss": 0.4322,
      "step": 10253
    },
    {
      "epoch": 2.1079247610237433,
      "grad_norm": 0.12850402295589447,
      "learning_rate": 1.929689718184572e-05,
      "loss": 0.4712,
      "step": 10254
    },
    {
      "epoch": 2.108130331997122,
      "grad_norm": 0.22386091947555542,
      "learning_rate": 1.9288699329046917e-05,
      "loss": 0.3985,
      "step": 10255
    },
    {
      "epoch": 2.1083359029705004,
      "grad_norm": 0.21962010860443115,
      "learning_rate": 1.9280502742923706e-05,
      "loss": 0.3824,
      "step": 10256
    },
    {
      "epoch": 2.108541473943879,
      "grad_norm": 0.2260153442621231,
      "learning_rate": 1.927230742387993e-05,
      "loss": 0.3941,
      "step": 10257
    },
    {
      "epoch": 2.1087470449172576,
      "grad_norm": 0.22931505739688873,
      "learning_rate": 1.926411337231932e-05,
      "loss": 0.3826,
      "step": 10258
    },
    {
      "epoch": 2.108952615890636,
      "grad_norm": 0.22665323317050934,
      "learning_rate": 1.9255920588645544e-05,
      "loss": 0.3905,
      "step": 10259
    },
    {
      "epoch": 2.109158186864015,
      "grad_norm": 0.12365376204252243,
      "learning_rate": 1.924772907326224e-05,
      "loss": 0.4274,
      "step": 10260
    },
    {
      "epoch": 2.1093637578373934,
      "grad_norm": 0.12186730653047562,
      "learning_rate": 1.923953882657296e-05,
      "loss": 0.4518,
      "step": 10261
    },
    {
      "epoch": 2.109569328810772,
      "grad_norm": 0.2289813756942749,
      "learning_rate": 1.9231349848981198e-05,
      "loss": 0.4068,
      "step": 10262
    },
    {
      "epoch": 2.1097748997841506,
      "grad_norm": 0.13003799319267273,
      "learning_rate": 1.922316214089037e-05,
      "loss": 0.4646,
      "step": 10263
    },
    {
      "epoch": 2.109980470757529,
      "grad_norm": 0.22192876040935516,
      "learning_rate": 1.921497570270388e-05,
      "loss": 0.3899,
      "step": 10264
    },
    {
      "epoch": 2.1101860417309077,
      "grad_norm": 0.24169708788394928,
      "learning_rate": 1.9206790534825012e-05,
      "loss": 0.3991,
      "step": 10265
    },
    {
      "epoch": 2.1103916127042863,
      "grad_norm": 0.12277937680482864,
      "learning_rate": 1.919860663765702e-05,
      "loss": 0.4448,
      "step": 10266
    },
    {
      "epoch": 2.110597183677665,
      "grad_norm": 0.22904759645462036,
      "learning_rate": 1.919042401160309e-05,
      "loss": 0.3916,
      "step": 10267
    },
    {
      "epoch": 2.1108027546510435,
      "grad_norm": 0.22709167003631592,
      "learning_rate": 1.9182242657066326e-05,
      "loss": 0.3872,
      "step": 10268
    },
    {
      "epoch": 2.1110083256244216,
      "grad_norm": 0.229196235537529,
      "learning_rate": 1.9174062574449796e-05,
      "loss": 0.4137,
      "step": 10269
    },
    {
      "epoch": 2.1112138965978002,
      "grad_norm": 0.2226954847574234,
      "learning_rate": 1.916588376415648e-05,
      "loss": 0.3845,
      "step": 10270
    },
    {
      "epoch": 2.111419467571179,
      "grad_norm": 0.21747919917106628,
      "learning_rate": 1.915770622658934e-05,
      "loss": 0.404,
      "step": 10271
    },
    {
      "epoch": 2.1116250385445574,
      "grad_norm": 0.22931161522865295,
      "learning_rate": 1.9149529962151223e-05,
      "loss": 0.4024,
      "step": 10272
    },
    {
      "epoch": 2.111830609517936,
      "grad_norm": 0.23304495215415955,
      "learning_rate": 1.9141354971244945e-05,
      "loss": 0.3922,
      "step": 10273
    },
    {
      "epoch": 2.1120361804913146,
      "grad_norm": 0.2212096005678177,
      "learning_rate": 1.9133181254273226e-05,
      "loss": 0.4006,
      "step": 10274
    },
    {
      "epoch": 2.112241751464693,
      "grad_norm": 0.13250161707401276,
      "learning_rate": 1.912500881163878e-05,
      "loss": 0.4599,
      "step": 10275
    },
    {
      "epoch": 2.1124473224380718,
      "grad_norm": 0.22626225650310516,
      "learning_rate": 1.911683764374421e-05,
      "loss": 0.4085,
      "step": 10276
    },
    {
      "epoch": 2.1126528934114503,
      "grad_norm": 0.12602867186069489,
      "learning_rate": 1.9108667750992057e-05,
      "loss": 0.4627,
      "step": 10277
    },
    {
      "epoch": 2.112858464384829,
      "grad_norm": 0.23502740263938904,
      "learning_rate": 1.9100499133784848e-05,
      "loss": 0.4113,
      "step": 10278
    },
    {
      "epoch": 2.1130640353582075,
      "grad_norm": 0.24214279651641846,
      "learning_rate": 1.9092331792524986e-05,
      "loss": 0.3842,
      "step": 10279
    },
    {
      "epoch": 2.113269606331586,
      "grad_norm": 0.23367543518543243,
      "learning_rate": 1.908416572761485e-05,
      "loss": 0.3974,
      "step": 10280
    },
    {
      "epoch": 2.1134751773049647,
      "grad_norm": 0.2227569818496704,
      "learning_rate": 1.907600093945674e-05,
      "loss": 0.4011,
      "step": 10281
    },
    {
      "epoch": 2.1136807482783433,
      "grad_norm": 0.222117081284523,
      "learning_rate": 1.906783742845289e-05,
      "loss": 0.4013,
      "step": 10282
    },
    {
      "epoch": 2.113886319251722,
      "grad_norm": 0.13070207834243774,
      "learning_rate": 1.9059675195005468e-05,
      "loss": 0.4754,
      "step": 10283
    },
    {
      "epoch": 2.1140918902251,
      "grad_norm": 0.23519377410411835,
      "learning_rate": 1.905151423951662e-05,
      "loss": 0.4061,
      "step": 10284
    },
    {
      "epoch": 2.1142974611984786,
      "grad_norm": 0.16713115572929382,
      "learning_rate": 1.9043354562388385e-05,
      "loss": 0.4556,
      "step": 10285
    },
    {
      "epoch": 2.114503032171857,
      "grad_norm": 0.12903447449207306,
      "learning_rate": 1.903519616402275e-05,
      "loss": 0.4728,
      "step": 10286
    },
    {
      "epoch": 2.114708603145236,
      "grad_norm": 0.22464367747306824,
      "learning_rate": 1.9027039044821635e-05,
      "loss": 0.4061,
      "step": 10287
    },
    {
      "epoch": 2.1149141741186144,
      "grad_norm": 0.21755559742450714,
      "learning_rate": 1.9018883205186913e-05,
      "loss": 0.3932,
      "step": 10288
    },
    {
      "epoch": 2.115119745091993,
      "grad_norm": 0.133756622672081,
      "learning_rate": 1.901072864552038e-05,
      "loss": 0.457,
      "step": 10289
    },
    {
      "epoch": 2.1153253160653716,
      "grad_norm": 0.23208466172218323,
      "learning_rate": 1.9002575366223756e-05,
      "loss": 0.4064,
      "step": 10290
    },
    {
      "epoch": 2.11553088703875,
      "grad_norm": 0.13155633211135864,
      "learning_rate": 1.8994423367698753e-05,
      "loss": 0.4419,
      "step": 10291
    },
    {
      "epoch": 2.1157364580121287,
      "grad_norm": 0.2295832335948944,
      "learning_rate": 1.8986272650346955e-05,
      "loss": 0.3953,
      "step": 10292
    },
    {
      "epoch": 2.1159420289855073,
      "grad_norm": 0.2247355431318283,
      "learning_rate": 1.8978123214569915e-05,
      "loss": 0.3978,
      "step": 10293
    },
    {
      "epoch": 2.116147599958886,
      "grad_norm": 0.22480376064777374,
      "learning_rate": 1.8969975060769123e-05,
      "loss": 0.4201,
      "step": 10294
    },
    {
      "epoch": 2.1163531709322645,
      "grad_norm": 0.12718500196933746,
      "learning_rate": 1.896182818934598e-05,
      "loss": 0.4484,
      "step": 10295
    },
    {
      "epoch": 2.116558741905643,
      "grad_norm": 0.2338053286075592,
      "learning_rate": 1.8953682600701873e-05,
      "loss": 0.4009,
      "step": 10296
    },
    {
      "epoch": 2.1167643128790217,
      "grad_norm": 0.23438557982444763,
      "learning_rate": 1.894553829523808e-05,
      "loss": 0.3935,
      "step": 10297
    },
    {
      "epoch": 2.1169698838524003,
      "grad_norm": 0.2157134860754013,
      "learning_rate": 1.8937395273355834e-05,
      "loss": 0.3973,
      "step": 10298
    },
    {
      "epoch": 2.1171754548257784,
      "grad_norm": 0.2266354262828827,
      "learning_rate": 1.8929253535456313e-05,
      "loss": 0.406,
      "step": 10299
    },
    {
      "epoch": 2.117381025799157,
      "grad_norm": 0.2172161191701889,
      "learning_rate": 1.8921113081940612e-05,
      "loss": 0.3979,
      "step": 10300
    },
    {
      "epoch": 2.1175865967725356,
      "grad_norm": 0.22894109785556793,
      "learning_rate": 1.8912973913209784e-05,
      "loss": 0.4039,
      "step": 10301
    },
    {
      "epoch": 2.117792167745914,
      "grad_norm": 0.218611940741539,
      "learning_rate": 1.8904836029664802e-05,
      "loss": 0.3832,
      "step": 10302
    },
    {
      "epoch": 2.1179977387192928,
      "grad_norm": 0.22846931219100952,
      "learning_rate": 1.8896699431706573e-05,
      "loss": 0.4059,
      "step": 10303
    },
    {
      "epoch": 2.1182033096926713,
      "grad_norm": 0.22411483526229858,
      "learning_rate": 1.888856411973595e-05,
      "loss": 0.3933,
      "step": 10304
    },
    {
      "epoch": 2.11840888066605,
      "grad_norm": 0.22474461793899536,
      "learning_rate": 1.8880430094153738e-05,
      "loss": 0.4027,
      "step": 10305
    },
    {
      "epoch": 2.1186144516394285,
      "grad_norm": 0.22956325113773346,
      "learning_rate": 1.8872297355360653e-05,
      "loss": 0.397,
      "step": 10306
    },
    {
      "epoch": 2.118820022612807,
      "grad_norm": 0.23198306560516357,
      "learning_rate": 1.886416590375736e-05,
      "loss": 0.41,
      "step": 10307
    },
    {
      "epoch": 2.1190255935861857,
      "grad_norm": 0.22490225732326508,
      "learning_rate": 1.8856035739744447e-05,
      "loss": 0.396,
      "step": 10308
    },
    {
      "epoch": 2.1192311645595643,
      "grad_norm": 0.23693934082984924,
      "learning_rate": 1.8847906863722467e-05,
      "loss": 0.4054,
      "step": 10309
    },
    {
      "epoch": 2.119436735532943,
      "grad_norm": 0.22398188710212708,
      "learning_rate": 1.8839779276091875e-05,
      "loss": 0.399,
      "step": 10310
    },
    {
      "epoch": 2.1196423065063215,
      "grad_norm": 0.23093253374099731,
      "learning_rate": 1.883165297725307e-05,
      "loss": 0.4094,
      "step": 10311
    },
    {
      "epoch": 2.1198478774797,
      "grad_norm": 0.22496986389160156,
      "learning_rate": 1.8823527967606428e-05,
      "loss": 0.3819,
      "step": 10312
    },
    {
      "epoch": 2.1200534484530786,
      "grad_norm": 0.22796480357646942,
      "learning_rate": 1.8815404247552213e-05,
      "loss": 0.3996,
      "step": 10313
    },
    {
      "epoch": 2.120259019426457,
      "grad_norm": 0.22607813775539398,
      "learning_rate": 1.8807281817490647e-05,
      "loss": 0.3882,
      "step": 10314
    },
    {
      "epoch": 2.1204645903998354,
      "grad_norm": 0.2205992192029953,
      "learning_rate": 1.8799160677821882e-05,
      "loss": 0.3846,
      "step": 10315
    },
    {
      "epoch": 2.120670161373214,
      "grad_norm": 0.12466558814048767,
      "learning_rate": 1.879104082894601e-05,
      "loss": 0.4445,
      "step": 10316
    },
    {
      "epoch": 2.1208757323465925,
      "grad_norm": 0.12291921675205231,
      "learning_rate": 1.8782922271263033e-05,
      "loss": 0.4429,
      "step": 10317
    },
    {
      "epoch": 2.121081303319971,
      "grad_norm": 0.22178338468074799,
      "learning_rate": 1.8774805005172958e-05,
      "loss": 0.3842,
      "step": 10318
    },
    {
      "epoch": 2.1212868742933497,
      "grad_norm": 0.22737297415733337,
      "learning_rate": 1.8766689031075644e-05,
      "loss": 0.3988,
      "step": 10319
    },
    {
      "epoch": 2.1214924452667283,
      "grad_norm": 0.12307467311620712,
      "learning_rate": 1.875857434937097e-05,
      "loss": 0.4426,
      "step": 10320
    },
    {
      "epoch": 2.121698016240107,
      "grad_norm": 0.21922807395458221,
      "learning_rate": 1.8750460960458682e-05,
      "loss": 0.4063,
      "step": 10321
    },
    {
      "epoch": 2.1219035872134855,
      "grad_norm": 0.12798526883125305,
      "learning_rate": 1.8742348864738494e-05,
      "loss": 0.4517,
      "step": 10322
    },
    {
      "epoch": 2.122109158186864,
      "grad_norm": 0.12603412568569183,
      "learning_rate": 1.8734238062610044e-05,
      "loss": 0.4614,
      "step": 10323
    },
    {
      "epoch": 2.1223147291602427,
      "grad_norm": 0.22325001657009125,
      "learning_rate": 1.8726128554472924e-05,
      "loss": 0.3954,
      "step": 10324
    },
    {
      "epoch": 2.1225203001336213,
      "grad_norm": 0.2292872816324234,
      "learning_rate": 1.8718020340726634e-05,
      "loss": 0.3985,
      "step": 10325
    },
    {
      "epoch": 2.122725871107,
      "grad_norm": 0.23180240392684937,
      "learning_rate": 1.8709913421770648e-05,
      "loss": 0.4131,
      "step": 10326
    },
    {
      "epoch": 2.1229314420803784,
      "grad_norm": 0.12431956827640533,
      "learning_rate": 1.870180779800435e-05,
      "loss": 0.4345,
      "step": 10327
    },
    {
      "epoch": 2.123137013053757,
      "grad_norm": 0.13498254120349884,
      "learning_rate": 1.8693703469827067e-05,
      "loss": 0.4681,
      "step": 10328
    },
    {
      "epoch": 2.123342584027135,
      "grad_norm": 0.12030383944511414,
      "learning_rate": 1.8685600437638057e-05,
      "loss": 0.4469,
      "step": 10329
    },
    {
      "epoch": 2.1235481550005137,
      "grad_norm": 0.22829271852970123,
      "learning_rate": 1.867749870183652e-05,
      "loss": 0.3874,
      "step": 10330
    },
    {
      "epoch": 2.1237537259738923,
      "grad_norm": 0.21957705914974213,
      "learning_rate": 1.8669398262821593e-05,
      "loss": 0.3904,
      "step": 10331
    },
    {
      "epoch": 2.123959296947271,
      "grad_norm": 0.22618070244789124,
      "learning_rate": 1.8661299120992332e-05,
      "loss": 0.4029,
      "step": 10332
    },
    {
      "epoch": 2.1241648679206495,
      "grad_norm": 0.2359391301870346,
      "learning_rate": 1.8653201276747767e-05,
      "loss": 0.4119,
      "step": 10333
    },
    {
      "epoch": 2.124370438894028,
      "grad_norm": 0.21867458522319794,
      "learning_rate": 1.8645104730486828e-05,
      "loss": 0.3953,
      "step": 10334
    },
    {
      "epoch": 2.1245760098674067,
      "grad_norm": 0.22511562705039978,
      "learning_rate": 1.86370094826084e-05,
      "loss": 0.3824,
      "step": 10335
    },
    {
      "epoch": 2.1247815808407853,
      "grad_norm": 0.12738649547100067,
      "learning_rate": 1.8628915533511296e-05,
      "loss": 0.4281,
      "step": 10336
    },
    {
      "epoch": 2.124987151814164,
      "grad_norm": 0.22026711702346802,
      "learning_rate": 1.8620822883594267e-05,
      "loss": 0.3925,
      "step": 10337
    },
    {
      "epoch": 2.1251927227875425,
      "grad_norm": 0.22602379322052002,
      "learning_rate": 1.8612731533255976e-05,
      "loss": 0.3959,
      "step": 10338
    },
    {
      "epoch": 2.125398293760921,
      "grad_norm": 0.22942064702510834,
      "learning_rate": 1.860464148289509e-05,
      "loss": 0.4084,
      "step": 10339
    },
    {
      "epoch": 2.1256038647342996,
      "grad_norm": 0.22742587327957153,
      "learning_rate": 1.8596552732910148e-05,
      "loss": 0.4137,
      "step": 10340
    },
    {
      "epoch": 2.125809435707678,
      "grad_norm": 0.12401507049798965,
      "learning_rate": 1.8588465283699622e-05,
      "loss": 0.434,
      "step": 10341
    },
    {
      "epoch": 2.126015006681057,
      "grad_norm": 0.21955260634422302,
      "learning_rate": 1.858037913566198e-05,
      "loss": 0.4068,
      "step": 10342
    },
    {
      "epoch": 2.1262205776544354,
      "grad_norm": 0.1239282488822937,
      "learning_rate": 1.8572294289195576e-05,
      "loss": 0.4364,
      "step": 10343
    },
    {
      "epoch": 2.1264261486278135,
      "grad_norm": 0.2231699824333191,
      "learning_rate": 1.8564210744698707e-05,
      "loss": 0.3928,
      "step": 10344
    },
    {
      "epoch": 2.126631719601192,
      "grad_norm": 0.12479789555072784,
      "learning_rate": 1.8556128502569618e-05,
      "loss": 0.4482,
      "step": 10345
    },
    {
      "epoch": 2.1268372905745707,
      "grad_norm": 0.2382933497428894,
      "learning_rate": 1.8548047563206465e-05,
      "loss": 0.4012,
      "step": 10346
    },
    {
      "epoch": 2.1270428615479493,
      "grad_norm": 0.23470161855220795,
      "learning_rate": 1.853996792700738e-05,
      "loss": 0.3967,
      "step": 10347
    },
    {
      "epoch": 2.127248432521328,
      "grad_norm": 0.22285513579845428,
      "learning_rate": 1.8531889594370406e-05,
      "loss": 0.4076,
      "step": 10348
    },
    {
      "epoch": 2.1274540034947065,
      "grad_norm": 0.23410557210445404,
      "learning_rate": 1.8523812565693522e-05,
      "loss": 0.4086,
      "step": 10349
    },
    {
      "epoch": 2.127659574468085,
      "grad_norm": 0.2240322232246399,
      "learning_rate": 1.8515736841374643e-05,
      "loss": 0.4091,
      "step": 10350
    },
    {
      "epoch": 2.1278651454414637,
      "grad_norm": 0.22299052774906158,
      "learning_rate": 1.8507662421811618e-05,
      "loss": 0.3762,
      "step": 10351
    },
    {
      "epoch": 2.1280707164148422,
      "grad_norm": 0.23107583820819855,
      "learning_rate": 1.8499589307402244e-05,
      "loss": 0.3983,
      "step": 10352
    },
    {
      "epoch": 2.128276287388221,
      "grad_norm": 0.22548127174377441,
      "learning_rate": 1.8491517498544227e-05,
      "loss": 0.4028,
      "step": 10353
    },
    {
      "epoch": 2.1284818583615994,
      "grad_norm": 0.2753831744194031,
      "learning_rate": 1.848344699563526e-05,
      "loss": 0.423,
      "step": 10354
    },
    {
      "epoch": 2.128687429334978,
      "grad_norm": 0.22851170599460602,
      "learning_rate": 1.847537779907292e-05,
      "loss": 0.3987,
      "step": 10355
    },
    {
      "epoch": 2.1288930003083566,
      "grad_norm": 0.2307175248861313,
      "learning_rate": 1.8467309909254737e-05,
      "loss": 0.4081,
      "step": 10356
    },
    {
      "epoch": 2.129098571281735,
      "grad_norm": 0.2297874242067337,
      "learning_rate": 1.8459243326578183e-05,
      "loss": 0.406,
      "step": 10357
    },
    {
      "epoch": 2.1293041422551138,
      "grad_norm": 0.12997567653656006,
      "learning_rate": 1.845117805144066e-05,
      "loss": 0.436,
      "step": 10358
    },
    {
      "epoch": 2.129509713228492,
      "grad_norm": 0.133535698056221,
      "learning_rate": 1.844311408423949e-05,
      "loss": 0.4471,
      "step": 10359
    },
    {
      "epoch": 2.1297152842018705,
      "grad_norm": 0.1276518702507019,
      "learning_rate": 1.843505142537198e-05,
      "loss": 0.4424,
      "step": 10360
    },
    {
      "epoch": 2.129920855175249,
      "grad_norm": 0.12228768318891525,
      "learning_rate": 1.842699007523532e-05,
      "loss": 0.4467,
      "step": 10361
    },
    {
      "epoch": 2.1301264261486277,
      "grad_norm": 0.2285146862268448,
      "learning_rate": 1.841893003422664e-05,
      "loss": 0.4019,
      "step": 10362
    },
    {
      "epoch": 2.1303319971220063,
      "grad_norm": 0.22430342435836792,
      "learning_rate": 1.8410871302743054e-05,
      "loss": 0.4207,
      "step": 10363
    },
    {
      "epoch": 2.130537568095385,
      "grad_norm": 0.214961439371109,
      "learning_rate": 1.8402813881181563e-05,
      "loss": 0.3986,
      "step": 10364
    },
    {
      "epoch": 2.1307431390687634,
      "grad_norm": 0.23033976554870605,
      "learning_rate": 1.8394757769939117e-05,
      "loss": 0.3853,
      "step": 10365
    },
    {
      "epoch": 2.130948710042142,
      "grad_norm": 0.12601739168167114,
      "learning_rate": 1.8386702969412583e-05,
      "loss": 0.438,
      "step": 10366
    },
    {
      "epoch": 2.1311542810155206,
      "grad_norm": 0.23412902653217316,
      "learning_rate": 1.8378649479998827e-05,
      "loss": 0.3996,
      "step": 10367
    },
    {
      "epoch": 2.131359851988899,
      "grad_norm": 0.2274925261735916,
      "learning_rate": 1.8370597302094577e-05,
      "loss": 0.388,
      "step": 10368
    },
    {
      "epoch": 2.131565422962278,
      "grad_norm": 0.1298505961894989,
      "learning_rate": 1.8362546436096537e-05,
      "loss": 0.4471,
      "step": 10369
    },
    {
      "epoch": 2.1317709939356564,
      "grad_norm": 0.23555243015289307,
      "learning_rate": 1.8354496882401327e-05,
      "loss": 0.3892,
      "step": 10370
    },
    {
      "epoch": 2.131976564909035,
      "grad_norm": 0.2312725931406021,
      "learning_rate": 1.8346448641405517e-05,
      "loss": 0.386,
      "step": 10371
    },
    {
      "epoch": 2.1321821358824136,
      "grad_norm": 0.22951969504356384,
      "learning_rate": 1.8338401713505603e-05,
      "loss": 0.407,
      "step": 10372
    },
    {
      "epoch": 2.132387706855792,
      "grad_norm": 0.22569020092487335,
      "learning_rate": 1.8330356099098006e-05,
      "loss": 0.3961,
      "step": 10373
    },
    {
      "epoch": 2.1325932778291703,
      "grad_norm": 0.2186949998140335,
      "learning_rate": 1.8322311798579125e-05,
      "loss": 0.3827,
      "step": 10374
    },
    {
      "epoch": 2.132798848802549,
      "grad_norm": 0.22884011268615723,
      "learning_rate": 1.8314268812345248e-05,
      "loss": 0.3973,
      "step": 10375
    },
    {
      "epoch": 2.1330044197759275,
      "grad_norm": 0.1293371617794037,
      "learning_rate": 1.8306227140792622e-05,
      "loss": 0.4564,
      "step": 10376
    },
    {
      "epoch": 2.133209990749306,
      "grad_norm": 0.22477327287197113,
      "learning_rate": 1.829818678431742e-05,
      "loss": 0.3865,
      "step": 10377
    },
    {
      "epoch": 2.1334155617226847,
      "grad_norm": 0.22367890179157257,
      "learning_rate": 1.8290147743315746e-05,
      "loss": 0.3733,
      "step": 10378
    },
    {
      "epoch": 2.1336211326960632,
      "grad_norm": 0.23502875864505768,
      "learning_rate": 1.8282110018183656e-05,
      "loss": 0.4037,
      "step": 10379
    },
    {
      "epoch": 2.133826703669442,
      "grad_norm": 0.12768757343292236,
      "learning_rate": 1.8274073609317106e-05,
      "loss": 0.4562,
      "step": 10380
    },
    {
      "epoch": 2.1340322746428204,
      "grad_norm": 0.23585356771945953,
      "learning_rate": 1.826603851711205e-05,
      "loss": 0.3938,
      "step": 10381
    },
    {
      "epoch": 2.134237845616199,
      "grad_norm": 0.23149564862251282,
      "learning_rate": 1.825800474196432e-05,
      "loss": 0.3848,
      "step": 10382
    },
    {
      "epoch": 2.1344434165895776,
      "grad_norm": 0.23342165350914001,
      "learning_rate": 1.824997228426969e-05,
      "loss": 0.4179,
      "step": 10383
    },
    {
      "epoch": 2.134648987562956,
      "grad_norm": 0.2237035036087036,
      "learning_rate": 1.8241941144423916e-05,
      "loss": 0.4023,
      "step": 10384
    },
    {
      "epoch": 2.1348545585363348,
      "grad_norm": 0.2252335101366043,
      "learning_rate": 1.8233911322822632e-05,
      "loss": 0.3956,
      "step": 10385
    },
    {
      "epoch": 2.1350601295097134,
      "grad_norm": 0.2148154377937317,
      "learning_rate": 1.822588281986143e-05,
      "loss": 0.3835,
      "step": 10386
    },
    {
      "epoch": 2.135265700483092,
      "grad_norm": 0.11948797851800919,
      "learning_rate": 1.8217855635935827e-05,
      "loss": 0.4476,
      "step": 10387
    },
    {
      "epoch": 2.1354712714564705,
      "grad_norm": 0.22916093468666077,
      "learning_rate": 1.8209829771441314e-05,
      "loss": 0.3903,
      "step": 10388
    },
    {
      "epoch": 2.1356768424298487,
      "grad_norm": 0.21855413913726807,
      "learning_rate": 1.820180522677327e-05,
      "loss": 0.3972,
      "step": 10389
    },
    {
      "epoch": 2.1358824134032273,
      "grad_norm": 0.13248126208782196,
      "learning_rate": 1.819378200232703e-05,
      "loss": 0.4453,
      "step": 10390
    },
    {
      "epoch": 2.136087984376606,
      "grad_norm": 0.22880522906780243,
      "learning_rate": 1.818576009849786e-05,
      "loss": 0.3987,
      "step": 10391
    },
    {
      "epoch": 2.1362935553499844,
      "grad_norm": 0.24837420880794525,
      "learning_rate": 1.8177739515680953e-05,
      "loss": 0.3857,
      "step": 10392
    },
    {
      "epoch": 2.136499126323363,
      "grad_norm": 0.23082508146762848,
      "learning_rate": 1.816972025427146e-05,
      "loss": 0.421,
      "step": 10393
    },
    {
      "epoch": 2.1367046972967416,
      "grad_norm": 0.1307905912399292,
      "learning_rate": 1.8161702314664423e-05,
      "loss": 0.4584,
      "step": 10394
    },
    {
      "epoch": 2.13691026827012,
      "grad_norm": 0.23677071928977966,
      "learning_rate": 1.815368569725489e-05,
      "loss": 0.4082,
      "step": 10395
    },
    {
      "epoch": 2.137115839243499,
      "grad_norm": 0.1245460957288742,
      "learning_rate": 1.8145670402437787e-05,
      "loss": 0.4332,
      "step": 10396
    },
    {
      "epoch": 2.1373214102168774,
      "grad_norm": 0.2281726449728012,
      "learning_rate": 1.8137656430607986e-05,
      "loss": 0.3907,
      "step": 10397
    },
    {
      "epoch": 2.137526981190256,
      "grad_norm": 0.11899819225072861,
      "learning_rate": 1.8129643782160294e-05,
      "loss": 0.449,
      "step": 10398
    },
    {
      "epoch": 2.1377325521636346,
      "grad_norm": 0.23056533932685852,
      "learning_rate": 1.8121632457489465e-05,
      "loss": 0.4015,
      "step": 10399
    },
    {
      "epoch": 2.137938123137013,
      "grad_norm": 0.2260628640651703,
      "learning_rate": 1.8113622456990175e-05,
      "loss": 0.3938,
      "step": 10400
    },
    {
      "epoch": 2.1381436941103917,
      "grad_norm": 0.22494405508041382,
      "learning_rate": 1.810561378105702e-05,
      "loss": 0.3994,
      "step": 10401
    },
    {
      "epoch": 2.1383492650837703,
      "grad_norm": 0.22264499962329865,
      "learning_rate": 1.809760643008459e-05,
      "loss": 0.3904,
      "step": 10402
    },
    {
      "epoch": 2.138554836057149,
      "grad_norm": 0.2253665328025818,
      "learning_rate": 1.808960040446735e-05,
      "loss": 0.3998,
      "step": 10403
    },
    {
      "epoch": 2.138760407030527,
      "grad_norm": 0.12751929461956024,
      "learning_rate": 1.8081595704599718e-05,
      "loss": 0.4584,
      "step": 10404
    },
    {
      "epoch": 2.1389659780039056,
      "grad_norm": 0.1251654028892517,
      "learning_rate": 1.8073592330876034e-05,
      "loss": 0.4494,
      "step": 10405
    },
    {
      "epoch": 2.1391715489772842,
      "grad_norm": 0.12770125269889832,
      "learning_rate": 1.8065590283690614e-05,
      "loss": 0.436,
      "step": 10406
    },
    {
      "epoch": 2.139377119950663,
      "grad_norm": 0.22460491955280304,
      "learning_rate": 1.8057589563437675e-05,
      "loss": 0.3837,
      "step": 10407
    },
    {
      "epoch": 2.1395826909240414,
      "grad_norm": 0.2189689576625824,
      "learning_rate": 1.8049590170511354e-05,
      "loss": 0.4027,
      "step": 10408
    },
    {
      "epoch": 2.13978826189742,
      "grad_norm": 0.22947020828723907,
      "learning_rate": 1.804159210530577e-05,
      "loss": 0.3883,
      "step": 10409
    },
    {
      "epoch": 2.1399938328707986,
      "grad_norm": 0.22392447292804718,
      "learning_rate": 1.8033595368214945e-05,
      "loss": 0.3933,
      "step": 10410
    },
    {
      "epoch": 2.140199403844177,
      "grad_norm": 0.23469264805316925,
      "learning_rate": 1.8025599959632835e-05,
      "loss": 0.4153,
      "step": 10411
    },
    {
      "epoch": 2.1404049748175558,
      "grad_norm": 0.2271226942539215,
      "learning_rate": 1.8017605879953335e-05,
      "loss": 0.396,
      "step": 10412
    },
    {
      "epoch": 2.1406105457909343,
      "grad_norm": 0.2269534021615982,
      "learning_rate": 1.8009613129570278e-05,
      "loss": 0.401,
      "step": 10413
    },
    {
      "epoch": 2.140816116764313,
      "grad_norm": 0.22716417908668518,
      "learning_rate": 1.800162170887743e-05,
      "loss": 0.3846,
      "step": 10414
    },
    {
      "epoch": 2.1410216877376915,
      "grad_norm": 0.13274461030960083,
      "learning_rate": 1.7993631618268472e-05,
      "loss": 0.448,
      "step": 10415
    },
    {
      "epoch": 2.14122725871107,
      "grad_norm": 0.22133229672908783,
      "learning_rate": 1.7985642858137076e-05,
      "loss": 0.3983,
      "step": 10416
    },
    {
      "epoch": 2.1414328296844487,
      "grad_norm": 0.21587035059928894,
      "learning_rate": 1.797765542887679e-05,
      "loss": 0.3917,
      "step": 10417
    },
    {
      "epoch": 2.1416384006578273,
      "grad_norm": 0.2158806473016739,
      "learning_rate": 1.796966933088112e-05,
      "loss": 0.3887,
      "step": 10418
    },
    {
      "epoch": 2.141843971631206,
      "grad_norm": 0.23333343863487244,
      "learning_rate": 1.7961684564543503e-05,
      "loss": 0.393,
      "step": 10419
    },
    {
      "epoch": 2.1420495426045845,
      "grad_norm": 0.21826335787773132,
      "learning_rate": 1.7953701130257313e-05,
      "loss": 0.3817,
      "step": 10420
    },
    {
      "epoch": 2.1422551135779626,
      "grad_norm": 0.12297184020280838,
      "learning_rate": 1.794571902841585e-05,
      "loss": 0.4548,
      "step": 10421
    },
    {
      "epoch": 2.142460684551341,
      "grad_norm": 0.12231001257896423,
      "learning_rate": 1.793773825941234e-05,
      "loss": 0.4505,
      "step": 10422
    },
    {
      "epoch": 2.14266625552472,
      "grad_norm": 0.2218412458896637,
      "learning_rate": 1.792975882364e-05,
      "loss": 0.3939,
      "step": 10423
    },
    {
      "epoch": 2.1428718264980984,
      "grad_norm": 0.1286546289920807,
      "learning_rate": 1.7921780721491914e-05,
      "loss": 0.4586,
      "step": 10424
    },
    {
      "epoch": 2.143077397471477,
      "grad_norm": 0.22066746652126312,
      "learning_rate": 1.7913803953361125e-05,
      "loss": 0.3819,
      "step": 10425
    },
    {
      "epoch": 2.1432829684448556,
      "grad_norm": 0.22369948029518127,
      "learning_rate": 1.7905828519640602e-05,
      "loss": 0.4186,
      "step": 10426
    },
    {
      "epoch": 2.143488539418234,
      "grad_norm": 0.12636181712150574,
      "learning_rate": 1.789785442072329e-05,
      "loss": 0.4643,
      "step": 10427
    },
    {
      "epoch": 2.1436941103916127,
      "grad_norm": 0.22555802762508392,
      "learning_rate": 1.788988165700201e-05,
      "loss": 0.3877,
      "step": 10428
    },
    {
      "epoch": 2.1438996813649913,
      "grad_norm": 0.2376098334789276,
      "learning_rate": 1.7881910228869535e-05,
      "loss": 0.3993,
      "step": 10429
    },
    {
      "epoch": 2.14410525233837,
      "grad_norm": 0.2282724678516388,
      "learning_rate": 1.787394013671861e-05,
      "loss": 0.3815,
      "step": 10430
    },
    {
      "epoch": 2.1443108233117485,
      "grad_norm": 0.22976957261562347,
      "learning_rate": 1.7865971380941866e-05,
      "loss": 0.3869,
      "step": 10431
    },
    {
      "epoch": 2.144516394285127,
      "grad_norm": 0.2277589738368988,
      "learning_rate": 1.7858003961931885e-05,
      "loss": 0.3927,
      "step": 10432
    },
    {
      "epoch": 2.1447219652585057,
      "grad_norm": 0.21987488865852356,
      "learning_rate": 1.785003788008119e-05,
      "loss": 0.3971,
      "step": 10433
    },
    {
      "epoch": 2.1449275362318843,
      "grad_norm": 0.22373713552951813,
      "learning_rate": 1.784207313578223e-05,
      "loss": 0.4124,
      "step": 10434
    },
    {
      "epoch": 2.145133107205263,
      "grad_norm": 0.22595758736133575,
      "learning_rate": 1.7834109729427376e-05,
      "loss": 0.4053,
      "step": 10435
    },
    {
      "epoch": 2.145338678178641,
      "grad_norm": 0.22213847935199738,
      "learning_rate": 1.782614766140898e-05,
      "loss": 0.3875,
      "step": 10436
    },
    {
      "epoch": 2.1455442491520196,
      "grad_norm": 0.127987802028656,
      "learning_rate": 1.7818186932119277e-05,
      "loss": 0.4445,
      "step": 10437
    },
    {
      "epoch": 2.145749820125398,
      "grad_norm": 0.22547675669193268,
      "learning_rate": 1.781022754195045e-05,
      "loss": 0.3897,
      "step": 10438
    },
    {
      "epoch": 2.1459553910987768,
      "grad_norm": 0.23386697471141815,
      "learning_rate": 1.780226949129464e-05,
      "loss": 0.3906,
      "step": 10439
    },
    {
      "epoch": 2.1461609620721553,
      "grad_norm": 0.22901882231235504,
      "learning_rate": 1.7794312780543883e-05,
      "loss": 0.3978,
      "step": 10440
    },
    {
      "epoch": 2.146366533045534,
      "grad_norm": 0.22975675761699677,
      "learning_rate": 1.7786357410090173e-05,
      "loss": 0.3855,
      "step": 10441
    },
    {
      "epoch": 2.1465721040189125,
      "grad_norm": 0.22928237915039062,
      "learning_rate": 1.7778403380325427e-05,
      "loss": 0.3919,
      "step": 10442
    },
    {
      "epoch": 2.146777674992291,
      "grad_norm": 0.22319789230823517,
      "learning_rate": 1.7770450691641526e-05,
      "loss": 0.3921,
      "step": 10443
    },
    {
      "epoch": 2.1469832459656697,
      "grad_norm": 0.23228733241558075,
      "learning_rate": 1.7762499344430253e-05,
      "loss": 0.395,
      "step": 10444
    },
    {
      "epoch": 2.1471888169390483,
      "grad_norm": 0.22841905057430267,
      "learning_rate": 1.7754549339083323e-05,
      "loss": 0.4022,
      "step": 10445
    },
    {
      "epoch": 2.147394387912427,
      "grad_norm": 0.1279844492673874,
      "learning_rate": 1.7746600675992408e-05,
      "loss": 0.4415,
      "step": 10446
    },
    {
      "epoch": 2.1475999588858055,
      "grad_norm": 0.2246563881635666,
      "learning_rate": 1.7738653355549078e-05,
      "loss": 0.3858,
      "step": 10447
    },
    {
      "epoch": 2.147805529859184,
      "grad_norm": 0.225599467754364,
      "learning_rate": 1.773070737814489e-05,
      "loss": 0.4025,
      "step": 10448
    },
    {
      "epoch": 2.1480111008325626,
      "grad_norm": 0.2247907519340515,
      "learning_rate": 1.7722762744171298e-05,
      "loss": 0.4245,
      "step": 10449
    },
    {
      "epoch": 2.1482166718059412,
      "grad_norm": 0.23618023097515106,
      "learning_rate": 1.7714819454019672e-05,
      "loss": 0.4155,
      "step": 10450
    },
    {
      "epoch": 2.1484222427793194,
      "grad_norm": 0.12265011668205261,
      "learning_rate": 1.770687750808138e-05,
      "loss": 0.4512,
      "step": 10451
    },
    {
      "epoch": 2.148627813752698,
      "grad_norm": 0.23683376610279083,
      "learning_rate": 1.7698936906747665e-05,
      "loss": 0.4045,
      "step": 10452
    },
    {
      "epoch": 2.1488333847260765,
      "grad_norm": 0.2286202311515808,
      "learning_rate": 1.7690997650409725e-05,
      "loss": 0.401,
      "step": 10453
    },
    {
      "epoch": 2.149038955699455,
      "grad_norm": 0.21446064114570618,
      "learning_rate": 1.7683059739458683e-05,
      "loss": 0.3898,
      "step": 10454
    },
    {
      "epoch": 2.1492445266728337,
      "grad_norm": 0.12255129218101501,
      "learning_rate": 1.7675123174285614e-05,
      "loss": 0.46,
      "step": 10455
    },
    {
      "epoch": 2.1494500976462123,
      "grad_norm": 0.22888119518756866,
      "learning_rate": 1.766718795528149e-05,
      "loss": 0.3708,
      "step": 10456
    },
    {
      "epoch": 2.149655668619591,
      "grad_norm": 0.2274254858493805,
      "learning_rate": 1.7659254082837288e-05,
      "loss": 0.3951,
      "step": 10457
    },
    {
      "epoch": 2.1498612395929695,
      "grad_norm": 0.12422723323106766,
      "learning_rate": 1.7651321557343836e-05,
      "loss": 0.4547,
      "step": 10458
    },
    {
      "epoch": 2.150066810566348,
      "grad_norm": 0.23370634019374847,
      "learning_rate": 1.7643390379191948e-05,
      "loss": 0.3956,
      "step": 10459
    },
    {
      "epoch": 2.1502723815397267,
      "grad_norm": 0.2372375875711441,
      "learning_rate": 1.7635460548772353e-05,
      "loss": 0.4031,
      "step": 10460
    },
    {
      "epoch": 2.1504779525131053,
      "grad_norm": 0.23817671835422516,
      "learning_rate": 1.762753206647571e-05,
      "loss": 0.3945,
      "step": 10461
    },
    {
      "epoch": 2.150683523486484,
      "grad_norm": 0.23152542114257812,
      "learning_rate": 1.7619604932692628e-05,
      "loss": 0.3837,
      "step": 10462
    },
    {
      "epoch": 2.1508890944598624,
      "grad_norm": 0.21996726095676422,
      "learning_rate": 1.7611679147813618e-05,
      "loss": 0.3971,
      "step": 10463
    },
    {
      "epoch": 2.151094665433241,
      "grad_norm": 0.22144795954227448,
      "learning_rate": 1.760375471222918e-05,
      "loss": 0.3999,
      "step": 10464
    },
    {
      "epoch": 2.1513002364066196,
      "grad_norm": 0.23396509885787964,
      "learning_rate": 1.7595831626329697e-05,
      "loss": 0.3977,
      "step": 10465
    },
    {
      "epoch": 2.1515058073799977,
      "grad_norm": 0.2290705144405365,
      "learning_rate": 1.7587909890505503e-05,
      "loss": 0.3953,
      "step": 10466
    },
    {
      "epoch": 2.1517113783533763,
      "grad_norm": 0.22540703415870667,
      "learning_rate": 1.7579989505146866e-05,
      "loss": 0.3971,
      "step": 10467
    },
    {
      "epoch": 2.151916949326755,
      "grad_norm": 0.12446384131908417,
      "learning_rate": 1.7572070470643973e-05,
      "loss": 0.4507,
      "step": 10468
    },
    {
      "epoch": 2.1521225203001335,
      "grad_norm": 0.12616395950317383,
      "learning_rate": 1.7564152787386977e-05,
      "loss": 0.44,
      "step": 10469
    },
    {
      "epoch": 2.152328091273512,
      "grad_norm": 0.23691080510616302,
      "learning_rate": 1.7556236455765943e-05,
      "loss": 0.3804,
      "step": 10470
    },
    {
      "epoch": 2.1525336622468907,
      "grad_norm": 0.2261635661125183,
      "learning_rate": 1.7548321476170854e-05,
      "loss": 0.3727,
      "step": 10471
    },
    {
      "epoch": 2.1527392332202693,
      "grad_norm": 0.22439588606357574,
      "learning_rate": 1.7540407848991672e-05,
      "loss": 0.3903,
      "step": 10472
    },
    {
      "epoch": 2.152944804193648,
      "grad_norm": 0.13026651740074158,
      "learning_rate": 1.7532495574618246e-05,
      "loss": 0.4672,
      "step": 10473
    },
    {
      "epoch": 2.1531503751670265,
      "grad_norm": 0.21984946727752686,
      "learning_rate": 1.7524584653440377e-05,
      "loss": 0.4064,
      "step": 10474
    },
    {
      "epoch": 2.153355946140405,
      "grad_norm": 0.22405663132667542,
      "learning_rate": 1.7516675085847812e-05,
      "loss": 0.4067,
      "step": 10475
    },
    {
      "epoch": 2.1535615171137836,
      "grad_norm": 0.22605964541435242,
      "learning_rate": 1.75087668722302e-05,
      "loss": 0.4045,
      "step": 10476
    },
    {
      "epoch": 2.153767088087162,
      "grad_norm": 0.1273018717765808,
      "learning_rate": 1.7500860012977142e-05,
      "loss": 0.4456,
      "step": 10477
    },
    {
      "epoch": 2.153972659060541,
      "grad_norm": 0.23210304975509644,
      "learning_rate": 1.7492954508478192e-05,
      "loss": 0.4067,
      "step": 10478
    },
    {
      "epoch": 2.1541782300339194,
      "grad_norm": 0.2308957576751709,
      "learning_rate": 1.7485050359122806e-05,
      "loss": 0.4144,
      "step": 10479
    },
    {
      "epoch": 2.154383801007298,
      "grad_norm": 0.2237699329853058,
      "learning_rate": 1.7477147565300388e-05,
      "loss": 0.3946,
      "step": 10480
    },
    {
      "epoch": 2.154589371980676,
      "grad_norm": 0.12873926758766174,
      "learning_rate": 1.7469246127400262e-05,
      "loss": 0.4475,
      "step": 10481
    },
    {
      "epoch": 2.1547949429540547,
      "grad_norm": 0.24316054582595825,
      "learning_rate": 1.7461346045811703e-05,
      "loss": 0.4043,
      "step": 10482
    },
    {
      "epoch": 2.1550005139274333,
      "grad_norm": 0.21882621943950653,
      "learning_rate": 1.7453447320923914e-05,
      "loss": 0.4072,
      "step": 10483
    },
    {
      "epoch": 2.155206084900812,
      "grad_norm": 0.2260080724954605,
      "learning_rate": 1.7445549953126e-05,
      "loss": 0.3984,
      "step": 10484
    },
    {
      "epoch": 2.1554116558741905,
      "grad_norm": 0.22015734016895294,
      "learning_rate": 1.743765394280707e-05,
      "loss": 0.3975,
      "step": 10485
    },
    {
      "epoch": 2.155617226847569,
      "grad_norm": 0.22426630556583405,
      "learning_rate": 1.7429759290356103e-05,
      "loss": 0.3925,
      "step": 10486
    },
    {
      "epoch": 2.1558227978209477,
      "grad_norm": 0.23523494601249695,
      "learning_rate": 1.7421865996162033e-05,
      "loss": 0.4133,
      "step": 10487
    },
    {
      "epoch": 2.1560283687943262,
      "grad_norm": 0.22726291418075562,
      "learning_rate": 1.7413974060613727e-05,
      "loss": 0.3988,
      "step": 10488
    },
    {
      "epoch": 2.156233939767705,
      "grad_norm": 0.2152286171913147,
      "learning_rate": 1.740608348409998e-05,
      "loss": 0.3935,
      "step": 10489
    },
    {
      "epoch": 2.1564395107410834,
      "grad_norm": 0.22603079676628113,
      "learning_rate": 1.7398194267009514e-05,
      "loss": 0.3965,
      "step": 10490
    },
    {
      "epoch": 2.156645081714462,
      "grad_norm": 0.1339533030986786,
      "learning_rate": 1.739030640973102e-05,
      "loss": 0.435,
      "step": 10491
    },
    {
      "epoch": 2.1568506526878406,
      "grad_norm": 0.23634931445121765,
      "learning_rate": 1.7382419912653064e-05,
      "loss": 0.4006,
      "step": 10492
    },
    {
      "epoch": 2.157056223661219,
      "grad_norm": 0.23838773369789124,
      "learning_rate": 1.7374534776164215e-05,
      "loss": 0.4042,
      "step": 10493
    },
    {
      "epoch": 2.1572617946345978,
      "grad_norm": 0.23160769045352936,
      "learning_rate": 1.736665100065291e-05,
      "loss": 0.3908,
      "step": 10494
    },
    {
      "epoch": 2.1574673656079764,
      "grad_norm": 0.12931808829307556,
      "learning_rate": 1.7358768586507557e-05,
      "loss": 0.4381,
      "step": 10495
    },
    {
      "epoch": 2.1576729365813545,
      "grad_norm": 0.2354772686958313,
      "learning_rate": 1.735088753411648e-05,
      "loss": 0.4097,
      "step": 10496
    },
    {
      "epoch": 2.157878507554733,
      "grad_norm": 0.22520937025547028,
      "learning_rate": 1.734300784386794e-05,
      "loss": 0.4014,
      "step": 10497
    },
    {
      "epoch": 2.1580840785281117,
      "grad_norm": 0.22981365025043488,
      "learning_rate": 1.7335129516150123e-05,
      "loss": 0.3952,
      "step": 10498
    },
    {
      "epoch": 2.1582896495014903,
      "grad_norm": 0.2230282872915268,
      "learning_rate": 1.7327252551351182e-05,
      "loss": 0.405,
      "step": 10499
    },
    {
      "epoch": 2.158495220474869,
      "grad_norm": 0.2350645661354065,
      "learning_rate": 1.731937694985917e-05,
      "loss": 0.3821,
      "step": 10500
    },
    {
      "epoch": 2.1587007914482474,
      "grad_norm": 0.2205275148153305,
      "learning_rate": 1.7311502712062073e-05,
      "loss": 0.4014,
      "step": 10501
    },
    {
      "epoch": 2.158906362421626,
      "grad_norm": 0.2229074090719223,
      "learning_rate": 1.7303629838347825e-05,
      "loss": 0.3965,
      "step": 10502
    },
    {
      "epoch": 2.1591119333950046,
      "grad_norm": 0.2243238240480423,
      "learning_rate": 1.7295758329104277e-05,
      "loss": 0.3978,
      "step": 10503
    },
    {
      "epoch": 2.159317504368383,
      "grad_norm": 0.22528594732284546,
      "learning_rate": 1.728788818471923e-05,
      "loss": 0.395,
      "step": 10504
    },
    {
      "epoch": 2.159523075341762,
      "grad_norm": 0.22361469268798828,
      "learning_rate": 1.7280019405580394e-05,
      "loss": 0.3949,
      "step": 10505
    },
    {
      "epoch": 2.1597286463151404,
      "grad_norm": 0.22868306934833527,
      "learning_rate": 1.727215199207545e-05,
      "loss": 0.396,
      "step": 10506
    },
    {
      "epoch": 2.159934217288519,
      "grad_norm": 0.23044967651367188,
      "learning_rate": 1.7264285944591975e-05,
      "loss": 0.4099,
      "step": 10507
    },
    {
      "epoch": 2.1601397882618976,
      "grad_norm": 0.2305765151977539,
      "learning_rate": 1.7256421263517503e-05,
      "loss": 0.3899,
      "step": 10508
    },
    {
      "epoch": 2.160345359235276,
      "grad_norm": 0.21992215514183044,
      "learning_rate": 1.724855794923948e-05,
      "loss": 0.3854,
      "step": 10509
    },
    {
      "epoch": 2.1605509302086547,
      "grad_norm": 0.21878063678741455,
      "learning_rate": 1.7240696002145292e-05,
      "loss": 0.3825,
      "step": 10510
    },
    {
      "epoch": 2.160756501182033,
      "grad_norm": 0.12538020312786102,
      "learning_rate": 1.7232835422622252e-05,
      "loss": 0.4371,
      "step": 10511
    },
    {
      "epoch": 2.1609620721554115,
      "grad_norm": 0.23171678185462952,
      "learning_rate": 1.7224976211057645e-05,
      "loss": 0.4239,
      "step": 10512
    },
    {
      "epoch": 2.16116764312879,
      "grad_norm": 0.12217391282320023,
      "learning_rate": 1.721711836783864e-05,
      "loss": 0.4505,
      "step": 10513
    },
    {
      "epoch": 2.1613732141021686,
      "grad_norm": 0.23179614543914795,
      "learning_rate": 1.7209261893352335e-05,
      "loss": 0.396,
      "step": 10514
    },
    {
      "epoch": 2.1615787850755472,
      "grad_norm": 0.2259824126958847,
      "learning_rate": 1.7201406787985824e-05,
      "loss": 0.381,
      "step": 10515
    },
    {
      "epoch": 2.161784356048926,
      "grad_norm": 0.2272365540266037,
      "learning_rate": 1.719355305212607e-05,
      "loss": 0.4012,
      "step": 10516
    },
    {
      "epoch": 2.1619899270223044,
      "grad_norm": 0.2351997047662735,
      "learning_rate": 1.718570068615999e-05,
      "loss": 0.4049,
      "step": 10517
    },
    {
      "epoch": 2.162195497995683,
      "grad_norm": 0.22571827471256256,
      "learning_rate": 1.7177849690474415e-05,
      "loss": 0.3954,
      "step": 10518
    },
    {
      "epoch": 2.1624010689690616,
      "grad_norm": 0.22981050610542297,
      "learning_rate": 1.7170000065456165e-05,
      "loss": 0.3959,
      "step": 10519
    },
    {
      "epoch": 2.16260663994244,
      "grad_norm": 0.2381727695465088,
      "learning_rate": 1.7162151811491932e-05,
      "loss": 0.3908,
      "step": 10520
    },
    {
      "epoch": 2.1628122109158188,
      "grad_norm": 0.2317119836807251,
      "learning_rate": 1.7154304928968366e-05,
      "loss": 0.4135,
      "step": 10521
    },
    {
      "epoch": 2.1630177818891974,
      "grad_norm": 0.2339845448732376,
      "learning_rate": 1.714645941827205e-05,
      "loss": 0.3687,
      "step": 10522
    },
    {
      "epoch": 2.163223352862576,
      "grad_norm": 0.12437080591917038,
      "learning_rate": 1.7138615279789484e-05,
      "loss": 0.4476,
      "step": 10523
    },
    {
      "epoch": 2.1634289238359545,
      "grad_norm": 0.12956155836582184,
      "learning_rate": 1.7130772513907122e-05,
      "loss": 0.4388,
      "step": 10524
    },
    {
      "epoch": 2.163634494809333,
      "grad_norm": 0.22595298290252686,
      "learning_rate": 1.7122931121011325e-05,
      "loss": 0.3914,
      "step": 10525
    },
    {
      "epoch": 2.1638400657827113,
      "grad_norm": 0.23524773120880127,
      "learning_rate": 1.711509110148843e-05,
      "loss": 0.394,
      "step": 10526
    },
    {
      "epoch": 2.16404563675609,
      "grad_norm": 0.229460209608078,
      "learning_rate": 1.7107252455724658e-05,
      "loss": 0.3965,
      "step": 10527
    },
    {
      "epoch": 2.1642512077294684,
      "grad_norm": 0.22869658470153809,
      "learning_rate": 1.709941518410619e-05,
      "loss": 0.3887,
      "step": 10528
    },
    {
      "epoch": 2.164456778702847,
      "grad_norm": 0.2369028925895691,
      "learning_rate": 1.7091579287019127e-05,
      "loss": 0.4027,
      "step": 10529
    },
    {
      "epoch": 2.1646623496762256,
      "grad_norm": 0.23322713375091553,
      "learning_rate": 1.7083744764849512e-05,
      "loss": 0.396,
      "step": 10530
    },
    {
      "epoch": 2.164867920649604,
      "grad_norm": 0.23089557886123657,
      "learning_rate": 1.707591161798331e-05,
      "loss": 0.3945,
      "step": 10531
    },
    {
      "epoch": 2.165073491622983,
      "grad_norm": 0.21757075190544128,
      "learning_rate": 1.7068079846806413e-05,
      "loss": 0.3796,
      "step": 10532
    },
    {
      "epoch": 2.1652790625963614,
      "grad_norm": 0.2164604812860489,
      "learning_rate": 1.706024945170468e-05,
      "loss": 0.398,
      "step": 10533
    },
    {
      "epoch": 2.16548463356974,
      "grad_norm": 0.2306961566209793,
      "learning_rate": 1.705242043306387e-05,
      "loss": 0.3956,
      "step": 10534
    },
    {
      "epoch": 2.1656902045431186,
      "grad_norm": 0.2262311577796936,
      "learning_rate": 1.704459279126966e-05,
      "loss": 0.3937,
      "step": 10535
    },
    {
      "epoch": 2.165895775516497,
      "grad_norm": 0.2339993417263031,
      "learning_rate": 1.703676652670772e-05,
      "loss": 0.4147,
      "step": 10536
    },
    {
      "epoch": 2.1661013464898757,
      "grad_norm": 0.22700749337673187,
      "learning_rate": 1.7028941639763586e-05,
      "loss": 0.3932,
      "step": 10537
    },
    {
      "epoch": 2.1663069174632543,
      "grad_norm": 0.22953462600708008,
      "learning_rate": 1.7021118130822766e-05,
      "loss": 0.3856,
      "step": 10538
    },
    {
      "epoch": 2.166512488436633,
      "grad_norm": 0.12440577894449234,
      "learning_rate": 1.7013296000270665e-05,
      "loss": 0.4448,
      "step": 10539
    },
    {
      "epoch": 2.1667180594100115,
      "grad_norm": 0.22885264456272125,
      "learning_rate": 1.7005475248492677e-05,
      "loss": 0.4023,
      "step": 10540
    },
    {
      "epoch": 2.1669236303833896,
      "grad_norm": 0.22612909972667694,
      "learning_rate": 1.6997655875874082e-05,
      "loss": 0.3813,
      "step": 10541
    },
    {
      "epoch": 2.1671292013567682,
      "grad_norm": 0.22638019919395447,
      "learning_rate": 1.6989837882800095e-05,
      "loss": 0.3978,
      "step": 10542
    },
    {
      "epoch": 2.167334772330147,
      "grad_norm": 0.12233424931764603,
      "learning_rate": 1.6982021269655878e-05,
      "loss": 0.4485,
      "step": 10543
    },
    {
      "epoch": 2.1675403433035254,
      "grad_norm": 0.12629348039627075,
      "learning_rate": 1.6974206036826516e-05,
      "loss": 0.4501,
      "step": 10544
    },
    {
      "epoch": 2.167745914276904,
      "grad_norm": 0.12014532089233398,
      "learning_rate": 1.696639218469703e-05,
      "loss": 0.4594,
      "step": 10545
    },
    {
      "epoch": 2.1679514852502826,
      "grad_norm": 0.2178095281124115,
      "learning_rate": 1.6958579713652356e-05,
      "loss": 0.4123,
      "step": 10546
    },
    {
      "epoch": 2.168157056223661,
      "grad_norm": 0.22389446198940277,
      "learning_rate": 1.6950768624077412e-05,
      "loss": 0.3935,
      "step": 10547
    },
    {
      "epoch": 2.1683626271970398,
      "grad_norm": 0.22835230827331543,
      "learning_rate": 1.6942958916356995e-05,
      "loss": 0.4019,
      "step": 10548
    },
    {
      "epoch": 2.1685681981704183,
      "grad_norm": 0.2239934802055359,
      "learning_rate": 1.6935150590875852e-05,
      "loss": 0.4014,
      "step": 10549
    },
    {
      "epoch": 2.168773769143797,
      "grad_norm": 0.22052869200706482,
      "learning_rate": 1.6927343648018667e-05,
      "loss": 0.3964,
      "step": 10550
    },
    {
      "epoch": 2.1689793401171755,
      "grad_norm": 0.22106504440307617,
      "learning_rate": 1.691953808817005e-05,
      "loss": 0.3868,
      "step": 10551
    },
    {
      "epoch": 2.169184911090554,
      "grad_norm": 0.12797969579696655,
      "learning_rate": 1.6911733911714544e-05,
      "loss": 0.4505,
      "step": 10552
    },
    {
      "epoch": 2.1693904820639327,
      "grad_norm": 0.12730328738689423,
      "learning_rate": 1.6903931119036607e-05,
      "loss": 0.4535,
      "step": 10553
    },
    {
      "epoch": 2.1695960530373113,
      "grad_norm": 0.22867700457572937,
      "learning_rate": 1.6896129710520677e-05,
      "loss": 0.4105,
      "step": 10554
    },
    {
      "epoch": 2.16980162401069,
      "grad_norm": 0.22605451941490173,
      "learning_rate": 1.688832968655108e-05,
      "loss": 0.3941,
      "step": 10555
    },
    {
      "epoch": 2.170007194984068,
      "grad_norm": 0.23293885588645935,
      "learning_rate": 1.6880531047512074e-05,
      "loss": 0.4083,
      "step": 10556
    },
    {
      "epoch": 2.1702127659574466,
      "grad_norm": 0.11922682076692581,
      "learning_rate": 1.6872733793787882e-05,
      "loss": 0.449,
      "step": 10557
    },
    {
      "epoch": 2.170418336930825,
      "grad_norm": 0.12665359675884247,
      "learning_rate": 1.6864937925762637e-05,
      "loss": 0.4587,
      "step": 10558
    },
    {
      "epoch": 2.170623907904204,
      "grad_norm": 0.23081457614898682,
      "learning_rate": 1.685714344382039e-05,
      "loss": 0.3861,
      "step": 10559
    },
    {
      "epoch": 2.1708294788775824,
      "grad_norm": 0.2365112155675888,
      "learning_rate": 1.6849350348345137e-05,
      "loss": 0.3958,
      "step": 10560
    },
    {
      "epoch": 2.171035049850961,
      "grad_norm": 0.12257271260023117,
      "learning_rate": 1.684155863972083e-05,
      "loss": 0.46,
      "step": 10561
    },
    {
      "epoch": 2.1712406208243396,
      "grad_norm": 0.2283942699432373,
      "learning_rate": 1.6833768318331313e-05,
      "loss": 0.388,
      "step": 10562
    },
    {
      "epoch": 2.171446191797718,
      "grad_norm": 0.22442100942134857,
      "learning_rate": 1.6825979384560385e-05,
      "loss": 0.3916,
      "step": 10563
    },
    {
      "epoch": 2.1716517627710967,
      "grad_norm": 0.12442784011363983,
      "learning_rate": 1.681819183879177e-05,
      "loss": 0.4635,
      "step": 10564
    },
    {
      "epoch": 2.1718573337444753,
      "grad_norm": 0.22854554653167725,
      "learning_rate": 1.681040568140912e-05,
      "loss": 0.379,
      "step": 10565
    },
    {
      "epoch": 2.172062904717854,
      "grad_norm": 0.12427257746458054,
      "learning_rate": 1.680262091279602e-05,
      "loss": 0.4719,
      "step": 10566
    },
    {
      "epoch": 2.1722684756912325,
      "grad_norm": 0.22989091277122498,
      "learning_rate": 1.6794837533335984e-05,
      "loss": 0.4118,
      "step": 10567
    },
    {
      "epoch": 2.172474046664611,
      "grad_norm": 0.23249632120132446,
      "learning_rate": 1.6787055543412484e-05,
      "loss": 0.3812,
      "step": 10568
    },
    {
      "epoch": 2.1726796176379897,
      "grad_norm": 0.21678483486175537,
      "learning_rate": 1.677927494340889e-05,
      "loss": 0.4007,
      "step": 10569
    },
    {
      "epoch": 2.1728851886113683,
      "grad_norm": 0.2254790961742401,
      "learning_rate": 1.677149573370852e-05,
      "loss": 0.395,
      "step": 10570
    },
    {
      "epoch": 2.1730907595847464,
      "grad_norm": 0.2205883264541626,
      "learning_rate": 1.6763717914694613e-05,
      "loss": 0.3865,
      "step": 10571
    },
    {
      "epoch": 2.173296330558125,
      "grad_norm": 0.12380865216255188,
      "learning_rate": 1.675594148675035e-05,
      "loss": 0.4542,
      "step": 10572
    },
    {
      "epoch": 2.1735019015315036,
      "grad_norm": 0.22934816777706146,
      "learning_rate": 1.6748166450258836e-05,
      "loss": 0.3885,
      "step": 10573
    },
    {
      "epoch": 2.173707472504882,
      "grad_norm": 0.2283497005701065,
      "learning_rate": 1.6740392805603097e-05,
      "loss": 0.385,
      "step": 10574
    },
    {
      "epoch": 2.1739130434782608,
      "grad_norm": 0.22790871560573578,
      "learning_rate": 1.6732620553166136e-05,
      "loss": 0.3862,
      "step": 10575
    },
    {
      "epoch": 2.1741186144516393,
      "grad_norm": 0.2244972586631775,
      "learning_rate": 1.6724849693330837e-05,
      "loss": 0.4012,
      "step": 10576
    },
    {
      "epoch": 2.174324185425018,
      "grad_norm": 0.23788417875766754,
      "learning_rate": 1.6717080226480034e-05,
      "loss": 0.4071,
      "step": 10577
    },
    {
      "epoch": 2.1745297563983965,
      "grad_norm": 0.22114843130111694,
      "learning_rate": 1.6709312152996484e-05,
      "loss": 0.3793,
      "step": 10578
    },
    {
      "epoch": 2.174735327371775,
      "grad_norm": 0.23666070401668549,
      "learning_rate": 1.6701545473262907e-05,
      "loss": 0.4066,
      "step": 10579
    },
    {
      "epoch": 2.1749408983451537,
      "grad_norm": 0.23616231977939606,
      "learning_rate": 1.669378018766192e-05,
      "loss": 0.4042,
      "step": 10580
    },
    {
      "epoch": 2.1751464693185323,
      "grad_norm": 0.2265489399433136,
      "learning_rate": 1.668601629657606e-05,
      "loss": 0.3877,
      "step": 10581
    },
    {
      "epoch": 2.175352040291911,
      "grad_norm": 0.223519966006279,
      "learning_rate": 1.6678253800387857e-05,
      "loss": 0.4095,
      "step": 10582
    },
    {
      "epoch": 2.1755576112652895,
      "grad_norm": 0.12714464962482452,
      "learning_rate": 1.6670492699479713e-05,
      "loss": 0.4789,
      "step": 10583
    },
    {
      "epoch": 2.175763182238668,
      "grad_norm": 0.22280433773994446,
      "learning_rate": 1.6662732994233978e-05,
      "loss": 0.3944,
      "step": 10584
    },
    {
      "epoch": 2.1759687532120466,
      "grad_norm": 0.2261977344751358,
      "learning_rate": 1.6654974685032947e-05,
      "loss": 0.3955,
      "step": 10585
    },
    {
      "epoch": 2.1761743241854252,
      "grad_norm": 0.23589631915092468,
      "learning_rate": 1.6647217772258825e-05,
      "loss": 0.3948,
      "step": 10586
    },
    {
      "epoch": 2.176379895158804,
      "grad_norm": 0.1299065500497818,
      "learning_rate": 1.6639462256293747e-05,
      "loss": 0.4561,
      "step": 10587
    },
    {
      "epoch": 2.176585466132182,
      "grad_norm": 0.24209356307983398,
      "learning_rate": 1.6631708137519825e-05,
      "loss": 0.4137,
      "step": 10588
    },
    {
      "epoch": 2.1767910371055605,
      "grad_norm": 0.2254961133003235,
      "learning_rate": 1.6623955416319047e-05,
      "loss": 0.3962,
      "step": 10589
    },
    {
      "epoch": 2.176996608078939,
      "grad_norm": 0.1276281327009201,
      "learning_rate": 1.661620409307336e-05,
      "loss": 0.4605,
      "step": 10590
    },
    {
      "epoch": 2.1772021790523177,
      "grad_norm": 0.22398579120635986,
      "learning_rate": 1.660845416816463e-05,
      "loss": 0.396,
      "step": 10591
    },
    {
      "epoch": 2.1774077500256963,
      "grad_norm": 0.22290287911891937,
      "learning_rate": 1.660070564197466e-05,
      "loss": 0.4096,
      "step": 10592
    },
    {
      "epoch": 2.177613320999075,
      "grad_norm": 0.22636477649211884,
      "learning_rate": 1.6592958514885183e-05,
      "loss": 0.3942,
      "step": 10593
    },
    {
      "epoch": 2.1778188919724535,
      "grad_norm": 0.21956631541252136,
      "learning_rate": 1.6585212787277854e-05,
      "loss": 0.4021,
      "step": 10594
    },
    {
      "epoch": 2.178024462945832,
      "grad_norm": 0.2394167184829712,
      "learning_rate": 1.6577468459534298e-05,
      "loss": 0.397,
      "step": 10595
    },
    {
      "epoch": 2.1782300339192107,
      "grad_norm": 0.22891393303871155,
      "learning_rate": 1.656972553203602e-05,
      "loss": 0.3938,
      "step": 10596
    },
    {
      "epoch": 2.1784356048925893,
      "grad_norm": 0.2175266295671463,
      "learning_rate": 1.6561984005164483e-05,
      "loss": 0.3902,
      "step": 10597
    },
    {
      "epoch": 2.178641175865968,
      "grad_norm": 0.22040759027004242,
      "learning_rate": 1.6554243879301076e-05,
      "loss": 0.3728,
      "step": 10598
    },
    {
      "epoch": 2.1788467468393464,
      "grad_norm": 0.22119790315628052,
      "learning_rate": 1.65465051548271e-05,
      "loss": 0.4136,
      "step": 10599
    },
    {
      "epoch": 2.179052317812725,
      "grad_norm": 0.22910022735595703,
      "learning_rate": 1.6538767832123844e-05,
      "loss": 0.4046,
      "step": 10600
    },
    {
      "epoch": 2.1792578887861036,
      "grad_norm": 0.129209503531456,
      "learning_rate": 1.653103191157247e-05,
      "loss": 0.439,
      "step": 10601
    },
    {
      "epoch": 2.179463459759482,
      "grad_norm": 0.23198646306991577,
      "learning_rate": 1.6523297393554072e-05,
      "loss": 0.4143,
      "step": 10602
    },
    {
      "epoch": 2.1796690307328603,
      "grad_norm": 0.22791431844234467,
      "learning_rate": 1.6515564278449728e-05,
      "loss": 0.3833,
      "step": 10603
    },
    {
      "epoch": 2.179874601706239,
      "grad_norm": 0.2255294919013977,
      "learning_rate": 1.6507832566640392e-05,
      "loss": 0.3928,
      "step": 10604
    },
    {
      "epoch": 2.1800801726796175,
      "grad_norm": 0.23165516555309296,
      "learning_rate": 1.6500102258506978e-05,
      "loss": 0.3914,
      "step": 10605
    },
    {
      "epoch": 2.180285743652996,
      "grad_norm": 0.2258346527814865,
      "learning_rate": 1.6492373354430316e-05,
      "loss": 0.3953,
      "step": 10606
    },
    {
      "epoch": 2.1804913146263747,
      "grad_norm": 0.22352395951747894,
      "learning_rate": 1.6484645854791174e-05,
      "loss": 0.3852,
      "step": 10607
    },
    {
      "epoch": 2.1806968855997533,
      "grad_norm": 0.22954273223876953,
      "learning_rate": 1.6476919759970236e-05,
      "loss": 0.4085,
      "step": 10608
    },
    {
      "epoch": 2.180902456573132,
      "grad_norm": 0.22188891470432281,
      "learning_rate": 1.6469195070348158e-05,
      "loss": 0.3917,
      "step": 10609
    },
    {
      "epoch": 2.1811080275465105,
      "grad_norm": 0.12909865379333496,
      "learning_rate": 1.6461471786305488e-05,
      "loss": 0.4633,
      "step": 10610
    },
    {
      "epoch": 2.181313598519889,
      "grad_norm": 0.2231685221195221,
      "learning_rate": 1.6453749908222718e-05,
      "loss": 0.3876,
      "step": 10611
    },
    {
      "epoch": 2.1815191694932676,
      "grad_norm": 0.22691339254379272,
      "learning_rate": 1.6446029436480263e-05,
      "loss": 0.3948,
      "step": 10612
    },
    {
      "epoch": 2.181724740466646,
      "grad_norm": 0.23698212206363678,
      "learning_rate": 1.643831037145847e-05,
      "loss": 0.3962,
      "step": 10613
    },
    {
      "epoch": 2.181930311440025,
      "grad_norm": 0.22960902750492096,
      "learning_rate": 1.6430592713537634e-05,
      "loss": 0.3989,
      "step": 10614
    },
    {
      "epoch": 2.1821358824134034,
      "grad_norm": 0.2320588082075119,
      "learning_rate": 1.642287646309795e-05,
      "loss": 0.392,
      "step": 10615
    },
    {
      "epoch": 2.182341453386782,
      "grad_norm": 0.560815155506134,
      "learning_rate": 1.641516162051958e-05,
      "loss": 0.3986,
      "step": 10616
    },
    {
      "epoch": 2.1825470243601606,
      "grad_norm": 0.12423614412546158,
      "learning_rate": 1.6407448186182598e-05,
      "loss": 0.4408,
      "step": 10617
    },
    {
      "epoch": 2.1827525953335387,
      "grad_norm": 0.2267366200685501,
      "learning_rate": 1.6399736160467e-05,
      "loss": 0.3849,
      "step": 10618
    },
    {
      "epoch": 2.1829581663069173,
      "grad_norm": 0.2252301126718521,
      "learning_rate": 1.6392025543752726e-05,
      "loss": 0.3939,
      "step": 10619
    },
    {
      "epoch": 2.183163737280296,
      "grad_norm": 0.1241535022854805,
      "learning_rate": 1.6384316336419625e-05,
      "loss": 0.4509,
      "step": 10620
    },
    {
      "epoch": 2.1833693082536745,
      "grad_norm": 0.22740307450294495,
      "learning_rate": 1.637660853884752e-05,
      "loss": 0.4052,
      "step": 10621
    },
    {
      "epoch": 2.183574879227053,
      "grad_norm": 0.2271934300661087,
      "learning_rate": 1.6368902151416132e-05,
      "loss": 0.3804,
      "step": 10622
    },
    {
      "epoch": 2.1837804502004317,
      "grad_norm": 0.23072363436222076,
      "learning_rate": 1.6361197174505098e-05,
      "loss": 0.3939,
      "step": 10623
    },
    {
      "epoch": 2.1839860211738102,
      "grad_norm": 0.2331043779850006,
      "learning_rate": 1.6353493608494032e-05,
      "loss": 0.3989,
      "step": 10624
    },
    {
      "epoch": 2.184191592147189,
      "grad_norm": 0.12475959211587906,
      "learning_rate": 1.634579145376245e-05,
      "loss": 0.4525,
      "step": 10625
    },
    {
      "epoch": 2.1843971631205674,
      "grad_norm": 0.22251753509044647,
      "learning_rate": 1.633809071068979e-05,
      "loss": 0.4049,
      "step": 10626
    },
    {
      "epoch": 2.184602734093946,
      "grad_norm": 0.22629208862781525,
      "learning_rate": 1.633039137965543e-05,
      "loss": 0.4039,
      "step": 10627
    },
    {
      "epoch": 2.1848083050673246,
      "grad_norm": 0.22912812232971191,
      "learning_rate": 1.632269346103869e-05,
      "loss": 0.4004,
      "step": 10628
    },
    {
      "epoch": 2.185013876040703,
      "grad_norm": 0.2214146852493286,
      "learning_rate": 1.6314996955218792e-05,
      "loss": 0.3727,
      "step": 10629
    },
    {
      "epoch": 2.1852194470140818,
      "grad_norm": 0.22701111435890198,
      "learning_rate": 1.6307301862574933e-05,
      "loss": 0.4044,
      "step": 10630
    },
    {
      "epoch": 2.1854250179874604,
      "grad_norm": 0.22968102991580963,
      "learning_rate": 1.6299608183486206e-05,
      "loss": 0.399,
      "step": 10631
    },
    {
      "epoch": 2.185630588960839,
      "grad_norm": 0.2261413037776947,
      "learning_rate": 1.6291915918331637e-05,
      "loss": 0.3978,
      "step": 10632
    },
    {
      "epoch": 2.185836159934217,
      "grad_norm": 0.2443215698003769,
      "learning_rate": 1.6284225067490187e-05,
      "loss": 0.3938,
      "step": 10633
    },
    {
      "epoch": 2.1860417309075957,
      "grad_norm": 0.1367214322090149,
      "learning_rate": 1.6276535631340756e-05,
      "loss": 0.459,
      "step": 10634
    },
    {
      "epoch": 2.1862473018809743,
      "grad_norm": 0.1239805743098259,
      "learning_rate": 1.6268847610262154e-05,
      "loss": 0.445,
      "step": 10635
    },
    {
      "epoch": 2.186452872854353,
      "grad_norm": 0.23008181154727936,
      "learning_rate": 1.626116100463313e-05,
      "loss": 0.3968,
      "step": 10636
    },
    {
      "epoch": 2.1866584438277314,
      "grad_norm": 0.22786974906921387,
      "learning_rate": 1.625347581483239e-05,
      "loss": 0.3968,
      "step": 10637
    },
    {
      "epoch": 2.18686401480111,
      "grad_norm": 0.2298787385225296,
      "learning_rate": 1.6245792041238542e-05,
      "loss": 0.3913,
      "step": 10638
    },
    {
      "epoch": 2.1870695857744886,
      "grad_norm": 0.23194655776023865,
      "learning_rate": 1.623810968423012e-05,
      "loss": 0.3976,
      "step": 10639
    },
    {
      "epoch": 2.187275156747867,
      "grad_norm": 0.23695392906665802,
      "learning_rate": 1.62304287441856e-05,
      "loss": 0.4161,
      "step": 10640
    },
    {
      "epoch": 2.187480727721246,
      "grad_norm": 0.22045163810253143,
      "learning_rate": 1.6222749221483375e-05,
      "loss": 0.412,
      "step": 10641
    },
    {
      "epoch": 2.1876862986946244,
      "grad_norm": 0.22696349024772644,
      "learning_rate": 1.62150711165018e-05,
      "loss": 0.3791,
      "step": 10642
    },
    {
      "epoch": 2.187891869668003,
      "grad_norm": 0.23293721675872803,
      "learning_rate": 1.6207394429619136e-05,
      "loss": 0.4014,
      "step": 10643
    },
    {
      "epoch": 2.1880974406413816,
      "grad_norm": 0.12806709110736847,
      "learning_rate": 1.619971916121356e-05,
      "loss": 0.449,
      "step": 10644
    },
    {
      "epoch": 2.18830301161476,
      "grad_norm": 0.21958725154399872,
      "learning_rate": 1.6192045311663218e-05,
      "loss": 0.3836,
      "step": 10645
    },
    {
      "epoch": 2.1885085825881387,
      "grad_norm": 0.22592249512672424,
      "learning_rate": 1.6184372881346154e-05,
      "loss": 0.3945,
      "step": 10646
    },
    {
      "epoch": 2.1887141535615173,
      "grad_norm": 0.12806597352027893,
      "learning_rate": 1.6176701870640362e-05,
      "loss": 0.4394,
      "step": 10647
    },
    {
      "epoch": 2.1889197245348955,
      "grad_norm": 0.2250743955373764,
      "learning_rate": 1.616903227992374e-05,
      "loss": 0.3952,
      "step": 10648
    },
    {
      "epoch": 2.189125295508274,
      "grad_norm": 0.1263757050037384,
      "learning_rate": 1.616136410957415e-05,
      "loss": 0.4591,
      "step": 10649
    },
    {
      "epoch": 2.1893308664816526,
      "grad_norm": 0.237161323428154,
      "learning_rate": 1.6153697359969344e-05,
      "loss": 0.4032,
      "step": 10650
    },
    {
      "epoch": 2.1895364374550312,
      "grad_norm": 0.22208333015441895,
      "learning_rate": 1.614603203148705e-05,
      "loss": 0.3927,
      "step": 10651
    },
    {
      "epoch": 2.18974200842841,
      "grad_norm": 0.22636909782886505,
      "learning_rate": 1.61383681245049e-05,
      "loss": 0.3784,
      "step": 10652
    },
    {
      "epoch": 2.1899475794017884,
      "grad_norm": 0.23345516622066498,
      "learning_rate": 1.6130705639400447e-05,
      "loss": 0.4156,
      "step": 10653
    },
    {
      "epoch": 2.190153150375167,
      "grad_norm": 0.2252190262079239,
      "learning_rate": 1.6123044576551202e-05,
      "loss": 0.3922,
      "step": 10654
    },
    {
      "epoch": 2.1903587213485456,
      "grad_norm": 0.23159563541412354,
      "learning_rate": 1.6115384936334575e-05,
      "loss": 0.4089,
      "step": 10655
    },
    {
      "epoch": 2.190564292321924,
      "grad_norm": 0.22487987577915192,
      "learning_rate": 1.6107726719127926e-05,
      "loss": 0.3992,
      "step": 10656
    },
    {
      "epoch": 2.1907698632953028,
      "grad_norm": 0.23709611594676971,
      "learning_rate": 1.6100069925308523e-05,
      "loss": 0.4198,
      "step": 10657
    },
    {
      "epoch": 2.1909754342686814,
      "grad_norm": 0.21871237456798553,
      "learning_rate": 1.609241455525361e-05,
      "loss": 0.4042,
      "step": 10658
    },
    {
      "epoch": 2.19118100524206,
      "grad_norm": 0.2315407693386078,
      "learning_rate": 1.6084760609340326e-05,
      "loss": 0.4062,
      "step": 10659
    },
    {
      "epoch": 2.1913865762154385,
      "grad_norm": 0.2263568639755249,
      "learning_rate": 1.6077108087945734e-05,
      "loss": 0.3908,
      "step": 10660
    },
    {
      "epoch": 2.191592147188817,
      "grad_norm": 0.12639762461185455,
      "learning_rate": 1.6069456991446842e-05,
      "loss": 0.4546,
      "step": 10661
    },
    {
      "epoch": 2.1917977181621957,
      "grad_norm": 0.2350437194108963,
      "learning_rate": 1.606180732022058e-05,
      "loss": 0.4115,
      "step": 10662
    },
    {
      "epoch": 2.192003289135574,
      "grad_norm": 0.21677015721797943,
      "learning_rate": 1.60541590746438e-05,
      "loss": 0.3724,
      "step": 10663
    },
    {
      "epoch": 2.1922088601089524,
      "grad_norm": 0.22756123542785645,
      "learning_rate": 1.6046512255093326e-05,
      "loss": 0.3916,
      "step": 10664
    },
    {
      "epoch": 2.192414431082331,
      "grad_norm": 0.12300966680049896,
      "learning_rate": 1.6038866861945847e-05,
      "loss": 0.4532,
      "step": 10665
    },
    {
      "epoch": 2.1926200020557096,
      "grad_norm": 0.23039010167121887,
      "learning_rate": 1.6031222895578052e-05,
      "loss": 0.3941,
      "step": 10666
    },
    {
      "epoch": 2.192825573029088,
      "grad_norm": 0.2256508469581604,
      "learning_rate": 1.6023580356366502e-05,
      "loss": 0.4022,
      "step": 10667
    },
    {
      "epoch": 2.193031144002467,
      "grad_norm": 0.21880964934825897,
      "learning_rate": 1.6015939244687717e-05,
      "loss": 0.3848,
      "step": 10668
    },
    {
      "epoch": 2.1932367149758454,
      "grad_norm": 0.23204973340034485,
      "learning_rate": 1.600829956091813e-05,
      "loss": 0.3865,
      "step": 10669
    },
    {
      "epoch": 2.193442285949224,
      "grad_norm": 0.24459494650363922,
      "learning_rate": 1.6000661305434108e-05,
      "loss": 0.3947,
      "step": 10670
    },
    {
      "epoch": 2.1936478569226026,
      "grad_norm": 0.23136425018310547,
      "learning_rate": 1.5993024478611972e-05,
      "loss": 0.3957,
      "step": 10671
    },
    {
      "epoch": 2.193853427895981,
      "grad_norm": 0.22914138436317444,
      "learning_rate": 1.5985389080827937e-05,
      "loss": 0.3889,
      "step": 10672
    },
    {
      "epoch": 2.1940589988693597,
      "grad_norm": 0.22302468121051788,
      "learning_rate": 1.5977755112458174e-05,
      "loss": 0.385,
      "step": 10673
    },
    {
      "epoch": 2.1942645698427383,
      "grad_norm": 0.2292277216911316,
      "learning_rate": 1.5970122573878766e-05,
      "loss": 0.4123,
      "step": 10674
    },
    {
      "epoch": 2.194470140816117,
      "grad_norm": 0.2244681715965271,
      "learning_rate": 1.5962491465465733e-05,
      "loss": 0.3681,
      "step": 10675
    },
    {
      "epoch": 2.1946757117894955,
      "grad_norm": 0.2233274132013321,
      "learning_rate": 1.5954861787595024e-05,
      "loss": 0.4046,
      "step": 10676
    },
    {
      "epoch": 2.194881282762874,
      "grad_norm": 0.23008307814598083,
      "learning_rate": 1.5947233540642505e-05,
      "loss": 0.408,
      "step": 10677
    },
    {
      "epoch": 2.1950868537362522,
      "grad_norm": 0.2235502302646637,
      "learning_rate": 1.593960672498401e-05,
      "loss": 0.3884,
      "step": 10678
    },
    {
      "epoch": 2.195292424709631,
      "grad_norm": 0.12918898463249207,
      "learning_rate": 1.5931981340995262e-05,
      "loss": 0.4728,
      "step": 10679
    },
    {
      "epoch": 2.1954979956830094,
      "grad_norm": 0.21759852766990662,
      "learning_rate": 1.5924357389051935e-05,
      "loss": 0.3975,
      "step": 10680
    },
    {
      "epoch": 2.195703566656388,
      "grad_norm": 0.22451691329479218,
      "learning_rate": 1.5916734869529616e-05,
      "loss": 0.3896,
      "step": 10681
    },
    {
      "epoch": 2.1959091376297666,
      "grad_norm": 0.13441641628742218,
      "learning_rate": 1.5909113782803837e-05,
      "loss": 0.4687,
      "step": 10682
    },
    {
      "epoch": 2.196114708603145,
      "grad_norm": 0.23042891919612885,
      "learning_rate": 1.5901494129250052e-05,
      "loss": 0.3967,
      "step": 10683
    },
    {
      "epoch": 2.1963202795765238,
      "grad_norm": 0.2289479672908783,
      "learning_rate": 1.589387590924363e-05,
      "loss": 0.3911,
      "step": 10684
    },
    {
      "epoch": 2.1965258505499023,
      "grad_norm": 0.22492031753063202,
      "learning_rate": 1.5886259123159917e-05,
      "loss": 0.3867,
      "step": 10685
    },
    {
      "epoch": 2.196731421523281,
      "grad_norm": 0.2289929836988449,
      "learning_rate": 1.5878643771374133e-05,
      "loss": 0.3915,
      "step": 10686
    },
    {
      "epoch": 2.1969369924966595,
      "grad_norm": 0.12365361303091049,
      "learning_rate": 1.5871029854261445e-05,
      "loss": 0.4289,
      "step": 10687
    },
    {
      "epoch": 2.197142563470038,
      "grad_norm": 0.21747228503227234,
      "learning_rate": 1.5863417372196988e-05,
      "loss": 0.401,
      "step": 10688
    },
    {
      "epoch": 2.1973481344434167,
      "grad_norm": 0.21652854979038239,
      "learning_rate": 1.585580632555577e-05,
      "loss": 0.3908,
      "step": 10689
    },
    {
      "epoch": 2.1975537054167953,
      "grad_norm": 0.22147879004478455,
      "learning_rate": 1.584819671471275e-05,
      "loss": 0.3968,
      "step": 10690
    },
    {
      "epoch": 2.197759276390174,
      "grad_norm": 0.2206578552722931,
      "learning_rate": 1.5840588540042816e-05,
      "loss": 0.3972,
      "step": 10691
    },
    {
      "epoch": 2.1979648473635525,
      "grad_norm": 0.23885060846805573,
      "learning_rate": 1.5832981801920806e-05,
      "loss": 0.385,
      "step": 10692
    },
    {
      "epoch": 2.1981704183369306,
      "grad_norm": 0.23165802657604218,
      "learning_rate": 1.582537650072145e-05,
      "loss": 0.3954,
      "step": 10693
    },
    {
      "epoch": 2.198375989310309,
      "grad_norm": 0.23803496360778809,
      "learning_rate": 1.5817772636819437e-05,
      "loss": 0.4089,
      "step": 10694
    },
    {
      "epoch": 2.198581560283688,
      "grad_norm": 0.22591203451156616,
      "learning_rate": 1.581017021058937e-05,
      "loss": 0.3965,
      "step": 10695
    },
    {
      "epoch": 2.1987871312570664,
      "grad_norm": 0.23487183451652527,
      "learning_rate": 1.5802569222405785e-05,
      "loss": 0.4041,
      "step": 10696
    },
    {
      "epoch": 2.198992702230445,
      "grad_norm": 0.12291015684604645,
      "learning_rate": 1.5794969672643143e-05,
      "loss": 0.4483,
      "step": 10697
    },
    {
      "epoch": 2.1991982732038236,
      "grad_norm": 0.2258739024400711,
      "learning_rate": 1.5787371561675826e-05,
      "loss": 0.3911,
      "step": 10698
    },
    {
      "epoch": 2.199403844177202,
      "grad_norm": 0.2271280735731125,
      "learning_rate": 1.5779774889878188e-05,
      "loss": 0.39,
      "step": 10699
    },
    {
      "epoch": 2.1996094151505807,
      "grad_norm": 0.12247934192419052,
      "learning_rate": 1.5772179657624468e-05,
      "loss": 0.4543,
      "step": 10700
    },
    {
      "epoch": 2.1998149861239593,
      "grad_norm": 0.22866493463516235,
      "learning_rate": 1.5764585865288846e-05,
      "loss": 0.3903,
      "step": 10701
    },
    {
      "epoch": 2.200020557097338,
      "grad_norm": 0.12255199253559113,
      "learning_rate": 1.5756993513245428e-05,
      "loss": 0.453,
      "step": 10702
    },
    {
      "epoch": 2.2002261280707165,
      "grad_norm": 0.2146882563829422,
      "learning_rate": 1.574940260186826e-05,
      "loss": 0.3789,
      "step": 10703
    },
    {
      "epoch": 2.200431699044095,
      "grad_norm": 0.23465701937675476,
      "learning_rate": 1.5741813131531313e-05,
      "loss": 0.3917,
      "step": 10704
    },
    {
      "epoch": 2.2006372700174737,
      "grad_norm": 0.2412889301776886,
      "learning_rate": 1.5734225102608464e-05,
      "loss": 0.4213,
      "step": 10705
    },
    {
      "epoch": 2.2008428409908523,
      "grad_norm": 0.22149762511253357,
      "learning_rate": 1.5726638515473566e-05,
      "loss": 0.3988,
      "step": 10706
    },
    {
      "epoch": 2.201048411964231,
      "grad_norm": 0.23268526792526245,
      "learning_rate": 1.571905337050037e-05,
      "loss": 0.3857,
      "step": 10707
    },
    {
      "epoch": 2.201253982937609,
      "grad_norm": 0.22317472100257874,
      "learning_rate": 1.571146966806254e-05,
      "loss": 0.3828,
      "step": 10708
    },
    {
      "epoch": 2.2014595539109876,
      "grad_norm": 0.22195008397102356,
      "learning_rate": 1.570388740853372e-05,
      "loss": 0.4056,
      "step": 10709
    },
    {
      "epoch": 2.201665124884366,
      "grad_norm": 0.21876020729541779,
      "learning_rate": 1.569630659228744e-05,
      "loss": 0.4002,
      "step": 10710
    },
    {
      "epoch": 2.2018706958577448,
      "grad_norm": 0.2204761803150177,
      "learning_rate": 1.5688727219697163e-05,
      "loss": 0.3963,
      "step": 10711
    },
    {
      "epoch": 2.2020762668311233,
      "grad_norm": 0.22541974484920502,
      "learning_rate": 1.5681149291136285e-05,
      "loss": 0.3829,
      "step": 10712
    },
    {
      "epoch": 2.202281837804502,
      "grad_norm": 0.22481369972229004,
      "learning_rate": 1.567357280697816e-05,
      "loss": 0.3834,
      "step": 10713
    },
    {
      "epoch": 2.2024874087778805,
      "grad_norm": 0.23171178996562958,
      "learning_rate": 1.5665997767596033e-05,
      "loss": 0.4008,
      "step": 10714
    },
    {
      "epoch": 2.202692979751259,
      "grad_norm": 0.22620131075382233,
      "learning_rate": 1.5658424173363085e-05,
      "loss": 0.3997,
      "step": 10715
    },
    {
      "epoch": 2.2028985507246377,
      "grad_norm": 0.22562332451343536,
      "learning_rate": 1.5650852024652435e-05,
      "loss": 0.4104,
      "step": 10716
    },
    {
      "epoch": 2.2031041216980163,
      "grad_norm": 0.2276526838541031,
      "learning_rate": 1.5643281321837135e-05,
      "loss": 0.392,
      "step": 10717
    },
    {
      "epoch": 2.203309692671395,
      "grad_norm": 0.12458810210227966,
      "learning_rate": 1.5635712065290146e-05,
      "loss": 0.4551,
      "step": 10718
    },
    {
      "epoch": 2.2035152636447735,
      "grad_norm": 0.23165149986743927,
      "learning_rate": 1.5628144255384365e-05,
      "loss": 0.3855,
      "step": 10719
    },
    {
      "epoch": 2.203720834618152,
      "grad_norm": 0.2240263819694519,
      "learning_rate": 1.562057789249264e-05,
      "loss": 0.3825,
      "step": 10720
    },
    {
      "epoch": 2.2039264055915306,
      "grad_norm": 0.21997642517089844,
      "learning_rate": 1.5613012976987728e-05,
      "loss": 0.3813,
      "step": 10721
    },
    {
      "epoch": 2.2041319765649092,
      "grad_norm": 1.4580494165420532,
      "learning_rate": 1.5605449509242312e-05,
      "loss": 0.408,
      "step": 10722
    },
    {
      "epoch": 2.2043375475382874,
      "grad_norm": 0.23071999847888947,
      "learning_rate": 1.5597887489629008e-05,
      "loss": 0.3983,
      "step": 10723
    },
    {
      "epoch": 2.204543118511666,
      "grad_norm": 0.22993268072605133,
      "learning_rate": 1.559032691852036e-05,
      "loss": 0.392,
      "step": 10724
    },
    {
      "epoch": 2.2047486894850445,
      "grad_norm": 0.12808802723884583,
      "learning_rate": 1.5582767796288852e-05,
      "loss": 0.4491,
      "step": 10725
    },
    {
      "epoch": 2.204954260458423,
      "grad_norm": 0.22585633397102356,
      "learning_rate": 1.5575210123306855e-05,
      "loss": 0.4,
      "step": 10726
    },
    {
      "epoch": 2.2051598314318017,
      "grad_norm": 0.12611474096775055,
      "learning_rate": 1.5567653899946745e-05,
      "loss": 0.4577,
      "step": 10727
    },
    {
      "epoch": 2.2053654024051803,
      "grad_norm": 0.17360465228557587,
      "learning_rate": 1.5560099126580757e-05,
      "loss": 0.4583,
      "step": 10728
    },
    {
      "epoch": 2.205570973378559,
      "grad_norm": 0.23249217867851257,
      "learning_rate": 1.5552545803581072e-05,
      "loss": 0.3971,
      "step": 10729
    },
    {
      "epoch": 2.2057765443519375,
      "grad_norm": 0.2386702597141266,
      "learning_rate": 1.5544993931319832e-05,
      "loss": 0.3891,
      "step": 10730
    },
    {
      "epoch": 2.205982115325316,
      "grad_norm": 0.12809514999389648,
      "learning_rate": 1.5537443510169068e-05,
      "loss": 0.4534,
      "step": 10731
    },
    {
      "epoch": 2.2061876862986947,
      "grad_norm": 0.2297258824110031,
      "learning_rate": 1.5529894540500755e-05,
      "loss": 0.3897,
      "step": 10732
    },
    {
      "epoch": 2.2063932572720732,
      "grad_norm": 0.22300571203231812,
      "learning_rate": 1.5522347022686782e-05,
      "loss": 0.3961,
      "step": 10733
    },
    {
      "epoch": 2.206598828245452,
      "grad_norm": 0.23077335953712463,
      "learning_rate": 1.5514800957099003e-05,
      "loss": 0.4094,
      "step": 10734
    },
    {
      "epoch": 2.2068043992188304,
      "grad_norm": 0.22444140911102295,
      "learning_rate": 1.550725634410917e-05,
      "loss": 0.4009,
      "step": 10735
    },
    {
      "epoch": 2.207009970192209,
      "grad_norm": 0.13065902888774872,
      "learning_rate": 1.549971318408897e-05,
      "loss": 0.4443,
      "step": 10736
    },
    {
      "epoch": 2.2072155411655876,
      "grad_norm": 0.12475431710481644,
      "learning_rate": 1.5492171477410013e-05,
      "loss": 0.4383,
      "step": 10737
    },
    {
      "epoch": 2.2074211121389657,
      "grad_norm": 0.23084284365177155,
      "learning_rate": 1.5484631224443852e-05,
      "loss": 0.4043,
      "step": 10738
    },
    {
      "epoch": 2.2076266831123443,
      "grad_norm": 0.12472715973854065,
      "learning_rate": 1.5477092425561953e-05,
      "loss": 0.4307,
      "step": 10739
    },
    {
      "epoch": 2.207832254085723,
      "grad_norm": 0.1253010481595993,
      "learning_rate": 1.546955508113571e-05,
      "loss": 0.4488,
      "step": 10740
    },
    {
      "epoch": 2.2080378250591015,
      "grad_norm": 0.12054693698883057,
      "learning_rate": 1.5462019191536478e-05,
      "loss": 0.4402,
      "step": 10741
    },
    {
      "epoch": 2.20824339603248,
      "grad_norm": 0.2258850783109665,
      "learning_rate": 1.5454484757135496e-05,
      "loss": 0.3804,
      "step": 10742
    },
    {
      "epoch": 2.2084489670058587,
      "grad_norm": 0.23322363197803497,
      "learning_rate": 1.5446951778303958e-05,
      "loss": 0.4058,
      "step": 10743
    },
    {
      "epoch": 2.2086545379792373,
      "grad_norm": 0.23911800980567932,
      "learning_rate": 1.543942025541297e-05,
      "loss": 0.3821,
      "step": 10744
    },
    {
      "epoch": 2.208860108952616,
      "grad_norm": 0.22474057972431183,
      "learning_rate": 1.5431890188833585e-05,
      "loss": 0.3981,
      "step": 10745
    },
    {
      "epoch": 2.2090656799259945,
      "grad_norm": 0.22120480239391327,
      "learning_rate": 1.5424361578936754e-05,
      "loss": 0.4036,
      "step": 10746
    },
    {
      "epoch": 2.209271250899373,
      "grad_norm": 0.23113922774791718,
      "learning_rate": 1.5416834426093406e-05,
      "loss": 0.3996,
      "step": 10747
    },
    {
      "epoch": 2.2094768218727516,
      "grad_norm": 0.23626331984996796,
      "learning_rate": 1.5409308730674354e-05,
      "loss": 0.409,
      "step": 10748
    },
    {
      "epoch": 2.20968239284613,
      "grad_norm": 0.22344759106636047,
      "learning_rate": 1.540178449305036e-05,
      "loss": 0.3952,
      "step": 10749
    },
    {
      "epoch": 2.209887963819509,
      "grad_norm": 0.23070107400417328,
      "learning_rate": 1.5394261713592094e-05,
      "loss": 0.3839,
      "step": 10750
    },
    {
      "epoch": 2.2100935347928874,
      "grad_norm": 0.22357220947742462,
      "learning_rate": 1.5386740392670165e-05,
      "loss": 0.3963,
      "step": 10751
    },
    {
      "epoch": 2.210299105766266,
      "grad_norm": 0.2235075831413269,
      "learning_rate": 1.5379220530655138e-05,
      "loss": 0.3847,
      "step": 10752
    },
    {
      "epoch": 2.2105046767396446,
      "grad_norm": 0.2250668853521347,
      "learning_rate": 1.5371702127917458e-05,
      "loss": 0.3854,
      "step": 10753
    },
    {
      "epoch": 2.2107102477130227,
      "grad_norm": 0.230119988322258,
      "learning_rate": 1.5364185184827543e-05,
      "loss": 0.3914,
      "step": 10754
    },
    {
      "epoch": 2.2109158186864013,
      "grad_norm": 0.22010499238967896,
      "learning_rate": 1.5356669701755708e-05,
      "loss": 0.4028,
      "step": 10755
    },
    {
      "epoch": 2.21112138965978,
      "grad_norm": 0.22333703935146332,
      "learning_rate": 1.5349155679072205e-05,
      "loss": 0.385,
      "step": 10756
    },
    {
      "epoch": 2.2113269606331585,
      "grad_norm": 0.22866930067539215,
      "learning_rate": 1.534164311714721e-05,
      "loss": 0.4027,
      "step": 10757
    },
    {
      "epoch": 2.211532531606537,
      "grad_norm": 0.22447089850902557,
      "learning_rate": 1.533413201635084e-05,
      "loss": 0.4108,
      "step": 10758
    },
    {
      "epoch": 2.2117381025799157,
      "grad_norm": 0.23292423784732819,
      "learning_rate": 1.5326622377053125e-05,
      "loss": 0.4173,
      "step": 10759
    },
    {
      "epoch": 2.2119436735532942,
      "grad_norm": 0.23067182302474976,
      "learning_rate": 1.5319114199624018e-05,
      "loss": 0.3871,
      "step": 10760
    },
    {
      "epoch": 2.212149244526673,
      "grad_norm": 0.13341167569160461,
      "learning_rate": 1.5311607484433443e-05,
      "loss": 0.4604,
      "step": 10761
    },
    {
      "epoch": 2.2123548155000514,
      "grad_norm": 0.2339571863412857,
      "learning_rate": 1.53041022318512e-05,
      "loss": 0.3879,
      "step": 10762
    },
    {
      "epoch": 2.21256038647343,
      "grad_norm": 0.22482730448246002,
      "learning_rate": 1.5296598442247045e-05,
      "loss": 0.4002,
      "step": 10763
    },
    {
      "epoch": 2.2127659574468086,
      "grad_norm": 0.2297281175851822,
      "learning_rate": 1.5289096115990654e-05,
      "loss": 0.4032,
      "step": 10764
    },
    {
      "epoch": 2.212971528420187,
      "grad_norm": 0.12835589051246643,
      "learning_rate": 1.5281595253451624e-05,
      "loss": 0.4497,
      "step": 10765
    },
    {
      "epoch": 2.2131770993935658,
      "grad_norm": 0.23261982202529907,
      "learning_rate": 1.52740958549995e-05,
      "loss": 0.4021,
      "step": 10766
    },
    {
      "epoch": 2.2133826703669444,
      "grad_norm": 0.22967736423015594,
      "learning_rate": 1.526659792100371e-05,
      "loss": 0.3974,
      "step": 10767
    },
    {
      "epoch": 2.213588241340323,
      "grad_norm": 0.1222897469997406,
      "learning_rate": 1.5259101451833683e-05,
      "loss": 0.454,
      "step": 10768
    },
    {
      "epoch": 2.2137938123137015,
      "grad_norm": 0.22212044894695282,
      "learning_rate": 1.5251606447858725e-05,
      "loss": 0.3908,
      "step": 10769
    },
    {
      "epoch": 2.2139993832870797,
      "grad_norm": 0.23276306688785553,
      "learning_rate": 1.5244112909448069e-05,
      "loss": 0.3877,
      "step": 10770
    },
    {
      "epoch": 2.2142049542604583,
      "grad_norm": 0.12715481221675873,
      "learning_rate": 1.5236620836970893e-05,
      "loss": 0.4706,
      "step": 10771
    },
    {
      "epoch": 2.214410525233837,
      "grad_norm": 0.22773075103759766,
      "learning_rate": 1.5229130230796281e-05,
      "loss": 0.4008,
      "step": 10772
    },
    {
      "epoch": 2.2146160962072154,
      "grad_norm": 0.23511482775211334,
      "learning_rate": 1.5221641091293283e-05,
      "loss": 0.4078,
      "step": 10773
    },
    {
      "epoch": 2.214821667180594,
      "grad_norm": 0.21598058938980103,
      "learning_rate": 1.521415341883085e-05,
      "loss": 0.3908,
      "step": 10774
    },
    {
      "epoch": 2.2150272381539726,
      "grad_norm": 0.23073440790176392,
      "learning_rate": 1.5206667213777846e-05,
      "loss": 0.404,
      "step": 10775
    },
    {
      "epoch": 2.215232809127351,
      "grad_norm": 0.22900259494781494,
      "learning_rate": 1.5199182476503105e-05,
      "loss": 0.3845,
      "step": 10776
    },
    {
      "epoch": 2.21543838010073,
      "grad_norm": 0.26081186532974243,
      "learning_rate": 1.519169920737536e-05,
      "loss": 0.397,
      "step": 10777
    },
    {
      "epoch": 2.2156439510741084,
      "grad_norm": 0.2252834439277649,
      "learning_rate": 1.5184217406763266e-05,
      "loss": 0.3678,
      "step": 10778
    },
    {
      "epoch": 2.215849522047487,
      "grad_norm": 0.2190970927476883,
      "learning_rate": 1.5176737075035423e-05,
      "loss": 0.3733,
      "step": 10779
    },
    {
      "epoch": 2.2160550930208656,
      "grad_norm": 0.23575487732887268,
      "learning_rate": 1.5169258212560354e-05,
      "loss": 0.4151,
      "step": 10780
    },
    {
      "epoch": 2.216260663994244,
      "grad_norm": 0.22723565995693207,
      "learning_rate": 1.5161780819706485e-05,
      "loss": 0.382,
      "step": 10781
    },
    {
      "epoch": 2.2164662349676227,
      "grad_norm": 0.23032769560813904,
      "learning_rate": 1.5154304896842231e-05,
      "loss": 0.3863,
      "step": 10782
    },
    {
      "epoch": 2.2166718059410013,
      "grad_norm": 0.2345583289861679,
      "learning_rate": 1.5146830444335872e-05,
      "loss": 0.4049,
      "step": 10783
    },
    {
      "epoch": 2.21687737691438,
      "grad_norm": 0.22362026572227478,
      "learning_rate": 1.5139357462555645e-05,
      "loss": 0.3943,
      "step": 10784
    },
    {
      "epoch": 2.217082947887758,
      "grad_norm": 0.23059040307998657,
      "learning_rate": 1.513188595186971e-05,
      "loss": 0.4008,
      "step": 10785
    },
    {
      "epoch": 2.2172885188611366,
      "grad_norm": 0.12331248074769974,
      "learning_rate": 1.5124415912646149e-05,
      "loss": 0.4494,
      "step": 10786
    },
    {
      "epoch": 2.2174940898345152,
      "grad_norm": 0.23354892432689667,
      "learning_rate": 1.5116947345252977e-05,
      "loss": 0.4016,
      "step": 10787
    },
    {
      "epoch": 2.217699660807894,
      "grad_norm": 0.232215017080307,
      "learning_rate": 1.5109480250058124e-05,
      "loss": 0.403,
      "step": 10788
    },
    {
      "epoch": 2.2179052317812724,
      "grad_norm": 0.22965744137763977,
      "learning_rate": 1.5102014627429483e-05,
      "loss": 0.4111,
      "step": 10789
    },
    {
      "epoch": 2.218110802754651,
      "grad_norm": 0.22863295674324036,
      "learning_rate": 1.5094550477734838e-05,
      "loss": 0.395,
      "step": 10790
    },
    {
      "epoch": 2.2183163737280296,
      "grad_norm": 0.22686706483364105,
      "learning_rate": 1.5087087801341914e-05,
      "loss": 0.4058,
      "step": 10791
    },
    {
      "epoch": 2.218521944701408,
      "grad_norm": 0.2347644418478012,
      "learning_rate": 1.5079626598618362e-05,
      "loss": 0.3953,
      "step": 10792
    },
    {
      "epoch": 2.2187275156747868,
      "grad_norm": 0.23546837270259857,
      "learning_rate": 1.5072166869931748e-05,
      "loss": 0.4049,
      "step": 10793
    },
    {
      "epoch": 2.2189330866481654,
      "grad_norm": 0.12171991914510727,
      "learning_rate": 1.5064708615649601e-05,
      "loss": 0.4516,
      "step": 10794
    },
    {
      "epoch": 2.219138657621544,
      "grad_norm": 0.23397013545036316,
      "learning_rate": 1.5057251836139343e-05,
      "loss": 0.3816,
      "step": 10795
    },
    {
      "epoch": 2.2193442285949225,
      "grad_norm": 0.22694621980190277,
      "learning_rate": 1.5049796531768323e-05,
      "loss": 0.3838,
      "step": 10796
    },
    {
      "epoch": 2.219549799568301,
      "grad_norm": 0.234305739402771,
      "learning_rate": 1.5042342702903859e-05,
      "loss": 0.3874,
      "step": 10797
    },
    {
      "epoch": 2.2197553705416797,
      "grad_norm": 0.2361372858285904,
      "learning_rate": 1.5034890349913142e-05,
      "loss": 0.3964,
      "step": 10798
    },
    {
      "epoch": 2.2199609415150583,
      "grad_norm": 0.23526331782341003,
      "learning_rate": 1.502743947316332e-05,
      "loss": 0.3981,
      "step": 10799
    },
    {
      "epoch": 2.2201665124884364,
      "grad_norm": 0.23586028814315796,
      "learning_rate": 1.501999007302147e-05,
      "loss": 0.4084,
      "step": 10800
    },
    {
      "epoch": 2.220372083461815,
      "grad_norm": 0.2271769642829895,
      "learning_rate": 1.5012542149854576e-05,
      "loss": 0.3905,
      "step": 10801
    },
    {
      "epoch": 2.2205776544351936,
      "grad_norm": 0.22880828380584717,
      "learning_rate": 1.5005095704029562e-05,
      "loss": 0.3896,
      "step": 10802
    },
    {
      "epoch": 2.220783225408572,
      "grad_norm": 0.2337990701198578,
      "learning_rate": 1.4997650735913297e-05,
      "loss": 0.3984,
      "step": 10803
    },
    {
      "epoch": 2.220988796381951,
      "grad_norm": 0.2161635160446167,
      "learning_rate": 1.499020724587255e-05,
      "loss": 0.4006,
      "step": 10804
    },
    {
      "epoch": 2.2211943673553294,
      "grad_norm": 0.22818011045455933,
      "learning_rate": 1.4982765234274027e-05,
      "loss": 0.3912,
      "step": 10805
    },
    {
      "epoch": 2.221399938328708,
      "grad_norm": 0.22331209480762482,
      "learning_rate": 1.4975324701484358e-05,
      "loss": 0.4113,
      "step": 10806
    },
    {
      "epoch": 2.2216055093020866,
      "grad_norm": 0.21700911223888397,
      "learning_rate": 1.4967885647870107e-05,
      "loss": 0.3738,
      "step": 10807
    },
    {
      "epoch": 2.221811080275465,
      "grad_norm": 0.12261340767145157,
      "learning_rate": 1.4960448073797765e-05,
      "loss": 0.4559,
      "step": 10808
    },
    {
      "epoch": 2.2220166512488437,
      "grad_norm": 0.22570718824863434,
      "learning_rate": 1.4953011979633725e-05,
      "loss": 0.4089,
      "step": 10809
    },
    {
      "epoch": 2.2222222222222223,
      "grad_norm": 0.22284522652626038,
      "learning_rate": 1.4945577365744356e-05,
      "loss": 0.406,
      "step": 10810
    },
    {
      "epoch": 2.222427793195601,
      "grad_norm": 0.2190810590982437,
      "learning_rate": 1.4938144232495923e-05,
      "loss": 0.396,
      "step": 10811
    },
    {
      "epoch": 2.2226333641689795,
      "grad_norm": 0.2320832461118698,
      "learning_rate": 1.4930712580254612e-05,
      "loss": 0.4115,
      "step": 10812
    },
    {
      "epoch": 2.222838935142358,
      "grad_norm": 0.12574470043182373,
      "learning_rate": 1.4923282409386543e-05,
      "loss": 0.4488,
      "step": 10813
    },
    {
      "epoch": 2.2230445061157367,
      "grad_norm": 0.21672125160694122,
      "learning_rate": 1.4915853720257762e-05,
      "loss": 0.4069,
      "step": 10814
    },
    {
      "epoch": 2.223250077089115,
      "grad_norm": 0.2291223555803299,
      "learning_rate": 1.490842651323427e-05,
      "loss": 0.4088,
      "step": 10815
    },
    {
      "epoch": 2.2234556480624934,
      "grad_norm": 0.23085300624370575,
      "learning_rate": 1.4901000788681959e-05,
      "loss": 0.3894,
      "step": 10816
    },
    {
      "epoch": 2.223661219035872,
      "grad_norm": 0.11973418295383453,
      "learning_rate": 1.489357654696664e-05,
      "loss": 0.4637,
      "step": 10817
    },
    {
      "epoch": 2.2238667900092506,
      "grad_norm": 0.2691250741481781,
      "learning_rate": 1.4886153788454096e-05,
      "loss": 0.4024,
      "step": 10818
    },
    {
      "epoch": 2.224072360982629,
      "grad_norm": 0.12348726391792297,
      "learning_rate": 1.4878732513510012e-05,
      "loss": 0.4423,
      "step": 10819
    },
    {
      "epoch": 2.2242779319560078,
      "grad_norm": 0.1290557086467743,
      "learning_rate": 1.4871312722499987e-05,
      "loss": 0.4628,
      "step": 10820
    },
    {
      "epoch": 2.2244835029293863,
      "grad_norm": 0.2316775619983673,
      "learning_rate": 1.4863894415789562e-05,
      "loss": 0.3948,
      "step": 10821
    },
    {
      "epoch": 2.224689073902765,
      "grad_norm": 0.2387668341398239,
      "learning_rate": 1.4856477593744187e-05,
      "loss": 0.379,
      "step": 10822
    },
    {
      "epoch": 2.2248946448761435,
      "grad_norm": 0.22780825197696686,
      "learning_rate": 1.4849062256729289e-05,
      "loss": 0.3708,
      "step": 10823
    },
    {
      "epoch": 2.225100215849522,
      "grad_norm": 0.22622719407081604,
      "learning_rate": 1.484164840511017e-05,
      "loss": 0.3871,
      "step": 10824
    },
    {
      "epoch": 2.2253057868229007,
      "grad_norm": 0.22779934108257294,
      "learning_rate": 1.4834236039252069e-05,
      "loss": 0.3736,
      "step": 10825
    },
    {
      "epoch": 2.2255113577962793,
      "grad_norm": 0.22025705873966217,
      "learning_rate": 1.4826825159520165e-05,
      "loss": 0.3883,
      "step": 10826
    },
    {
      "epoch": 2.225716928769658,
      "grad_norm": 0.21935100853443146,
      "learning_rate": 1.481941576627956e-05,
      "loss": 0.3932,
      "step": 10827
    },
    {
      "epoch": 2.2259224997430365,
      "grad_norm": 0.11909017711877823,
      "learning_rate": 1.4812007859895275e-05,
      "loss": 0.4316,
      "step": 10828
    },
    {
      "epoch": 2.226128070716415,
      "grad_norm": 0.2229301780462265,
      "learning_rate": 1.4804601440732245e-05,
      "loss": 0.3889,
      "step": 10829
    },
    {
      "epoch": 2.226333641689793,
      "grad_norm": 0.2314000278711319,
      "learning_rate": 1.479719650915539e-05,
      "loss": 0.4042,
      "step": 10830
    },
    {
      "epoch": 2.226539212663172,
      "grad_norm": 0.23769402503967285,
      "learning_rate": 1.4789793065529492e-05,
      "loss": 0.4003,
      "step": 10831
    },
    {
      "epoch": 2.2267447836365504,
      "grad_norm": 0.2327127605676651,
      "learning_rate": 1.478239111021929e-05,
      "loss": 0.3853,
      "step": 10832
    },
    {
      "epoch": 2.226950354609929,
      "grad_norm": 0.23596766591072083,
      "learning_rate": 1.4774990643589441e-05,
      "loss": 0.4041,
      "step": 10833
    },
    {
      "epoch": 2.2271559255833075,
      "grad_norm": 0.22967597842216492,
      "learning_rate": 1.476759166600453e-05,
      "loss": 0.413,
      "step": 10834
    },
    {
      "epoch": 2.227361496556686,
      "grad_norm": 0.223694309592247,
      "learning_rate": 1.476019417782907e-05,
      "loss": 0.3922,
      "step": 10835
    },
    {
      "epoch": 2.2275670675300647,
      "grad_norm": 0.22924546897411346,
      "learning_rate": 1.4752798179427489e-05,
      "loss": 0.3925,
      "step": 10836
    },
    {
      "epoch": 2.2277726385034433,
      "grad_norm": 0.2322525531053543,
      "learning_rate": 1.474540367116418e-05,
      "loss": 0.4093,
      "step": 10837
    },
    {
      "epoch": 2.227978209476822,
      "grad_norm": 0.22837835550308228,
      "learning_rate": 1.4738010653403414e-05,
      "loss": 0.3959,
      "step": 10838
    },
    {
      "epoch": 2.2281837804502005,
      "grad_norm": 0.13115087151527405,
      "learning_rate": 1.4730619126509427e-05,
      "loss": 0.4592,
      "step": 10839
    },
    {
      "epoch": 2.228389351423579,
      "grad_norm": 0.24123218655586243,
      "learning_rate": 1.472322909084636e-05,
      "loss": 0.389,
      "step": 10840
    },
    {
      "epoch": 2.2285949223969577,
      "grad_norm": 0.24346770346164703,
      "learning_rate": 1.4715840546778284e-05,
      "loss": 0.419,
      "step": 10841
    },
    {
      "epoch": 2.2288004933703363,
      "grad_norm": 0.2285340279340744,
      "learning_rate": 1.4708453494669196e-05,
      "loss": 0.4022,
      "step": 10842
    },
    {
      "epoch": 2.229006064343715,
      "grad_norm": 0.22701993584632874,
      "learning_rate": 1.4701067934883007e-05,
      "loss": 0.3926,
      "step": 10843
    },
    {
      "epoch": 2.2292116353170934,
      "grad_norm": 0.2268943190574646,
      "learning_rate": 1.4693683867783597e-05,
      "loss": 0.3891,
      "step": 10844
    },
    {
      "epoch": 2.2294172062904716,
      "grad_norm": 0.23047508299350739,
      "learning_rate": 1.468630129373473e-05,
      "loss": 0.3973,
      "step": 10845
    },
    {
      "epoch": 2.22962277726385,
      "grad_norm": 0.2280137687921524,
      "learning_rate": 1.4678920213100116e-05,
      "loss": 0.3851,
      "step": 10846
    },
    {
      "epoch": 2.2298283482372288,
      "grad_norm": 0.2208314836025238,
      "learning_rate": 1.4671540626243379e-05,
      "loss": 0.3931,
      "step": 10847
    },
    {
      "epoch": 2.2300339192106073,
      "grad_norm": 0.23788389563560486,
      "learning_rate": 1.4664162533528081e-05,
      "loss": 0.4042,
      "step": 10848
    },
    {
      "epoch": 2.230239490183986,
      "grad_norm": 0.2255765050649643,
      "learning_rate": 1.4656785935317708e-05,
      "loss": 0.3875,
      "step": 10849
    },
    {
      "epoch": 2.2304450611573645,
      "grad_norm": 0.22221685945987701,
      "learning_rate": 1.4649410831975656e-05,
      "loss": 0.3858,
      "step": 10850
    },
    {
      "epoch": 2.230650632130743,
      "grad_norm": 0.22361934185028076,
      "learning_rate": 1.4642037223865281e-05,
      "loss": 0.3891,
      "step": 10851
    },
    {
      "epoch": 2.2308562031041217,
      "grad_norm": 0.12343227863311768,
      "learning_rate": 1.4634665111349843e-05,
      "loss": 0.482,
      "step": 10852
    },
    {
      "epoch": 2.2310617740775003,
      "grad_norm": 0.12411545217037201,
      "learning_rate": 1.462729449479253e-05,
      "loss": 0.4664,
      "step": 10853
    },
    {
      "epoch": 2.231267345050879,
      "grad_norm": 0.2260737121105194,
      "learning_rate": 1.4619925374556457e-05,
      "loss": 0.392,
      "step": 10854
    },
    {
      "epoch": 2.2314729160242575,
      "grad_norm": 0.2308768928050995,
      "learning_rate": 1.461255775100466e-05,
      "loss": 0.4033,
      "step": 10855
    },
    {
      "epoch": 2.231678486997636,
      "grad_norm": 0.12042105197906494,
      "learning_rate": 1.460519162450011e-05,
      "loss": 0.4485,
      "step": 10856
    },
    {
      "epoch": 2.2318840579710146,
      "grad_norm": 0.22707884013652802,
      "learning_rate": 1.4597826995405697e-05,
      "loss": 0.3747,
      "step": 10857
    },
    {
      "epoch": 2.2320896289443932,
      "grad_norm": 0.23044802248477936,
      "learning_rate": 1.4590463864084258e-05,
      "loss": 0.3896,
      "step": 10858
    },
    {
      "epoch": 2.232295199917772,
      "grad_norm": 0.2284078150987625,
      "learning_rate": 1.458310223089853e-05,
      "loss": 0.3806,
      "step": 10859
    },
    {
      "epoch": 2.23250077089115,
      "grad_norm": 0.12638430297374725,
      "learning_rate": 1.4575742096211172e-05,
      "loss": 0.4579,
      "step": 10860
    },
    {
      "epoch": 2.2327063418645285,
      "grad_norm": 0.12327645719051361,
      "learning_rate": 1.4568383460384815e-05,
      "loss": 0.4572,
      "step": 10861
    },
    {
      "epoch": 2.232911912837907,
      "grad_norm": 0.22871337831020355,
      "learning_rate": 1.4561026323781969e-05,
      "loss": 0.3938,
      "step": 10862
    },
    {
      "epoch": 2.2331174838112857,
      "grad_norm": 0.1175784319639206,
      "learning_rate": 1.4553670686765082e-05,
      "loss": 0.4228,
      "step": 10863
    },
    {
      "epoch": 2.2333230547846643,
      "grad_norm": 0.23156176507472992,
      "learning_rate": 1.4546316549696521e-05,
      "loss": 0.3983,
      "step": 10864
    },
    {
      "epoch": 2.233528625758043,
      "grad_norm": 0.22325018048286438,
      "learning_rate": 1.453896391293862e-05,
      "loss": 0.4036,
      "step": 10865
    },
    {
      "epoch": 2.2337341967314215,
      "grad_norm": 0.2427932471036911,
      "learning_rate": 1.4531612776853592e-05,
      "loss": 0.3779,
      "step": 10866
    },
    {
      "epoch": 2.2339397677048,
      "grad_norm": 0.12050554901361465,
      "learning_rate": 1.452426314180359e-05,
      "loss": 0.4408,
      "step": 10867
    },
    {
      "epoch": 2.2341453386781787,
      "grad_norm": 0.2303098738193512,
      "learning_rate": 1.4516915008150703e-05,
      "loss": 0.3944,
      "step": 10868
    },
    {
      "epoch": 2.2343509096515572,
      "grad_norm": 0.22475799918174744,
      "learning_rate": 1.4509568376256933e-05,
      "loss": 0.3911,
      "step": 10869
    },
    {
      "epoch": 2.234556480624936,
      "grad_norm": 0.12232775241136551,
      "learning_rate": 1.4502223246484222e-05,
      "loss": 0.4503,
      "step": 10870
    },
    {
      "epoch": 2.2347620515983144,
      "grad_norm": 0.23218752443790436,
      "learning_rate": 1.4494879619194408e-05,
      "loss": 0.3916,
      "step": 10871
    },
    {
      "epoch": 2.234967622571693,
      "grad_norm": 0.22913837432861328,
      "learning_rate": 1.4487537494749308e-05,
      "loss": 0.3967,
      "step": 10872
    },
    {
      "epoch": 2.2351731935450716,
      "grad_norm": 0.22640950977802277,
      "learning_rate": 1.4480196873510623e-05,
      "loss": 0.3938,
      "step": 10873
    },
    {
      "epoch": 2.23537876451845,
      "grad_norm": 0.22983142733573914,
      "learning_rate": 1.4472857755839987e-05,
      "loss": 0.3957,
      "step": 10874
    },
    {
      "epoch": 2.2355843354918283,
      "grad_norm": 0.13250325620174408,
      "learning_rate": 1.4465520142098968e-05,
      "loss": 0.4521,
      "step": 10875
    },
    {
      "epoch": 2.235789906465207,
      "grad_norm": 0.12669454514980316,
      "learning_rate": 1.4458184032649049e-05,
      "loss": 0.4651,
      "step": 10876
    },
    {
      "epoch": 2.2359954774385855,
      "grad_norm": 0.22359710931777954,
      "learning_rate": 1.4450849427851654e-05,
      "loss": 0.3771,
      "step": 10877
    },
    {
      "epoch": 2.236201048411964,
      "grad_norm": 0.22868263721466064,
      "learning_rate": 1.4443516328068107e-05,
      "loss": 0.3723,
      "step": 10878
    },
    {
      "epoch": 2.2364066193853427,
      "grad_norm": 0.2262980043888092,
      "learning_rate": 1.4436184733659704e-05,
      "loss": 0.3886,
      "step": 10879
    },
    {
      "epoch": 2.2366121903587213,
      "grad_norm": 0.22829292714595795,
      "learning_rate": 1.4428854644987623e-05,
      "loss": 0.3879,
      "step": 10880
    },
    {
      "epoch": 2.2368177613321,
      "grad_norm": 0.22236782312393188,
      "learning_rate": 1.4421526062412972e-05,
      "loss": 0.3716,
      "step": 10881
    },
    {
      "epoch": 2.2370233323054785,
      "grad_norm": 0.2244395762681961,
      "learning_rate": 1.4414198986296825e-05,
      "loss": 0.3716,
      "step": 10882
    },
    {
      "epoch": 2.237228903278857,
      "grad_norm": 0.23614956438541412,
      "learning_rate": 1.4406873417000133e-05,
      "loss": 0.4046,
      "step": 10883
    },
    {
      "epoch": 2.2374344742522356,
      "grad_norm": 0.23262259364128113,
      "learning_rate": 1.4399549354883795e-05,
      "loss": 0.392,
      "step": 10884
    },
    {
      "epoch": 2.237640045225614,
      "grad_norm": 0.23623405396938324,
      "learning_rate": 1.439222680030862e-05,
      "loss": 0.4101,
      "step": 10885
    },
    {
      "epoch": 2.237845616198993,
      "grad_norm": 0.12626418471336365,
      "learning_rate": 1.4384905753635388e-05,
      "loss": 0.436,
      "step": 10886
    },
    {
      "epoch": 2.2380511871723714,
      "grad_norm": 0.2217606157064438,
      "learning_rate": 1.437758621522475e-05,
      "loss": 0.3971,
      "step": 10887
    },
    {
      "epoch": 2.23825675814575,
      "grad_norm": 0.22895729541778564,
      "learning_rate": 1.4370268185437314e-05,
      "loss": 0.4164,
      "step": 10888
    },
    {
      "epoch": 2.2384623291191286,
      "grad_norm": 0.26154306530952454,
      "learning_rate": 1.4362951664633601e-05,
      "loss": 0.411,
      "step": 10889
    },
    {
      "epoch": 2.2386679000925067,
      "grad_norm": 0.12071531265974045,
      "learning_rate": 1.4355636653174064e-05,
      "loss": 0.46,
      "step": 10890
    },
    {
      "epoch": 2.2388734710658853,
      "grad_norm": 0.23138496279716492,
      "learning_rate": 1.4348323151419076e-05,
      "loss": 0.3929,
      "step": 10891
    },
    {
      "epoch": 2.239079042039264,
      "grad_norm": 0.22143509984016418,
      "learning_rate": 1.4341011159728923e-05,
      "loss": 0.3937,
      "step": 10892
    },
    {
      "epoch": 2.2392846130126425,
      "grad_norm": 0.23120230436325073,
      "learning_rate": 1.433370067846387e-05,
      "loss": 0.4061,
      "step": 10893
    },
    {
      "epoch": 2.239490183986021,
      "grad_norm": 0.22361977398395538,
      "learning_rate": 1.4326391707984047e-05,
      "loss": 0.3993,
      "step": 10894
    },
    {
      "epoch": 2.2396957549593997,
      "grad_norm": 0.1270783543586731,
      "learning_rate": 1.431908424864954e-05,
      "loss": 0.424,
      "step": 10895
    },
    {
      "epoch": 2.2399013259327782,
      "grad_norm": 0.22819988429546356,
      "learning_rate": 1.4311778300820347e-05,
      "loss": 0.4009,
      "step": 10896
    },
    {
      "epoch": 2.240106896906157,
      "grad_norm": 0.22298060357570648,
      "learning_rate": 1.4304473864856404e-05,
      "loss": 0.3959,
      "step": 10897
    },
    {
      "epoch": 2.2403124678795354,
      "grad_norm": 0.22824987769126892,
      "learning_rate": 1.4297170941117544e-05,
      "loss": 0.4174,
      "step": 10898
    },
    {
      "epoch": 2.240518038852914,
      "grad_norm": 0.1287529617547989,
      "learning_rate": 1.4289869529963582e-05,
      "loss": 0.4321,
      "step": 10899
    },
    {
      "epoch": 2.2407236098262926,
      "grad_norm": 0.2339385449886322,
      "learning_rate": 1.428256963175421e-05,
      "loss": 0.4036,
      "step": 10900
    },
    {
      "epoch": 2.240929180799671,
      "grad_norm": 0.22810976207256317,
      "learning_rate": 1.4275271246849061e-05,
      "loss": 0.4073,
      "step": 10901
    },
    {
      "epoch": 2.2411347517730498,
      "grad_norm": 0.22102433443069458,
      "learning_rate": 1.4267974375607675e-05,
      "loss": 0.3761,
      "step": 10902
    },
    {
      "epoch": 2.2413403227464284,
      "grad_norm": 0.2228943556547165,
      "learning_rate": 1.4260679018389566e-05,
      "loss": 0.3958,
      "step": 10903
    },
    {
      "epoch": 2.241545893719807,
      "grad_norm": 0.22356650233268738,
      "learning_rate": 1.4253385175554126e-05,
      "loss": 0.3841,
      "step": 10904
    },
    {
      "epoch": 2.241751464693185,
      "grad_norm": 0.1219724789261818,
      "learning_rate": 1.4246092847460679e-05,
      "loss": 0.4373,
      "step": 10905
    },
    {
      "epoch": 2.2419570356665637,
      "grad_norm": 0.22389782965183258,
      "learning_rate": 1.42388020344685e-05,
      "loss": 0.3908,
      "step": 10906
    },
    {
      "epoch": 2.2421626066399423,
      "grad_norm": 0.22778619825839996,
      "learning_rate": 1.4231512736936774e-05,
      "loss": 0.4086,
      "step": 10907
    },
    {
      "epoch": 2.242368177613321,
      "grad_norm": 0.24095553159713745,
      "learning_rate": 1.4224224955224604e-05,
      "loss": 0.3859,
      "step": 10908
    },
    {
      "epoch": 2.2425737485866994,
      "grad_norm": 0.2397175282239914,
      "learning_rate": 1.4216938689691019e-05,
      "loss": 0.4006,
      "step": 10909
    },
    {
      "epoch": 2.242779319560078,
      "grad_norm": 0.22254031896591187,
      "learning_rate": 1.4209653940694986e-05,
      "loss": 0.4021,
      "step": 10910
    },
    {
      "epoch": 2.2429848905334566,
      "grad_norm": 0.12882784008979797,
      "learning_rate": 1.4202370708595396e-05,
      "loss": 0.4369,
      "step": 10911
    },
    {
      "epoch": 2.243190461506835,
      "grad_norm": 0.13095501065254211,
      "learning_rate": 1.4195088993751034e-05,
      "loss": 0.4539,
      "step": 10912
    },
    {
      "epoch": 2.243396032480214,
      "grad_norm": 0.2357592135667801,
      "learning_rate": 1.418780879652067e-05,
      "loss": 0.3915,
      "step": 10913
    },
    {
      "epoch": 2.2436016034535924,
      "grad_norm": 0.23308870196342468,
      "learning_rate": 1.4180530117262953e-05,
      "loss": 0.4003,
      "step": 10914
    },
    {
      "epoch": 2.243807174426971,
      "grad_norm": 0.22599655389785767,
      "learning_rate": 1.4173252956336463e-05,
      "loss": 0.3978,
      "step": 10915
    },
    {
      "epoch": 2.2440127454003496,
      "grad_norm": 0.23513002693653107,
      "learning_rate": 1.416597731409972e-05,
      "loss": 0.3943,
      "step": 10916
    },
    {
      "epoch": 2.244218316373728,
      "grad_norm": 0.1267446130514145,
      "learning_rate": 1.4158703190911157e-05,
      "loss": 0.4464,
      "step": 10917
    },
    {
      "epoch": 2.2444238873471067,
      "grad_norm": 0.22103582322597504,
      "learning_rate": 1.4151430587129133e-05,
      "loss": 0.3842,
      "step": 10918
    },
    {
      "epoch": 2.2446294583204853,
      "grad_norm": 0.2322588562965393,
      "learning_rate": 1.4144159503111928e-05,
      "loss": 0.4096,
      "step": 10919
    },
    {
      "epoch": 2.2448350292938635,
      "grad_norm": 0.1323188990354538,
      "learning_rate": 1.4136889939217776e-05,
      "loss": 0.4459,
      "step": 10920
    },
    {
      "epoch": 2.245040600267242,
      "grad_norm": 0.2242937535047531,
      "learning_rate": 1.41296218958048e-05,
      "loss": 0.3859,
      "step": 10921
    },
    {
      "epoch": 2.2452461712406206,
      "grad_norm": 0.22466784715652466,
      "learning_rate": 1.4122355373231073e-05,
      "loss": 0.3982,
      "step": 10922
    },
    {
      "epoch": 2.2454517422139992,
      "grad_norm": 0.22480922937393188,
      "learning_rate": 1.411509037185457e-05,
      "loss": 0.4073,
      "step": 10923
    },
    {
      "epoch": 2.245657313187378,
      "grad_norm": 0.12106183916330338,
      "learning_rate": 1.4107826892033194e-05,
      "loss": 0.4505,
      "step": 10924
    },
    {
      "epoch": 2.2458628841607564,
      "grad_norm": 0.2291100174188614,
      "learning_rate": 1.4100564934124812e-05,
      "loss": 0.3902,
      "step": 10925
    },
    {
      "epoch": 2.246068455134135,
      "grad_norm": 0.22419095039367676,
      "learning_rate": 1.409330449848716e-05,
      "loss": 0.3931,
      "step": 10926
    },
    {
      "epoch": 2.2462740261075136,
      "grad_norm": 0.22613660991191864,
      "learning_rate": 1.4086045585477947e-05,
      "loss": 0.3922,
      "step": 10927
    },
    {
      "epoch": 2.246479597080892,
      "grad_norm": 0.22982370853424072,
      "learning_rate": 1.407878819545478e-05,
      "loss": 0.399,
      "step": 10928
    },
    {
      "epoch": 2.2466851680542708,
      "grad_norm": 0.23034709692001343,
      "learning_rate": 1.4071532328775196e-05,
      "loss": 0.3812,
      "step": 10929
    },
    {
      "epoch": 2.2468907390276494,
      "grad_norm": 0.23110920190811157,
      "learning_rate": 1.4064277985796652e-05,
      "loss": 0.389,
      "step": 10930
    },
    {
      "epoch": 2.247096310001028,
      "grad_norm": 0.2307683825492859,
      "learning_rate": 1.4057025166876537e-05,
      "loss": 0.4113,
      "step": 10931
    },
    {
      "epoch": 2.2473018809744065,
      "grad_norm": 0.23556135594844818,
      "learning_rate": 1.4049773872372172e-05,
      "loss": 0.3884,
      "step": 10932
    },
    {
      "epoch": 2.247507451947785,
      "grad_norm": 0.230165496468544,
      "learning_rate": 1.4042524102640763e-05,
      "loss": 0.3956,
      "step": 10933
    },
    {
      "epoch": 2.2477130229211637,
      "grad_norm": 0.22927415370941162,
      "learning_rate": 1.4035275858039516e-05,
      "loss": 0.3868,
      "step": 10934
    },
    {
      "epoch": 2.2479185938945423,
      "grad_norm": 0.22793439030647278,
      "learning_rate": 1.4028029138925497e-05,
      "loss": 0.3894,
      "step": 10935
    },
    {
      "epoch": 2.248124164867921,
      "grad_norm": 0.2283446490764618,
      "learning_rate": 1.4020783945655724e-05,
      "loss": 0.3903,
      "step": 10936
    },
    {
      "epoch": 2.248329735841299,
      "grad_norm": 0.22100144624710083,
      "learning_rate": 1.4013540278587125e-05,
      "loss": 0.3942,
      "step": 10937
    },
    {
      "epoch": 2.2485353068146776,
      "grad_norm": 0.12830045819282532,
      "learning_rate": 1.4006298138076567e-05,
      "loss": 0.4512,
      "step": 10938
    },
    {
      "epoch": 2.248740877788056,
      "grad_norm": 0.2236565202474594,
      "learning_rate": 1.3999057524480838e-05,
      "loss": 0.4032,
      "step": 10939
    },
    {
      "epoch": 2.248946448761435,
      "grad_norm": 0.22065366804599762,
      "learning_rate": 1.3991818438156628e-05,
      "loss": 0.3844,
      "step": 10940
    },
    {
      "epoch": 2.2491520197348134,
      "grad_norm": 0.12815195322036743,
      "learning_rate": 1.3984580879460613e-05,
      "loss": 0.4361,
      "step": 10941
    },
    {
      "epoch": 2.249357590708192,
      "grad_norm": 0.23110713064670563,
      "learning_rate": 1.3977344848749327e-05,
      "loss": 0.3976,
      "step": 10942
    },
    {
      "epoch": 2.2495631616815706,
      "grad_norm": 0.23048558831214905,
      "learning_rate": 1.3970110346379258e-05,
      "loss": 0.3893,
      "step": 10943
    },
    {
      "epoch": 2.249768732654949,
      "grad_norm": 0.12720687687397003,
      "learning_rate": 1.3962877372706823e-05,
      "loss": 0.4534,
      "step": 10944
    },
    {
      "epoch": 2.2499743036283277,
      "grad_norm": 0.2292504608631134,
      "learning_rate": 1.3955645928088343e-05,
      "loss": 0.4032,
      "step": 10945
    },
    {
      "epoch": 2.2501798746017063,
      "grad_norm": 0.26804453134536743,
      "learning_rate": 1.3948416012880095e-05,
      "loss": 0.3896,
      "step": 10946
    },
    {
      "epoch": 2.250385445575085,
      "grad_norm": 0.24208854138851166,
      "learning_rate": 1.3941187627438255e-05,
      "loss": 0.4036,
      "step": 10947
    },
    {
      "epoch": 2.2505910165484635,
      "grad_norm": 0.21898695826530457,
      "learning_rate": 1.393396077211892e-05,
      "loss": 0.3847,
      "step": 10948
    },
    {
      "epoch": 2.250796587521842,
      "grad_norm": 0.24147653579711914,
      "learning_rate": 1.3926735447278149e-05,
      "loss": 0.399,
      "step": 10949
    },
    {
      "epoch": 2.2510021584952202,
      "grad_norm": 0.21761365234851837,
      "learning_rate": 1.3919511653271885e-05,
      "loss": 0.3977,
      "step": 10950
    },
    {
      "epoch": 2.2512077294685993,
      "grad_norm": 0.23133422434329987,
      "learning_rate": 1.3912289390456018e-05,
      "loss": 0.3832,
      "step": 10951
    },
    {
      "epoch": 2.2514133004419774,
      "grad_norm": 0.23142319917678833,
      "learning_rate": 1.3905068659186345e-05,
      "loss": 0.4152,
      "step": 10952
    },
    {
      "epoch": 2.251618871415356,
      "grad_norm": 0.21739207208156586,
      "learning_rate": 1.3897849459818602e-05,
      "loss": 0.3866,
      "step": 10953
    },
    {
      "epoch": 2.2518244423887346,
      "grad_norm": 0.2368880808353424,
      "learning_rate": 1.389063179270843e-05,
      "loss": 0.3975,
      "step": 10954
    },
    {
      "epoch": 2.252030013362113,
      "grad_norm": 0.22230856120586395,
      "learning_rate": 1.3883415658211439e-05,
      "loss": 0.3897,
      "step": 10955
    },
    {
      "epoch": 2.2522355843354918,
      "grad_norm": 0.2135685384273529,
      "learning_rate": 1.387620105668312e-05,
      "loss": 0.3953,
      "step": 10956
    },
    {
      "epoch": 2.2524411553088703,
      "grad_norm": 0.22502809762954712,
      "learning_rate": 1.3868987988478905e-05,
      "loss": 0.3849,
      "step": 10957
    },
    {
      "epoch": 2.252646726282249,
      "grad_norm": 0.12617872655391693,
      "learning_rate": 1.3861776453954141e-05,
      "loss": 0.4533,
      "step": 10958
    },
    {
      "epoch": 2.2528522972556275,
      "grad_norm": 0.12221905589103699,
      "learning_rate": 1.3854566453464114e-05,
      "loss": 0.4514,
      "step": 10959
    },
    {
      "epoch": 2.253057868229006,
      "grad_norm": 0.22371545433998108,
      "learning_rate": 1.3847357987364026e-05,
      "loss": 0.4013,
      "step": 10960
    },
    {
      "epoch": 2.2532634392023847,
      "grad_norm": 0.22430896759033203,
      "learning_rate": 1.3840151056008989e-05,
      "loss": 0.3826,
      "step": 10961
    },
    {
      "epoch": 2.2534690101757633,
      "grad_norm": 0.2251027673482895,
      "learning_rate": 1.3832945659754084e-05,
      "loss": 0.39,
      "step": 10962
    },
    {
      "epoch": 2.253674581149142,
      "grad_norm": 0.21788759529590607,
      "learning_rate": 1.3825741798954265e-05,
      "loss": 0.3945,
      "step": 10963
    },
    {
      "epoch": 2.2538801521225205,
      "grad_norm": 0.2384837120771408,
      "learning_rate": 1.3818539473964443e-05,
      "loss": 0.3972,
      "step": 10964
    },
    {
      "epoch": 2.254085723095899,
      "grad_norm": 0.2365540862083435,
      "learning_rate": 1.381133868513944e-05,
      "loss": 0.4051,
      "step": 10965
    },
    {
      "epoch": 2.2542912940692776,
      "grad_norm": 0.22459320724010468,
      "learning_rate": 1.3804139432833994e-05,
      "loss": 0.3933,
      "step": 10966
    },
    {
      "epoch": 2.254496865042656,
      "grad_norm": 0.2330470085144043,
      "learning_rate": 1.3796941717402797e-05,
      "loss": 0.4029,
      "step": 10967
    },
    {
      "epoch": 2.2547024360160344,
      "grad_norm": 0.2302565574645996,
      "learning_rate": 1.3789745539200443e-05,
      "loss": 0.3685,
      "step": 10968
    },
    {
      "epoch": 2.254908006989413,
      "grad_norm": 0.12435781210660934,
      "learning_rate": 1.3782550898581435e-05,
      "loss": 0.465,
      "step": 10969
    },
    {
      "epoch": 2.2551135779627915,
      "grad_norm": 0.22399941086769104,
      "learning_rate": 1.377535779590025e-05,
      "loss": 0.3946,
      "step": 10970
    },
    {
      "epoch": 2.25531914893617,
      "grad_norm": 0.2299404740333557,
      "learning_rate": 1.3768166231511242e-05,
      "loss": 0.3981,
      "step": 10971
    },
    {
      "epoch": 2.2555247199095487,
      "grad_norm": 0.22755853831768036,
      "learning_rate": 1.3760976205768704e-05,
      "loss": 0.4128,
      "step": 10972
    },
    {
      "epoch": 2.2557302908829273,
      "grad_norm": 0.23051007091999054,
      "learning_rate": 1.3753787719026858e-05,
      "loss": 0.4034,
      "step": 10973
    },
    {
      "epoch": 2.255935861856306,
      "grad_norm": 0.11795416474342346,
      "learning_rate": 1.3746600771639847e-05,
      "loss": 0.4349,
      "step": 10974
    },
    {
      "epoch": 2.2561414328296845,
      "grad_norm": 0.22369509935379028,
      "learning_rate": 1.3739415363961725e-05,
      "loss": 0.3958,
      "step": 10975
    },
    {
      "epoch": 2.256347003803063,
      "grad_norm": 0.224918395280838,
      "learning_rate": 1.3732231496346506e-05,
      "loss": 0.4054,
      "step": 10976
    },
    {
      "epoch": 2.2565525747764417,
      "grad_norm": 0.22502835094928741,
      "learning_rate": 1.3725049169148101e-05,
      "loss": 0.3986,
      "step": 10977
    },
    {
      "epoch": 2.2567581457498203,
      "grad_norm": 0.2298583686351776,
      "learning_rate": 1.3717868382720342e-05,
      "loss": 0.4023,
      "step": 10978
    },
    {
      "epoch": 2.256963716723199,
      "grad_norm": 0.2239440232515335,
      "learning_rate": 1.3710689137417002e-05,
      "loss": 0.3776,
      "step": 10979
    },
    {
      "epoch": 2.2571692876965774,
      "grad_norm": 0.12783947587013245,
      "learning_rate": 1.3703511433591756e-05,
      "loss": 0.4592,
      "step": 10980
    },
    {
      "epoch": 2.257374858669956,
      "grad_norm": 0.23055274784564972,
      "learning_rate": 1.3696335271598206e-05,
      "loss": 0.3805,
      "step": 10981
    },
    {
      "epoch": 2.257580429643334,
      "grad_norm": 0.22777009010314941,
      "learning_rate": 1.3689160651789923e-05,
      "loss": 0.3927,
      "step": 10982
    },
    {
      "epoch": 2.2577860006167128,
      "grad_norm": 0.2232956886291504,
      "learning_rate": 1.3681987574520346e-05,
      "loss": 0.3783,
      "step": 10983
    },
    {
      "epoch": 2.2579915715900913,
      "grad_norm": 0.2353593409061432,
      "learning_rate": 1.3674816040142864e-05,
      "loss": 0.4053,
      "step": 10984
    },
    {
      "epoch": 2.25819714256347,
      "grad_norm": 0.12569645047187805,
      "learning_rate": 1.3667646049010782e-05,
      "loss": 0.4533,
      "step": 10985
    },
    {
      "epoch": 2.2584027135368485,
      "grad_norm": 0.22515416145324707,
      "learning_rate": 1.3660477601477328e-05,
      "loss": 0.3757,
      "step": 10986
    },
    {
      "epoch": 2.258608284510227,
      "grad_norm": 0.13127067685127258,
      "learning_rate": 1.3653310697895652e-05,
      "loss": 0.4595,
      "step": 10987
    },
    {
      "epoch": 2.2588138554836057,
      "grad_norm": 0.22975093126296997,
      "learning_rate": 1.3646145338618855e-05,
      "loss": 0.3877,
      "step": 10988
    },
    {
      "epoch": 2.2590194264569843,
      "grad_norm": 0.22624441981315613,
      "learning_rate": 1.3638981523999929e-05,
      "loss": 0.379,
      "step": 10989
    },
    {
      "epoch": 2.259224997430363,
      "grad_norm": 0.12386941909790039,
      "learning_rate": 1.3631819254391793e-05,
      "loss": 0.4457,
      "step": 10990
    },
    {
      "epoch": 2.2594305684037415,
      "grad_norm": 0.2416963428258896,
      "learning_rate": 1.3624658530147319e-05,
      "loss": 0.3763,
      "step": 10991
    },
    {
      "epoch": 2.25963613937712,
      "grad_norm": 0.22425812482833862,
      "learning_rate": 1.3617499351619269e-05,
      "loss": 0.3828,
      "step": 10992
    },
    {
      "epoch": 2.2598417103504986,
      "grad_norm": 0.13300848007202148,
      "learning_rate": 1.3610341719160347e-05,
      "loss": 0.4532,
      "step": 10993
    },
    {
      "epoch": 2.260047281323877,
      "grad_norm": 0.22609826922416687,
      "learning_rate": 1.3603185633123177e-05,
      "loss": 0.3796,
      "step": 10994
    },
    {
      "epoch": 2.260252852297256,
      "grad_norm": 0.22295403480529785,
      "learning_rate": 1.3596031093860283e-05,
      "loss": 0.4128,
      "step": 10995
    },
    {
      "epoch": 2.2604584232706344,
      "grad_norm": 0.22617916762828827,
      "learning_rate": 1.3588878101724169e-05,
      "loss": 0.4004,
      "step": 10996
    },
    {
      "epoch": 2.2606639942440125,
      "grad_norm": 0.23671671748161316,
      "learning_rate": 1.3581726657067217e-05,
      "loss": 0.3947,
      "step": 10997
    },
    {
      "epoch": 2.260869565217391,
      "grad_norm": 0.2252146303653717,
      "learning_rate": 1.357457676024175e-05,
      "loss": 0.3923,
      "step": 10998
    },
    {
      "epoch": 2.2610751361907697,
      "grad_norm": 0.2305798977613449,
      "learning_rate": 1.3567428411599997e-05,
      "loss": 0.4119,
      "step": 10999
    },
    {
      "epoch": 2.2612807071641483,
      "grad_norm": 0.23965519666671753,
      "learning_rate": 1.3560281611494131e-05,
      "loss": 0.3992,
      "step": 11000
    },
    {
      "epoch": 2.261486278137527,
      "grad_norm": 0.22159597277641296,
      "learning_rate": 1.355313636027624e-05,
      "loss": 0.3947,
      "step": 11001
    },
    {
      "epoch": 2.2616918491109055,
      "grad_norm": 0.23163023591041565,
      "learning_rate": 1.3545992658298328e-05,
      "loss": 0.3794,
      "step": 11002
    },
    {
      "epoch": 2.261897420084284,
      "grad_norm": 0.2376321256160736,
      "learning_rate": 1.3538850505912354e-05,
      "loss": 0.3868,
      "step": 11003
    },
    {
      "epoch": 2.2621029910576627,
      "grad_norm": 0.22760237753391266,
      "learning_rate": 1.3531709903470169e-05,
      "loss": 0.3917,
      "step": 11004
    },
    {
      "epoch": 2.2623085620310412,
      "grad_norm": 0.22676926851272583,
      "learning_rate": 1.3524570851323556e-05,
      "loss": 0.3942,
      "step": 11005
    },
    {
      "epoch": 2.26251413300442,
      "grad_norm": 0.22704067826271057,
      "learning_rate": 1.351743334982422e-05,
      "loss": 0.3709,
      "step": 11006
    },
    {
      "epoch": 2.2627197039777984,
      "grad_norm": 0.24701926112174988,
      "learning_rate": 1.3510297399323792e-05,
      "loss": 0.3939,
      "step": 11007
    },
    {
      "epoch": 2.262925274951177,
      "grad_norm": 0.2252301573753357,
      "learning_rate": 1.3503163000173827e-05,
      "loss": 0.373,
      "step": 11008
    },
    {
      "epoch": 2.2631308459245556,
      "grad_norm": 0.2303270697593689,
      "learning_rate": 1.3496030152725793e-05,
      "loss": 0.4049,
      "step": 11009
    },
    {
      "epoch": 2.263336416897934,
      "grad_norm": 0.22634254395961761,
      "learning_rate": 1.3488898857331116e-05,
      "loss": 0.3793,
      "step": 11010
    },
    {
      "epoch": 2.2635419878713128,
      "grad_norm": 0.231819748878479,
      "learning_rate": 1.3481769114341098e-05,
      "loss": 0.3854,
      "step": 11011
    },
    {
      "epoch": 2.263747558844691,
      "grad_norm": 0.12441035360097885,
      "learning_rate": 1.3474640924107014e-05,
      "loss": 0.4482,
      "step": 11012
    },
    {
      "epoch": 2.2639531298180695,
      "grad_norm": 0.23297782242298126,
      "learning_rate": 1.3467514286980024e-05,
      "loss": 0.3978,
      "step": 11013
    },
    {
      "epoch": 2.264158700791448,
      "grad_norm": 0.23407147824764252,
      "learning_rate": 1.346038920331122e-05,
      "loss": 0.3915,
      "step": 11014
    },
    {
      "epoch": 2.2643642717648267,
      "grad_norm": 0.22615815699100494,
      "learning_rate": 1.3453265673451623e-05,
      "loss": 0.3919,
      "step": 11015
    },
    {
      "epoch": 2.2645698427382053,
      "grad_norm": 0.23967291414737701,
      "learning_rate": 1.3446143697752166e-05,
      "loss": 0.3988,
      "step": 11016
    },
    {
      "epoch": 2.264775413711584,
      "grad_norm": 0.2341252863407135,
      "learning_rate": 1.3439023276563739e-05,
      "loss": 0.363,
      "step": 11017
    },
    {
      "epoch": 2.2649809846849625,
      "grad_norm": 0.22647178173065186,
      "learning_rate": 1.3431904410237122e-05,
      "loss": 0.3922,
      "step": 11018
    },
    {
      "epoch": 2.265186555658341,
      "grad_norm": 0.2393738180398941,
      "learning_rate": 1.3424787099123023e-05,
      "loss": 0.3874,
      "step": 11019
    },
    {
      "epoch": 2.2653921266317196,
      "grad_norm": 0.23167793452739716,
      "learning_rate": 1.3417671343572087e-05,
      "loss": 0.3921,
      "step": 11020
    },
    {
      "epoch": 2.265597697605098,
      "grad_norm": 0.2206806093454361,
      "learning_rate": 1.3410557143934864e-05,
      "loss": 0.3988,
      "step": 11021
    },
    {
      "epoch": 2.265803268578477,
      "grad_norm": 0.22465433180332184,
      "learning_rate": 1.340344450056184e-05,
      "loss": 0.3896,
      "step": 11022
    },
    {
      "epoch": 2.2660088395518554,
      "grad_norm": 0.22498202323913574,
      "learning_rate": 1.3396333413803412e-05,
      "loss": 0.3902,
      "step": 11023
    },
    {
      "epoch": 2.266214410525234,
      "grad_norm": 0.23176932334899902,
      "learning_rate": 1.3389223884009937e-05,
      "loss": 0.4043,
      "step": 11024
    },
    {
      "epoch": 2.2664199814986126,
      "grad_norm": 0.22066771984100342,
      "learning_rate": 1.3382115911531653e-05,
      "loss": 0.3588,
      "step": 11025
    },
    {
      "epoch": 2.266625552471991,
      "grad_norm": 0.23479969799518585,
      "learning_rate": 1.3375009496718729e-05,
      "loss": 0.4034,
      "step": 11026
    },
    {
      "epoch": 2.2668311234453693,
      "grad_norm": 0.21714085340499878,
      "learning_rate": 1.336790463992128e-05,
      "loss": 0.4034,
      "step": 11027
    },
    {
      "epoch": 2.267036694418748,
      "grad_norm": 0.22929847240447998,
      "learning_rate": 1.336080134148932e-05,
      "loss": 0.4047,
      "step": 11028
    },
    {
      "epoch": 2.2672422653921265,
      "grad_norm": 0.23881329596042633,
      "learning_rate": 1.3353699601772797e-05,
      "loss": 0.3813,
      "step": 11029
    },
    {
      "epoch": 2.267447836365505,
      "grad_norm": 0.22318050265312195,
      "learning_rate": 1.3346599421121562e-05,
      "loss": 0.4027,
      "step": 11030
    },
    {
      "epoch": 2.2676534073388837,
      "grad_norm": 0.21505969762802124,
      "learning_rate": 1.3339500799885443e-05,
      "loss": 0.3957,
      "step": 11031
    },
    {
      "epoch": 2.2678589783122622,
      "grad_norm": 0.22498784959316254,
      "learning_rate": 1.3332403738414138e-05,
      "loss": 0.3994,
      "step": 11032
    },
    {
      "epoch": 2.268064549285641,
      "grad_norm": 0.23193588852882385,
      "learning_rate": 1.3325308237057274e-05,
      "loss": 0.3767,
      "step": 11033
    },
    {
      "epoch": 2.2682701202590194,
      "grad_norm": 0.2315264791250229,
      "learning_rate": 1.3318214296164444e-05,
      "loss": 0.4012,
      "step": 11034
    },
    {
      "epoch": 2.268475691232398,
      "grad_norm": 0.2320316731929779,
      "learning_rate": 1.3311121916085105e-05,
      "loss": 0.3979,
      "step": 11035
    },
    {
      "epoch": 2.2686812622057766,
      "grad_norm": 0.22784501314163208,
      "learning_rate": 1.3304031097168684e-05,
      "loss": 0.3942,
      "step": 11036
    },
    {
      "epoch": 2.268886833179155,
      "grad_norm": 0.22963948547840118,
      "learning_rate": 1.329694183976449e-05,
      "loss": 0.3872,
      "step": 11037
    },
    {
      "epoch": 2.2690924041525338,
      "grad_norm": 0.2397637516260147,
      "learning_rate": 1.32898541442218e-05,
      "loss": 0.4042,
      "step": 11038
    },
    {
      "epoch": 2.2692979751259124,
      "grad_norm": 0.22877174615859985,
      "learning_rate": 1.3282768010889788e-05,
      "loss": 0.39,
      "step": 11039
    },
    {
      "epoch": 2.269503546099291,
      "grad_norm": 0.21806636452674866,
      "learning_rate": 1.3275683440117551e-05,
      "loss": 0.3721,
      "step": 11040
    },
    {
      "epoch": 2.2697091170726695,
      "grad_norm": 0.22859534621238708,
      "learning_rate": 1.3268600432254108e-05,
      "loss": 0.4001,
      "step": 11041
    },
    {
      "epoch": 2.2699146880460477,
      "grad_norm": 0.22555097937583923,
      "learning_rate": 1.3261518987648413e-05,
      "loss": 0.3969,
      "step": 11042
    },
    {
      "epoch": 2.2701202590194263,
      "grad_norm": 0.22480298578739166,
      "learning_rate": 1.3254439106649332e-05,
      "loss": 0.3929,
      "step": 11043
    },
    {
      "epoch": 2.270325829992805,
      "grad_norm": 0.13393786549568176,
      "learning_rate": 1.324736078960564e-05,
      "loss": 0.4585,
      "step": 11044
    },
    {
      "epoch": 2.2705314009661834,
      "grad_norm": 0.22970856726169586,
      "learning_rate": 1.324028403686609e-05,
      "loss": 0.4069,
      "step": 11045
    },
    {
      "epoch": 2.270736971939562,
      "grad_norm": 0.22466929256916046,
      "learning_rate": 1.3233208848779298e-05,
      "loss": 0.3929,
      "step": 11046
    },
    {
      "epoch": 2.2709425429129406,
      "grad_norm": 0.12328503280878067,
      "learning_rate": 1.3226135225693829e-05,
      "loss": 0.4301,
      "step": 11047
    },
    {
      "epoch": 2.271148113886319,
      "grad_norm": 0.2344934195280075,
      "learning_rate": 1.3219063167958165e-05,
      "loss": 0.3806,
      "step": 11048
    },
    {
      "epoch": 2.271353684859698,
      "grad_norm": 0.23457783460617065,
      "learning_rate": 1.3211992675920716e-05,
      "loss": 0.3918,
      "step": 11049
    },
    {
      "epoch": 2.2715592558330764,
      "grad_norm": 0.12788406014442444,
      "learning_rate": 1.3204923749929811e-05,
      "loss": 0.4623,
      "step": 11050
    },
    {
      "epoch": 2.271764826806455,
      "grad_norm": 0.12366097420454025,
      "learning_rate": 1.319785639033369e-05,
      "loss": 0.431,
      "step": 11051
    },
    {
      "epoch": 2.2719703977798336,
      "grad_norm": 0.22478674352169037,
      "learning_rate": 1.3190790597480558e-05,
      "loss": 0.4044,
      "step": 11052
    },
    {
      "epoch": 2.272175968753212,
      "grad_norm": 0.2239609956741333,
      "learning_rate": 1.3183726371718493e-05,
      "loss": 0.3959,
      "step": 11053
    },
    {
      "epoch": 2.2723815397265907,
      "grad_norm": 0.22685250639915466,
      "learning_rate": 1.3176663713395506e-05,
      "loss": 0.4002,
      "step": 11054
    },
    {
      "epoch": 2.2725871106999693,
      "grad_norm": 0.2281496375799179,
      "learning_rate": 1.3169602622859576e-05,
      "loss": 0.3986,
      "step": 11055
    },
    {
      "epoch": 2.272792681673348,
      "grad_norm": 0.23187507688999176,
      "learning_rate": 1.3162543100458542e-05,
      "loss": 0.4239,
      "step": 11056
    },
    {
      "epoch": 2.272998252646726,
      "grad_norm": 0.2259424477815628,
      "learning_rate": 1.3155485146540192e-05,
      "loss": 0.381,
      "step": 11057
    },
    {
      "epoch": 2.273203823620105,
      "grad_norm": 0.23765668272972107,
      "learning_rate": 1.3148428761452263e-05,
      "loss": 0.4185,
      "step": 11058
    },
    {
      "epoch": 2.2734093945934832,
      "grad_norm": 0.23085662722587585,
      "learning_rate": 1.3141373945542375e-05,
      "loss": 0.4,
      "step": 11059
    },
    {
      "epoch": 2.273614965566862,
      "grad_norm": 0.22228921949863434,
      "learning_rate": 1.3134320699158083e-05,
      "loss": 0.3736,
      "step": 11060
    },
    {
      "epoch": 2.2738205365402404,
      "grad_norm": 0.21951285004615784,
      "learning_rate": 1.3127269022646872e-05,
      "loss": 0.3928,
      "step": 11061
    },
    {
      "epoch": 2.274026107513619,
      "grad_norm": 0.1213352307677269,
      "learning_rate": 1.3120218916356144e-05,
      "loss": 0.4417,
      "step": 11062
    },
    {
      "epoch": 2.2742316784869976,
      "grad_norm": 0.23710954189300537,
      "learning_rate": 1.3113170380633223e-05,
      "loss": 0.3963,
      "step": 11063
    },
    {
      "epoch": 2.274437249460376,
      "grad_norm": 0.23138689994812012,
      "learning_rate": 1.310612341582535e-05,
      "loss": 0.3926,
      "step": 11064
    },
    {
      "epoch": 2.2746428204337548,
      "grad_norm": 0.12516102194786072,
      "learning_rate": 1.309907802227971e-05,
      "loss": 0.4632,
      "step": 11065
    },
    {
      "epoch": 2.2748483914071334,
      "grad_norm": 0.1229373887181282,
      "learning_rate": 1.3092034200343395e-05,
      "loss": 0.4587,
      "step": 11066
    },
    {
      "epoch": 2.275053962380512,
      "grad_norm": 0.12089274078607559,
      "learning_rate": 1.308499195036342e-05,
      "loss": 0.4485,
      "step": 11067
    },
    {
      "epoch": 2.2752595333538905,
      "grad_norm": 0.23402529954910278,
      "learning_rate": 1.3077951272686716e-05,
      "loss": 0.4031,
      "step": 11068
    },
    {
      "epoch": 2.275465104327269,
      "grad_norm": 0.12246517091989517,
      "learning_rate": 1.3070912167660153e-05,
      "loss": 0.4518,
      "step": 11069
    },
    {
      "epoch": 2.2756706753006477,
      "grad_norm": 0.22479888796806335,
      "learning_rate": 1.3063874635630514e-05,
      "loss": 0.4006,
      "step": 11070
    },
    {
      "epoch": 2.2758762462740263,
      "grad_norm": 0.2248338758945465,
      "learning_rate": 1.3056838676944483e-05,
      "loss": 0.3937,
      "step": 11071
    },
    {
      "epoch": 2.2760818172474044,
      "grad_norm": 0.23100706934928894,
      "learning_rate": 1.3049804291948727e-05,
      "loss": 0.3983,
      "step": 11072
    },
    {
      "epoch": 2.2762873882207835,
      "grad_norm": 0.23669414222240448,
      "learning_rate": 1.3042771480989777e-05,
      "loss": 0.4027,
      "step": 11073
    },
    {
      "epoch": 2.2764929591941616,
      "grad_norm": 0.1265943944454193,
      "learning_rate": 1.303574024441411e-05,
      "loss": 0.4579,
      "step": 11074
    },
    {
      "epoch": 2.27669853016754,
      "grad_norm": 0.23661333322525024,
      "learning_rate": 1.3028710582568104e-05,
      "loss": 0.3944,
      "step": 11075
    },
    {
      "epoch": 2.276904101140919,
      "grad_norm": 0.1238350123167038,
      "learning_rate": 1.3021682495798108e-05,
      "loss": 0.4527,
      "step": 11076
    },
    {
      "epoch": 2.2771096721142974,
      "grad_norm": 0.23075202107429504,
      "learning_rate": 1.3014655984450351e-05,
      "loss": 0.4139,
      "step": 11077
    },
    {
      "epoch": 2.277315243087676,
      "grad_norm": 0.23109117150306702,
      "learning_rate": 1.300763104887098e-05,
      "loss": 0.3795,
      "step": 11078
    },
    {
      "epoch": 2.2775208140610546,
      "grad_norm": 0.13491906225681305,
      "learning_rate": 1.300060768940611e-05,
      "loss": 0.4503,
      "step": 11079
    },
    {
      "epoch": 2.277726385034433,
      "grad_norm": 0.22590011358261108,
      "learning_rate": 1.2993585906401735e-05,
      "loss": 0.3878,
      "step": 11080
    },
    {
      "epoch": 2.2779319560078117,
      "grad_norm": 0.23638883233070374,
      "learning_rate": 1.2986565700203778e-05,
      "loss": 0.3989,
      "step": 11081
    },
    {
      "epoch": 2.2781375269811903,
      "grad_norm": 0.2324167639017105,
      "learning_rate": 1.2979547071158106e-05,
      "loss": 0.3983,
      "step": 11082
    },
    {
      "epoch": 2.278343097954569,
      "grad_norm": 0.22499267756938934,
      "learning_rate": 1.2972530019610482e-05,
      "loss": 0.3917,
      "step": 11083
    },
    {
      "epoch": 2.2785486689279475,
      "grad_norm": 0.23397715389728546,
      "learning_rate": 1.2965514545906612e-05,
      "loss": 0.4039,
      "step": 11084
    },
    {
      "epoch": 2.278754239901326,
      "grad_norm": 0.12136294692754745,
      "learning_rate": 1.2958500650392098e-05,
      "loss": 0.4592,
      "step": 11085
    },
    {
      "epoch": 2.2789598108747047,
      "grad_norm": 0.23275341093540192,
      "learning_rate": 1.2951488333412505e-05,
      "loss": 0.3907,
      "step": 11086
    },
    {
      "epoch": 2.279165381848083,
      "grad_norm": 0.23098520934581757,
      "learning_rate": 1.294447759531329e-05,
      "loss": 0.3933,
      "step": 11087
    },
    {
      "epoch": 2.279370952821462,
      "grad_norm": 0.2239454835653305,
      "learning_rate": 1.2937468436439835e-05,
      "loss": 0.3851,
      "step": 11088
    },
    {
      "epoch": 2.27957652379484,
      "grad_norm": 0.23332616686820984,
      "learning_rate": 1.2930460857137452e-05,
      "loss": 0.4186,
      "step": 11089
    },
    {
      "epoch": 2.2797820947682186,
      "grad_norm": 0.22289900481700897,
      "learning_rate": 1.2923454857751368e-05,
      "loss": 0.3918,
      "step": 11090
    },
    {
      "epoch": 2.279987665741597,
      "grad_norm": 0.11850762367248535,
      "learning_rate": 1.2916450438626742e-05,
      "loss": 0.4475,
      "step": 11091
    },
    {
      "epoch": 2.2801932367149758,
      "grad_norm": 0.22523003816604614,
      "learning_rate": 1.2909447600108626e-05,
      "loss": 0.3886,
      "step": 11092
    },
    {
      "epoch": 2.2803988076883543,
      "grad_norm": 0.23885266482830048,
      "learning_rate": 1.2902446342542053e-05,
      "loss": 0.4051,
      "step": 11093
    },
    {
      "epoch": 2.280604378661733,
      "grad_norm": 0.2248595505952835,
      "learning_rate": 1.2895446666271926e-05,
      "loss": 0.3843,
      "step": 11094
    },
    {
      "epoch": 2.2808099496351115,
      "grad_norm": 0.23855264484882355,
      "learning_rate": 1.2888448571643081e-05,
      "loss": 0.3936,
      "step": 11095
    },
    {
      "epoch": 2.28101552060849,
      "grad_norm": 0.2420293390750885,
      "learning_rate": 1.2881452059000287e-05,
      "loss": 0.3967,
      "step": 11096
    },
    {
      "epoch": 2.2812210915818687,
      "grad_norm": 0.22361691296100616,
      "learning_rate": 1.2874457128688216e-05,
      "loss": 0.3815,
      "step": 11097
    },
    {
      "epoch": 2.2814266625552473,
      "grad_norm": 0.13447174429893494,
      "learning_rate": 1.28674637810515e-05,
      "loss": 0.4621,
      "step": 11098
    },
    {
      "epoch": 2.281632233528626,
      "grad_norm": 0.23001371324062347,
      "learning_rate": 1.2860472016434645e-05,
      "loss": 0.3698,
      "step": 11099
    },
    {
      "epoch": 2.2818378045020045,
      "grad_norm": 0.2274404913187027,
      "learning_rate": 1.2853481835182129e-05,
      "loss": 0.3959,
      "step": 11100
    },
    {
      "epoch": 2.282043375475383,
      "grad_norm": 0.23622088134288788,
      "learning_rate": 1.2846493237638308e-05,
      "loss": 0.4038,
      "step": 11101
    },
    {
      "epoch": 2.282248946448761,
      "grad_norm": 0.11896710842847824,
      "learning_rate": 1.283950622414748e-05,
      "loss": 0.4503,
      "step": 11102
    },
    {
      "epoch": 2.2824545174221402,
      "grad_norm": 0.23470290005207062,
      "learning_rate": 1.2832520795053865e-05,
      "loss": 0.3857,
      "step": 11103
    },
    {
      "epoch": 2.2826600883955184,
      "grad_norm": 0.2171606570482254,
      "learning_rate": 1.2825536950701594e-05,
      "loss": 0.4002,
      "step": 11104
    },
    {
      "epoch": 2.282865659368897,
      "grad_norm": 0.23823009431362152,
      "learning_rate": 1.281855469143474e-05,
      "loss": 0.3899,
      "step": 11105
    },
    {
      "epoch": 2.2830712303422755,
      "grad_norm": 0.22637523710727692,
      "learning_rate": 1.2811574017597265e-05,
      "loss": 0.3961,
      "step": 11106
    },
    {
      "epoch": 2.283276801315654,
      "grad_norm": 0.23832228779792786,
      "learning_rate": 1.2804594929533107e-05,
      "loss": 0.4002,
      "step": 11107
    },
    {
      "epoch": 2.2834823722890327,
      "grad_norm": 0.22340717911720276,
      "learning_rate": 1.2797617427586071e-05,
      "loss": 0.3843,
      "step": 11108
    },
    {
      "epoch": 2.2836879432624113,
      "grad_norm": 0.2311078906059265,
      "learning_rate": 1.2790641512099914e-05,
      "loss": 0.3848,
      "step": 11109
    },
    {
      "epoch": 2.28389351423579,
      "grad_norm": 0.1308235377073288,
      "learning_rate": 1.2783667183418299e-05,
      "loss": 0.4372,
      "step": 11110
    },
    {
      "epoch": 2.2840990852091685,
      "grad_norm": 0.22774946689605713,
      "learning_rate": 1.2776694441884828e-05,
      "loss": 0.4162,
      "step": 11111
    },
    {
      "epoch": 2.284304656182547,
      "grad_norm": 0.23029407858848572,
      "learning_rate": 1.2769723287843009e-05,
      "loss": 0.4024,
      "step": 11112
    },
    {
      "epoch": 2.2845102271559257,
      "grad_norm": 0.126814067363739,
      "learning_rate": 1.2762753721636263e-05,
      "loss": 0.4453,
      "step": 11113
    },
    {
      "epoch": 2.2847157981293043,
      "grad_norm": 0.1285434365272522,
      "learning_rate": 1.2755785743607981e-05,
      "loss": 0.4571,
      "step": 11114
    },
    {
      "epoch": 2.284921369102683,
      "grad_norm": 0.22413338720798492,
      "learning_rate": 1.2748819354101428e-05,
      "loss": 0.4142,
      "step": 11115
    },
    {
      "epoch": 2.2851269400760614,
      "grad_norm": 0.2274656891822815,
      "learning_rate": 1.2741854553459801e-05,
      "loss": 0.3934,
      "step": 11116
    },
    {
      "epoch": 2.2853325110494396,
      "grad_norm": 0.2260764241218567,
      "learning_rate": 1.2734891342026228e-05,
      "loss": 0.3912,
      "step": 11117
    },
    {
      "epoch": 2.2855380820228186,
      "grad_norm": 0.24936430156230927,
      "learning_rate": 1.2727929720143737e-05,
      "loss": 0.3797,
      "step": 11118
    },
    {
      "epoch": 2.2857436529961968,
      "grad_norm": 0.12210172414779663,
      "learning_rate": 1.2720969688155326e-05,
      "loss": 0.4556,
      "step": 11119
    },
    {
      "epoch": 2.2859492239695753,
      "grad_norm": 0.23101243376731873,
      "learning_rate": 1.2714011246403862e-05,
      "loss": 0.3901,
      "step": 11120
    },
    {
      "epoch": 2.286154794942954,
      "grad_norm": 0.22702264785766602,
      "learning_rate": 1.2707054395232148e-05,
      "loss": 0.4061,
      "step": 11121
    },
    {
      "epoch": 2.2863603659163325,
      "grad_norm": 0.12117066979408264,
      "learning_rate": 1.270009913498294e-05,
      "loss": 0.4418,
      "step": 11122
    },
    {
      "epoch": 2.286565936889711,
      "grad_norm": 0.12678340077400208,
      "learning_rate": 1.2693145465998878e-05,
      "loss": 0.462,
      "step": 11123
    },
    {
      "epoch": 2.2867715078630897,
      "grad_norm": 0.1255645453929901,
      "learning_rate": 1.2686193388622541e-05,
      "loss": 0.4692,
      "step": 11124
    },
    {
      "epoch": 2.2869770788364683,
      "grad_norm": 0.2327447086572647,
      "learning_rate": 1.2679242903196418e-05,
      "loss": 0.4108,
      "step": 11125
    },
    {
      "epoch": 2.287182649809847,
      "grad_norm": 0.23680876195430756,
      "learning_rate": 1.267229401006293e-05,
      "loss": 0.3892,
      "step": 11126
    },
    {
      "epoch": 2.2873882207832255,
      "grad_norm": 0.22818145155906677,
      "learning_rate": 1.2665346709564407e-05,
      "loss": 0.4014,
      "step": 11127
    },
    {
      "epoch": 2.287593791756604,
      "grad_norm": 0.2357787936925888,
      "learning_rate": 1.2658401002043128e-05,
      "loss": 0.3958,
      "step": 11128
    },
    {
      "epoch": 2.2877993627299826,
      "grad_norm": 0.12954148650169373,
      "learning_rate": 1.2651456887841272e-05,
      "loss": 0.4567,
      "step": 11129
    },
    {
      "epoch": 2.288004933703361,
      "grad_norm": 0.23145915567874908,
      "learning_rate": 1.2644514367300932e-05,
      "loss": 0.4028,
      "step": 11130
    },
    {
      "epoch": 2.28821050467674,
      "grad_norm": 0.22589780390262604,
      "learning_rate": 1.2637573440764148e-05,
      "loss": 0.3977,
      "step": 11131
    },
    {
      "epoch": 2.2884160756501184,
      "grad_norm": 0.23484013974666595,
      "learning_rate": 1.2630634108572853e-05,
      "loss": 0.3964,
      "step": 11132
    },
    {
      "epoch": 2.288621646623497,
      "grad_norm": 0.23270565271377563,
      "learning_rate": 1.2623696371068912e-05,
      "loss": 0.3953,
      "step": 11133
    },
    {
      "epoch": 2.288827217596875,
      "grad_norm": 0.12677009403705597,
      "learning_rate": 1.2616760228594133e-05,
      "loss": 0.4461,
      "step": 11134
    },
    {
      "epoch": 2.2890327885702537,
      "grad_norm": 0.22877991199493408,
      "learning_rate": 1.2609825681490221e-05,
      "loss": 0.3859,
      "step": 11135
    },
    {
      "epoch": 2.2892383595436323,
      "grad_norm": 0.23278361558914185,
      "learning_rate": 1.260289273009881e-05,
      "loss": 0.3986,
      "step": 11136
    },
    {
      "epoch": 2.289443930517011,
      "grad_norm": 0.2246071696281433,
      "learning_rate": 1.2595961374761448e-05,
      "loss": 0.3715,
      "step": 11137
    },
    {
      "epoch": 2.2896495014903895,
      "grad_norm": 0.23304541409015656,
      "learning_rate": 1.2589031615819613e-05,
      "loss": 0.3874,
      "step": 11138
    },
    {
      "epoch": 2.289855072463768,
      "grad_norm": 0.2341768443584442,
      "learning_rate": 1.2582103453614684e-05,
      "loss": 0.3995,
      "step": 11139
    },
    {
      "epoch": 2.2900606434371467,
      "grad_norm": 0.22343499958515167,
      "learning_rate": 1.2575176888488016e-05,
      "loss": 0.3997,
      "step": 11140
    },
    {
      "epoch": 2.2902662144105252,
      "grad_norm": 0.22474630177021027,
      "learning_rate": 1.2568251920780829e-05,
      "loss": 0.4096,
      "step": 11141
    },
    {
      "epoch": 2.290471785383904,
      "grad_norm": 0.1266659051179886,
      "learning_rate": 1.2561328550834265e-05,
      "loss": 0.4552,
      "step": 11142
    },
    {
      "epoch": 2.2906773563572824,
      "grad_norm": 0.2366304099559784,
      "learning_rate": 1.2554406778989448e-05,
      "loss": 0.3886,
      "step": 11143
    },
    {
      "epoch": 2.290882927330661,
      "grad_norm": 0.23987746238708496,
      "learning_rate": 1.2547486605587354e-05,
      "loss": 0.4198,
      "step": 11144
    },
    {
      "epoch": 2.2910884983040396,
      "grad_norm": 0.12243471294641495,
      "learning_rate": 1.2540568030968911e-05,
      "loss": 0.4459,
      "step": 11145
    },
    {
      "epoch": 2.291294069277418,
      "grad_norm": 0.12086188048124313,
      "learning_rate": 1.2533651055474965e-05,
      "loss": 0.4536,
      "step": 11146
    },
    {
      "epoch": 2.2914996402507968,
      "grad_norm": 0.23374128341674805,
      "learning_rate": 1.2526735679446273e-05,
      "loss": 0.3984,
      "step": 11147
    },
    {
      "epoch": 2.2917052112241754,
      "grad_norm": 0.23066291213035583,
      "learning_rate": 1.2519821903223552e-05,
      "loss": 0.4043,
      "step": 11148
    },
    {
      "epoch": 2.2919107821975535,
      "grad_norm": 0.227426216006279,
      "learning_rate": 1.2512909727147388e-05,
      "loss": 0.4083,
      "step": 11149
    },
    {
      "epoch": 2.292116353170932,
      "grad_norm": 0.22349144518375397,
      "learning_rate": 1.2505999151558319e-05,
      "loss": 0.4062,
      "step": 11150
    },
    {
      "epoch": 2.2923219241443107,
      "grad_norm": 0.22015713155269623,
      "learning_rate": 1.2499090176796794e-05,
      "loss": 0.3929,
      "step": 11151
    },
    {
      "epoch": 2.2925274951176893,
      "grad_norm": 0.22965404391288757,
      "learning_rate": 1.2492182803203188e-05,
      "loss": 0.3723,
      "step": 11152
    },
    {
      "epoch": 2.292733066091068,
      "grad_norm": 0.22359246015548706,
      "learning_rate": 1.24852770311178e-05,
      "loss": 0.399,
      "step": 11153
    },
    {
      "epoch": 2.2929386370644464,
      "grad_norm": 0.2246733158826828,
      "learning_rate": 1.2478372860880819e-05,
      "loss": 0.4153,
      "step": 11154
    },
    {
      "epoch": 2.293144208037825,
      "grad_norm": 0.23003293573856354,
      "learning_rate": 1.2471470292832414e-05,
      "loss": 0.4202,
      "step": 11155
    },
    {
      "epoch": 2.2933497790112036,
      "grad_norm": 0.22609424591064453,
      "learning_rate": 1.2464569327312634e-05,
      "loss": 0.3861,
      "step": 11156
    },
    {
      "epoch": 2.293555349984582,
      "grad_norm": 0.233436718583107,
      "learning_rate": 1.2457669964661447e-05,
      "loss": 0.4113,
      "step": 11157
    },
    {
      "epoch": 2.293760920957961,
      "grad_norm": 0.2230585813522339,
      "learning_rate": 1.2450772205218768e-05,
      "loss": 0.3785,
      "step": 11158
    },
    {
      "epoch": 2.2939664919313394,
      "grad_norm": 0.13363520801067352,
      "learning_rate": 1.2443876049324401e-05,
      "loss": 0.4589,
      "step": 11159
    },
    {
      "epoch": 2.294172062904718,
      "grad_norm": 0.23311814665794373,
      "learning_rate": 1.2436981497318081e-05,
      "loss": 0.398,
      "step": 11160
    },
    {
      "epoch": 2.2943776338780966,
      "grad_norm": 0.23788057267665863,
      "learning_rate": 1.2430088549539498e-05,
      "loss": 0.3656,
      "step": 11161
    },
    {
      "epoch": 2.294583204851475,
      "grad_norm": 0.23247785866260529,
      "learning_rate": 1.2423197206328219e-05,
      "loss": 0.416,
      "step": 11162
    },
    {
      "epoch": 2.2947887758248537,
      "grad_norm": 0.12585797905921936,
      "learning_rate": 1.2416307468023738e-05,
      "loss": 0.4245,
      "step": 11163
    },
    {
      "epoch": 2.294994346798232,
      "grad_norm": 0.11753173917531967,
      "learning_rate": 1.2409419334965507e-05,
      "loss": 0.4366,
      "step": 11164
    },
    {
      "epoch": 2.2951999177716105,
      "grad_norm": 0.11819145828485489,
      "learning_rate": 1.2402532807492854e-05,
      "loss": 0.4381,
      "step": 11165
    },
    {
      "epoch": 2.295405488744989,
      "grad_norm": 0.2348855584859848,
      "learning_rate": 1.2395647885945055e-05,
      "loss": 0.3894,
      "step": 11166
    },
    {
      "epoch": 2.2956110597183677,
      "grad_norm": 0.11923953890800476,
      "learning_rate": 1.238876457066129e-05,
      "loss": 0.4363,
      "step": 11167
    },
    {
      "epoch": 2.2958166306917462,
      "grad_norm": 0.23349328339099884,
      "learning_rate": 1.2381882861980653e-05,
      "loss": 0.3905,
      "step": 11168
    },
    {
      "epoch": 2.296022201665125,
      "grad_norm": 0.2256205677986145,
      "learning_rate": 1.2375002760242207e-05,
      "loss": 0.385,
      "step": 11169
    },
    {
      "epoch": 2.2962277726385034,
      "grad_norm": 0.23128965497016907,
      "learning_rate": 1.2368124265784888e-05,
      "loss": 0.3942,
      "step": 11170
    },
    {
      "epoch": 2.296433343611882,
      "grad_norm": 0.12350024282932281,
      "learning_rate": 1.2361247378947561e-05,
      "loss": 0.4333,
      "step": 11171
    },
    {
      "epoch": 2.2966389145852606,
      "grad_norm": 0.23417676985263824,
      "learning_rate": 1.2354372100069026e-05,
      "loss": 0.3891,
      "step": 11172
    },
    {
      "epoch": 2.296844485558639,
      "grad_norm": 0.22731667757034302,
      "learning_rate": 1.2347498429487991e-05,
      "loss": 0.3977,
      "step": 11173
    },
    {
      "epoch": 2.2970500565320178,
      "grad_norm": 0.2296586036682129,
      "learning_rate": 1.2340626367543091e-05,
      "loss": 0.4054,
      "step": 11174
    },
    {
      "epoch": 2.2972556275053964,
      "grad_norm": 0.13354873657226562,
      "learning_rate": 1.2333755914572868e-05,
      "loss": 0.4622,
      "step": 11175
    },
    {
      "epoch": 2.297461198478775,
      "grad_norm": 0.22536778450012207,
      "learning_rate": 1.2326887070915823e-05,
      "loss": 0.3746,
      "step": 11176
    },
    {
      "epoch": 2.2976667694521535,
      "grad_norm": 0.22419311106204987,
      "learning_rate": 1.2320019836910335e-05,
      "loss": 0.4029,
      "step": 11177
    },
    {
      "epoch": 2.297872340425532,
      "grad_norm": 0.2210252434015274,
      "learning_rate": 1.231315421289473e-05,
      "loss": 0.3709,
      "step": 11178
    },
    {
      "epoch": 2.2980779113989103,
      "grad_norm": 0.22239845991134644,
      "learning_rate": 1.2306290199207233e-05,
      "loss": 0.3892,
      "step": 11179
    },
    {
      "epoch": 2.298283482372289,
      "grad_norm": 0.22236813604831696,
      "learning_rate": 1.2299427796186008e-05,
      "loss": 0.4075,
      "step": 11180
    },
    {
      "epoch": 2.2984890533456674,
      "grad_norm": 0.22609713673591614,
      "learning_rate": 1.229256700416914e-05,
      "loss": 0.3968,
      "step": 11181
    },
    {
      "epoch": 2.298694624319046,
      "grad_norm": 0.23106250166893005,
      "learning_rate": 1.2285707823494599e-05,
      "loss": 0.3792,
      "step": 11182
    },
    {
      "epoch": 2.2989001952924246,
      "grad_norm": 0.22286170721054077,
      "learning_rate": 1.2278850254500348e-05,
      "loss": 0.3835,
      "step": 11183
    },
    {
      "epoch": 2.299105766265803,
      "grad_norm": 0.229881152510643,
      "learning_rate": 1.227199429752419e-05,
      "loss": 0.3851,
      "step": 11184
    },
    {
      "epoch": 2.299311337239182,
      "grad_norm": 0.1258445382118225,
      "learning_rate": 1.2265139952903916e-05,
      "loss": 0.4364,
      "step": 11185
    },
    {
      "epoch": 2.2995169082125604,
      "grad_norm": 0.22773106396198273,
      "learning_rate": 1.2258287220977196e-05,
      "loss": 0.4042,
      "step": 11186
    },
    {
      "epoch": 2.299722479185939,
      "grad_norm": 0.22230634093284607,
      "learning_rate": 1.225143610208163e-05,
      "loss": 0.3832,
      "step": 11187
    },
    {
      "epoch": 2.2999280501593176,
      "grad_norm": 0.23126055300235748,
      "learning_rate": 1.2244586596554739e-05,
      "loss": 0.3922,
      "step": 11188
    },
    {
      "epoch": 2.300133621132696,
      "grad_norm": 0.2347308248281479,
      "learning_rate": 1.2237738704733954e-05,
      "loss": 0.3671,
      "step": 11189
    },
    {
      "epoch": 2.3003391921060747,
      "grad_norm": 0.12365079671144485,
      "learning_rate": 1.2230892426956669e-05,
      "loss": 0.4378,
      "step": 11190
    },
    {
      "epoch": 2.3005447630794533,
      "grad_norm": 0.22160682082176208,
      "learning_rate": 1.222404776356015e-05,
      "loss": 0.388,
      "step": 11191
    },
    {
      "epoch": 2.300750334052832,
      "grad_norm": 0.22561746835708618,
      "learning_rate": 1.2217204714881603e-05,
      "loss": 0.3529,
      "step": 11192
    },
    {
      "epoch": 2.3009559050262105,
      "grad_norm": 0.27136144042015076,
      "learning_rate": 1.2210363281258155e-05,
      "loss": 0.3885,
      "step": 11193
    },
    {
      "epoch": 2.3011614759995886,
      "grad_norm": 0.22475385665893555,
      "learning_rate": 1.220352346302685e-05,
      "loss": 0.3874,
      "step": 11194
    },
    {
      "epoch": 2.3013670469729672,
      "grad_norm": 0.23630446195602417,
      "learning_rate": 1.2196685260524648e-05,
      "loss": 0.3871,
      "step": 11195
    },
    {
      "epoch": 2.301572617946346,
      "grad_norm": 0.12092158198356628,
      "learning_rate": 1.2189848674088433e-05,
      "loss": 0.4375,
      "step": 11196
    },
    {
      "epoch": 2.3017781889197244,
      "grad_norm": 0.23177292943000793,
      "learning_rate": 1.2183013704055033e-05,
      "loss": 0.4025,
      "step": 11197
    },
    {
      "epoch": 2.301983759893103,
      "grad_norm": 0.12416423112154007,
      "learning_rate": 1.2176180350761157e-05,
      "loss": 0.4473,
      "step": 11198
    },
    {
      "epoch": 2.3021893308664816,
      "grad_norm": 0.12276289612054825,
      "learning_rate": 1.2169348614543464e-05,
      "loss": 0.4537,
      "step": 11199
    },
    {
      "epoch": 2.30239490183986,
      "grad_norm": 0.22835765779018402,
      "learning_rate": 1.216251849573851e-05,
      "loss": 0.3937,
      "step": 11200
    },
    {
      "epoch": 2.3026004728132388,
      "grad_norm": 0.22718718647956848,
      "learning_rate": 1.2155689994682788e-05,
      "loss": 0.3896,
      "step": 11201
    },
    {
      "epoch": 2.3028060437866174,
      "grad_norm": 0.1231781542301178,
      "learning_rate": 1.2148863111712704e-05,
      "loss": 0.447,
      "step": 11202
    },
    {
      "epoch": 2.303011614759996,
      "grad_norm": 0.23988062143325806,
      "learning_rate": 1.214203784716458e-05,
      "loss": 0.3919,
      "step": 11203
    },
    {
      "epoch": 2.3032171857333745,
      "grad_norm": 0.21849578619003296,
      "learning_rate": 1.2135214201374685e-05,
      "loss": 0.3758,
      "step": 11204
    },
    {
      "epoch": 2.303422756706753,
      "grad_norm": 0.2158803790807724,
      "learning_rate": 1.2128392174679179e-05,
      "loss": 0.3704,
      "step": 11205
    },
    {
      "epoch": 2.3036283276801317,
      "grad_norm": 0.22733426094055176,
      "learning_rate": 1.212157176741413e-05,
      "loss": 0.3694,
      "step": 11206
    },
    {
      "epoch": 2.3038338986535103,
      "grad_norm": 0.23298750817775726,
      "learning_rate": 1.2114752979915584e-05,
      "loss": 0.3798,
      "step": 11207
    },
    {
      "epoch": 2.304039469626889,
      "grad_norm": 0.22814899682998657,
      "learning_rate": 1.210793581251945e-05,
      "loss": 0.3811,
      "step": 11208
    },
    {
      "epoch": 2.304245040600267,
      "grad_norm": 0.23419663310050964,
      "learning_rate": 1.2101120265561585e-05,
      "loss": 0.3799,
      "step": 11209
    },
    {
      "epoch": 2.3044506115736456,
      "grad_norm": 0.12921544909477234,
      "learning_rate": 1.2094306339377743e-05,
      "loss": 0.4378,
      "step": 11210
    },
    {
      "epoch": 2.304656182547024,
      "grad_norm": 0.22787374258041382,
      "learning_rate": 1.208749403430364e-05,
      "loss": 0.4039,
      "step": 11211
    },
    {
      "epoch": 2.304861753520403,
      "grad_norm": 0.2288065403699875,
      "learning_rate": 1.2080683350674869e-05,
      "loss": 0.3922,
      "step": 11212
    },
    {
      "epoch": 2.3050673244937814,
      "grad_norm": 0.23211759328842163,
      "learning_rate": 1.2073874288826966e-05,
      "loss": 0.3804,
      "step": 11213
    },
    {
      "epoch": 2.30527289546716,
      "grad_norm": 0.23307380080223083,
      "learning_rate": 1.2067066849095386e-05,
      "loss": 0.3883,
      "step": 11214
    },
    {
      "epoch": 2.3054784664405386,
      "grad_norm": 0.22233398258686066,
      "learning_rate": 1.206026103181549e-05,
      "loss": 0.3948,
      "step": 11215
    },
    {
      "epoch": 2.305684037413917,
      "grad_norm": 0.22807008028030396,
      "learning_rate": 1.2053456837322557e-05,
      "loss": 0.396,
      "step": 11216
    },
    {
      "epoch": 2.3058896083872957,
      "grad_norm": 0.23228740692138672,
      "learning_rate": 1.204665426595183e-05,
      "loss": 0.4057,
      "step": 11217
    },
    {
      "epoch": 2.3060951793606743,
      "grad_norm": 0.2424495369195938,
      "learning_rate": 1.2039853318038428e-05,
      "loss": 0.4068,
      "step": 11218
    },
    {
      "epoch": 2.306300750334053,
      "grad_norm": 0.23171810805797577,
      "learning_rate": 1.2033053993917391e-05,
      "loss": 0.4152,
      "step": 11219
    },
    {
      "epoch": 2.3065063213074315,
      "grad_norm": 0.2335965633392334,
      "learning_rate": 1.2026256293923702e-05,
      "loss": 0.3733,
      "step": 11220
    },
    {
      "epoch": 2.30671189228081,
      "grad_norm": 0.12516964972019196,
      "learning_rate": 1.2019460218392243e-05,
      "loss": 0.4496,
      "step": 11221
    },
    {
      "epoch": 2.3069174632541887,
      "grad_norm": 0.2288234382867813,
      "learning_rate": 1.2012665767657825e-05,
      "loss": 0.3842,
      "step": 11222
    },
    {
      "epoch": 2.3071230342275673,
      "grad_norm": 0.23571978509426117,
      "learning_rate": 1.2005872942055177e-05,
      "loss": 0.4029,
      "step": 11223
    },
    {
      "epoch": 2.3073286052009454,
      "grad_norm": 0.23239515721797943,
      "learning_rate": 1.1999081741918965e-05,
      "loss": 0.4028,
      "step": 11224
    },
    {
      "epoch": 2.307534176174324,
      "grad_norm": 0.23048000037670135,
      "learning_rate": 1.1992292167583748e-05,
      "loss": 0.3883,
      "step": 11225
    },
    {
      "epoch": 2.3077397471477026,
      "grad_norm": 0.1262623518705368,
      "learning_rate": 1.198550421938402e-05,
      "loss": 0.4509,
      "step": 11226
    },
    {
      "epoch": 2.307945318121081,
      "grad_norm": 0.2399047166109085,
      "learning_rate": 1.1978717897654171e-05,
      "loss": 0.4162,
      "step": 11227
    },
    {
      "epoch": 2.3081508890944598,
      "grad_norm": 0.22697141766548157,
      "learning_rate": 1.197193320272857e-05,
      "loss": 0.3845,
      "step": 11228
    },
    {
      "epoch": 2.3083564600678383,
      "grad_norm": 0.2281046062707901,
      "learning_rate": 1.1965150134941447e-05,
      "loss": 0.3835,
      "step": 11229
    },
    {
      "epoch": 2.308562031041217,
      "grad_norm": 0.12404376268386841,
      "learning_rate": 1.1958368694626956e-05,
      "loss": 0.4376,
      "step": 11230
    },
    {
      "epoch": 2.3087676020145955,
      "grad_norm": 0.12131867557764053,
      "learning_rate": 1.195158888211922e-05,
      "loss": 0.4545,
      "step": 11231
    },
    {
      "epoch": 2.308973172987974,
      "grad_norm": 0.22881445288658142,
      "learning_rate": 1.194481069775223e-05,
      "loss": 0.4063,
      "step": 11232
    },
    {
      "epoch": 2.3091787439613527,
      "grad_norm": 0.22988468408584595,
      "learning_rate": 1.1938034141859915e-05,
      "loss": 0.4105,
      "step": 11233
    },
    {
      "epoch": 2.3093843149347313,
      "grad_norm": 0.23098687827587128,
      "learning_rate": 1.1931259214776129e-05,
      "loss": 0.3975,
      "step": 11234
    },
    {
      "epoch": 2.30958988590811,
      "grad_norm": 0.12407363951206207,
      "learning_rate": 1.1924485916834638e-05,
      "loss": 0.4472,
      "step": 11235
    },
    {
      "epoch": 2.3097954568814885,
      "grad_norm": 0.12328176200389862,
      "learning_rate": 1.1917714248369133e-05,
      "loss": 0.4449,
      "step": 11236
    },
    {
      "epoch": 2.310001027854867,
      "grad_norm": 0.22142189741134644,
      "learning_rate": 1.1910944209713205e-05,
      "loss": 0.3997,
      "step": 11237
    },
    {
      "epoch": 2.3102065988282456,
      "grad_norm": 0.2281443476676941,
      "learning_rate": 1.1904175801200417e-05,
      "loss": 0.3818,
      "step": 11238
    },
    {
      "epoch": 2.310412169801624,
      "grad_norm": 0.22729991376399994,
      "learning_rate": 1.1897409023164191e-05,
      "loss": 0.3928,
      "step": 11239
    },
    {
      "epoch": 2.310617740775003,
      "grad_norm": 0.12084699422121048,
      "learning_rate": 1.1890643875937904e-05,
      "loss": 0.4569,
      "step": 11240
    },
    {
      "epoch": 2.310823311748381,
      "grad_norm": 0.12548977136611938,
      "learning_rate": 1.1883880359854836e-05,
      "loss": 0.4437,
      "step": 11241
    },
    {
      "epoch": 2.3110288827217595,
      "grad_norm": 0.22213564813137054,
      "learning_rate": 1.1877118475248204e-05,
      "loss": 0.4011,
      "step": 11242
    },
    {
      "epoch": 2.311234453695138,
      "grad_norm": 0.2207585573196411,
      "learning_rate": 1.1870358222451127e-05,
      "loss": 0.4,
      "step": 11243
    },
    {
      "epoch": 2.3114400246685167,
      "grad_norm": 0.2309262752532959,
      "learning_rate": 1.1863599601796638e-05,
      "loss": 0.384,
      "step": 11244
    },
    {
      "epoch": 2.3116455956418953,
      "grad_norm": 0.22863119840621948,
      "learning_rate": 1.1856842613617734e-05,
      "loss": 0.3985,
      "step": 11245
    },
    {
      "epoch": 2.311851166615274,
      "grad_norm": 0.22216136753559113,
      "learning_rate": 1.1850087258247282e-05,
      "loss": 0.3878,
      "step": 11246
    },
    {
      "epoch": 2.3120567375886525,
      "grad_norm": 0.23234418034553528,
      "learning_rate": 1.1843333536018088e-05,
      "loss": 0.3844,
      "step": 11247
    },
    {
      "epoch": 2.312262308562031,
      "grad_norm": 0.22549466788768768,
      "learning_rate": 1.1836581447262865e-05,
      "loss": 0.3844,
      "step": 11248
    },
    {
      "epoch": 2.3124678795354097,
      "grad_norm": 0.2254628688097,
      "learning_rate": 1.1829830992314282e-05,
      "loss": 0.38,
      "step": 11249
    },
    {
      "epoch": 2.3126734505087883,
      "grad_norm": 0.23794369399547577,
      "learning_rate": 1.1823082171504888e-05,
      "loss": 0.38,
      "step": 11250
    },
    {
      "epoch": 2.312879021482167,
      "grad_norm": 0.1556072235107422,
      "learning_rate": 1.1816334985167152e-05,
      "loss": 0.4545,
      "step": 11251
    },
    {
      "epoch": 2.3130845924555454,
      "grad_norm": 0.23473793268203735,
      "learning_rate": 1.1809589433633507e-05,
      "loss": 0.4154,
      "step": 11252
    },
    {
      "epoch": 2.313290163428924,
      "grad_norm": 0.22591789066791534,
      "learning_rate": 1.1802845517236261e-05,
      "loss": 0.3782,
      "step": 11253
    },
    {
      "epoch": 2.313495734402302,
      "grad_norm": 0.22409707307815552,
      "learning_rate": 1.1796103236307647e-05,
      "loss": 0.3871,
      "step": 11254
    },
    {
      "epoch": 2.313701305375681,
      "grad_norm": 0.12136626243591309,
      "learning_rate": 1.1789362591179836e-05,
      "loss": 0.4417,
      "step": 11255
    },
    {
      "epoch": 2.3139068763490593,
      "grad_norm": 0.23068110644817352,
      "learning_rate": 1.1782623582184907e-05,
      "loss": 0.3921,
      "step": 11256
    },
    {
      "epoch": 2.314112447322438,
      "grad_norm": 0.22606144845485687,
      "learning_rate": 1.1775886209654853e-05,
      "loss": 0.4033,
      "step": 11257
    },
    {
      "epoch": 2.3143180182958165,
      "grad_norm": 0.23773600161075592,
      "learning_rate": 1.1769150473921582e-05,
      "loss": 0.4094,
      "step": 11258
    },
    {
      "epoch": 2.314523589269195,
      "grad_norm": 0.23489652574062347,
      "learning_rate": 1.1762416375316958e-05,
      "loss": 0.3755,
      "step": 11259
    },
    {
      "epoch": 2.3147291602425737,
      "grad_norm": 0.12201520800590515,
      "learning_rate": 1.1755683914172731e-05,
      "loss": 0.4488,
      "step": 11260
    },
    {
      "epoch": 2.3149347312159523,
      "grad_norm": 0.22625313699245453,
      "learning_rate": 1.1748953090820572e-05,
      "loss": 0.382,
      "step": 11261
    },
    {
      "epoch": 2.315140302189331,
      "grad_norm": 0.21789546310901642,
      "learning_rate": 1.1742223905592084e-05,
      "loss": 0.3877,
      "step": 11262
    },
    {
      "epoch": 2.3153458731627095,
      "grad_norm": 0.2211894392967224,
      "learning_rate": 1.1735496358818773e-05,
      "loss": 0.3978,
      "step": 11263
    },
    {
      "epoch": 2.315551444136088,
      "grad_norm": 0.22544537484645844,
      "learning_rate": 1.1728770450832078e-05,
      "loss": 0.3777,
      "step": 11264
    },
    {
      "epoch": 2.3157570151094666,
      "grad_norm": 0.23240074515342712,
      "learning_rate": 1.1722046181963344e-05,
      "loss": 0.3894,
      "step": 11265
    },
    {
      "epoch": 2.315962586082845,
      "grad_norm": 0.22723515331745148,
      "learning_rate": 1.1715323552543861e-05,
      "loss": 0.3761,
      "step": 11266
    },
    {
      "epoch": 2.316168157056224,
      "grad_norm": 0.2265399843454361,
      "learning_rate": 1.170860256290482e-05,
      "loss": 0.3725,
      "step": 11267
    },
    {
      "epoch": 2.3163737280296024,
      "grad_norm": 0.22929410636425018,
      "learning_rate": 1.1701883213377327e-05,
      "loss": 0.4007,
      "step": 11268
    },
    {
      "epoch": 2.3165792990029805,
      "grad_norm": 0.2396460622549057,
      "learning_rate": 1.1695165504292409e-05,
      "loss": 0.386,
      "step": 11269
    },
    {
      "epoch": 2.3167848699763596,
      "grad_norm": 0.23619569838047028,
      "learning_rate": 1.168844943598101e-05,
      "loss": 0.3854,
      "step": 11270
    },
    {
      "epoch": 2.3169904409497377,
      "grad_norm": 0.22975857555866241,
      "learning_rate": 1.168173500877402e-05,
      "loss": 0.3851,
      "step": 11271
    },
    {
      "epoch": 2.3171960119231163,
      "grad_norm": 0.23731692135334015,
      "learning_rate": 1.167502222300221e-05,
      "loss": 0.3812,
      "step": 11272
    },
    {
      "epoch": 2.317401582896495,
      "grad_norm": 0.22858087718486786,
      "learning_rate": 1.1668311078996303e-05,
      "loss": 0.387,
      "step": 11273
    },
    {
      "epoch": 2.3176071538698735,
      "grad_norm": 0.22912317514419556,
      "learning_rate": 1.1661601577086916e-05,
      "loss": 0.4138,
      "step": 11274
    },
    {
      "epoch": 2.317812724843252,
      "grad_norm": 0.2295382171869278,
      "learning_rate": 1.1654893717604597e-05,
      "loss": 0.4013,
      "step": 11275
    },
    {
      "epoch": 2.3180182958166307,
      "grad_norm": 0.1292608678340912,
      "learning_rate": 1.1648187500879812e-05,
      "loss": 0.4512,
      "step": 11276
    },
    {
      "epoch": 2.3182238667900092,
      "grad_norm": 0.23045098781585693,
      "learning_rate": 1.1641482927242945e-05,
      "loss": 0.4034,
      "step": 11277
    },
    {
      "epoch": 2.318429437763388,
      "grad_norm": 0.22682234644889832,
      "learning_rate": 1.1634779997024293e-05,
      "loss": 0.3821,
      "step": 11278
    },
    {
      "epoch": 2.3186350087367664,
      "grad_norm": 0.2304777354001999,
      "learning_rate": 1.1628078710554069e-05,
      "loss": 0.3779,
      "step": 11279
    },
    {
      "epoch": 2.318840579710145,
      "grad_norm": 0.2295672744512558,
      "learning_rate": 1.1621379068162438e-05,
      "loss": 0.3924,
      "step": 11280
    },
    {
      "epoch": 2.3190461506835236,
      "grad_norm": 0.23286469280719757,
      "learning_rate": 1.161468107017945e-05,
      "loss": 0.3817,
      "step": 11281
    },
    {
      "epoch": 2.319251721656902,
      "grad_norm": 0.12597419321537018,
      "learning_rate": 1.1607984716935084e-05,
      "loss": 0.4553,
      "step": 11282
    },
    {
      "epoch": 2.3194572926302808,
      "grad_norm": 0.2292589247226715,
      "learning_rate": 1.160129000875924e-05,
      "loss": 0.3939,
      "step": 11283
    },
    {
      "epoch": 2.319662863603659,
      "grad_norm": 0.2388840913772583,
      "learning_rate": 1.1594596945981732e-05,
      "loss": 0.3885,
      "step": 11284
    },
    {
      "epoch": 2.319868434577038,
      "grad_norm": 0.22787928581237793,
      "learning_rate": 1.1587905528932294e-05,
      "loss": 0.3977,
      "step": 11285
    },
    {
      "epoch": 2.320074005550416,
      "grad_norm": 0.23008286952972412,
      "learning_rate": 1.1581215757940565e-05,
      "loss": 0.3862,
      "step": 11286
    },
    {
      "epoch": 2.3202795765237947,
      "grad_norm": 0.22636668384075165,
      "learning_rate": 1.1574527633336158e-05,
      "loss": 0.4,
      "step": 11287
    },
    {
      "epoch": 2.3204851474971733,
      "grad_norm": 0.12164843082427979,
      "learning_rate": 1.1567841155448539e-05,
      "loss": 0.4519,
      "step": 11288
    },
    {
      "epoch": 2.320690718470552,
      "grad_norm": 0.22811272740364075,
      "learning_rate": 1.1561156324607123e-05,
      "loss": 0.3912,
      "step": 11289
    },
    {
      "epoch": 2.3208962894439304,
      "grad_norm": 0.2221514880657196,
      "learning_rate": 1.1554473141141244e-05,
      "loss": 0.3612,
      "step": 11290
    },
    {
      "epoch": 2.321101860417309,
      "grad_norm": 0.23008368909358978,
      "learning_rate": 1.154779160538014e-05,
      "loss": 0.3888,
      "step": 11291
    },
    {
      "epoch": 2.3213074313906876,
      "grad_norm": 0.23193509876728058,
      "learning_rate": 1.1541111717653002e-05,
      "loss": 0.3793,
      "step": 11292
    },
    {
      "epoch": 2.321513002364066,
      "grad_norm": 0.22582639753818512,
      "learning_rate": 1.1534433478288896e-05,
      "loss": 0.4062,
      "step": 11293
    },
    {
      "epoch": 2.321718573337445,
      "grad_norm": 0.5882457494735718,
      "learning_rate": 1.1527756887616828e-05,
      "loss": 0.4089,
      "step": 11294
    },
    {
      "epoch": 2.3219241443108234,
      "grad_norm": 0.23613165318965912,
      "learning_rate": 1.152108194596574e-05,
      "loss": 0.3803,
      "step": 11295
    },
    {
      "epoch": 2.322129715284202,
      "grad_norm": 0.24490775167942047,
      "learning_rate": 1.1514408653664464e-05,
      "loss": 0.4217,
      "step": 11296
    },
    {
      "epoch": 2.3223352862575806,
      "grad_norm": 0.2295404076576233,
      "learning_rate": 1.1507737011041767e-05,
      "loss": 0.3876,
      "step": 11297
    },
    {
      "epoch": 2.322540857230959,
      "grad_norm": 0.22926199436187744,
      "learning_rate": 1.150106701842632e-05,
      "loss": 0.4045,
      "step": 11298
    },
    {
      "epoch": 2.3227464282043373,
      "grad_norm": 0.23146659135818481,
      "learning_rate": 1.1494398676146716e-05,
      "loss": 0.3973,
      "step": 11299
    },
    {
      "epoch": 2.3229519991777163,
      "grad_norm": 0.2279983013868332,
      "learning_rate": 1.1487731984531497e-05,
      "loss": 0.3856,
      "step": 11300
    },
    {
      "epoch": 2.3231575701510945,
      "grad_norm": 0.22734786570072174,
      "learning_rate": 1.1481066943909086e-05,
      "loss": 0.395,
      "step": 11301
    },
    {
      "epoch": 2.323363141124473,
      "grad_norm": 0.12357629090547562,
      "learning_rate": 1.147440355460784e-05,
      "loss": 0.4503,
      "step": 11302
    },
    {
      "epoch": 2.3235687120978517,
      "grad_norm": 0.22878186404705048,
      "learning_rate": 1.1467741816956036e-05,
      "loss": 0.3805,
      "step": 11303
    },
    {
      "epoch": 2.3237742830712302,
      "grad_norm": 0.22551243007183075,
      "learning_rate": 1.1461081731281857e-05,
      "loss": 0.3962,
      "step": 11304
    },
    {
      "epoch": 2.323979854044609,
      "grad_norm": 0.22322127223014832,
      "learning_rate": 1.1454423297913425e-05,
      "loss": 0.3839,
      "step": 11305
    },
    {
      "epoch": 2.3241854250179874,
      "grad_norm": 0.12337585538625717,
      "learning_rate": 1.1447766517178752e-05,
      "loss": 0.4513,
      "step": 11306
    },
    {
      "epoch": 2.324390995991366,
      "grad_norm": 0.22409552335739136,
      "learning_rate": 1.1441111389405813e-05,
      "loss": 0.3851,
      "step": 11307
    },
    {
      "epoch": 2.3245965669647446,
      "grad_norm": 0.2322671264410019,
      "learning_rate": 1.1434457914922463e-05,
      "loss": 0.4114,
      "step": 11308
    },
    {
      "epoch": 2.324802137938123,
      "grad_norm": 0.23481951653957367,
      "learning_rate": 1.1427806094056486e-05,
      "loss": 0.4041,
      "step": 11309
    },
    {
      "epoch": 2.3250077089115018,
      "grad_norm": 0.2358068972826004,
      "learning_rate": 1.1421155927135584e-05,
      "loss": 0.404,
      "step": 11310
    },
    {
      "epoch": 2.3252132798848804,
      "grad_norm": 0.24007724225521088,
      "learning_rate": 1.1414507414487383e-05,
      "loss": 0.3907,
      "step": 11311
    },
    {
      "epoch": 2.325418850858259,
      "grad_norm": 0.2249882072210312,
      "learning_rate": 1.1407860556439413e-05,
      "loss": 0.4018,
      "step": 11312
    },
    {
      "epoch": 2.3256244218316375,
      "grad_norm": 0.21669505536556244,
      "learning_rate": 1.1401215353319158e-05,
      "loss": 0.3996,
      "step": 11313
    },
    {
      "epoch": 2.325829992805016,
      "grad_norm": 0.2299477905035019,
      "learning_rate": 1.139457180545398e-05,
      "loss": 0.3819,
      "step": 11314
    },
    {
      "epoch": 2.3260355637783947,
      "grad_norm": 0.22735092043876648,
      "learning_rate": 1.1387929913171164e-05,
      "loss": 0.3832,
      "step": 11315
    },
    {
      "epoch": 2.326241134751773,
      "grad_norm": 0.22514750063419342,
      "learning_rate": 1.1381289676797953e-05,
      "loss": 0.3827,
      "step": 11316
    },
    {
      "epoch": 2.3264467057251514,
      "grad_norm": 0.23412209749221802,
      "learning_rate": 1.1374651096661464e-05,
      "loss": 0.4225,
      "step": 11317
    },
    {
      "epoch": 2.32665227669853,
      "grad_norm": 0.23634769022464752,
      "learning_rate": 1.1368014173088757e-05,
      "loss": 0.412,
      "step": 11318
    },
    {
      "epoch": 2.3268578476719086,
      "grad_norm": 0.2300824671983719,
      "learning_rate": 1.136137890640679e-05,
      "loss": 0.3749,
      "step": 11319
    },
    {
      "epoch": 2.327063418645287,
      "grad_norm": 0.2358069270849228,
      "learning_rate": 1.135474529694245e-05,
      "loss": 0.4009,
      "step": 11320
    },
    {
      "epoch": 2.327268989618666,
      "grad_norm": 0.23068921267986298,
      "learning_rate": 1.134811334502256e-05,
      "loss": 0.3985,
      "step": 11321
    },
    {
      "epoch": 2.3274745605920444,
      "grad_norm": 0.22651554644107819,
      "learning_rate": 1.1341483050973838e-05,
      "loss": 0.38,
      "step": 11322
    },
    {
      "epoch": 2.327680131565423,
      "grad_norm": 0.22414909303188324,
      "learning_rate": 1.1334854415122924e-05,
      "loss": 0.3884,
      "step": 11323
    },
    {
      "epoch": 2.3278857025388016,
      "grad_norm": 0.21925905346870422,
      "learning_rate": 1.1328227437796389e-05,
      "loss": 0.3742,
      "step": 11324
    },
    {
      "epoch": 2.32809127351218,
      "grad_norm": 0.23087939620018005,
      "learning_rate": 1.1321602119320704e-05,
      "loss": 0.3872,
      "step": 11325
    },
    {
      "epoch": 2.3282968444855587,
      "grad_norm": 0.2237529307603836,
      "learning_rate": 1.131497846002227e-05,
      "loss": 0.3848,
      "step": 11326
    },
    {
      "epoch": 2.3285024154589373,
      "grad_norm": 0.22944872081279755,
      "learning_rate": 1.1308356460227386e-05,
      "loss": 0.4088,
      "step": 11327
    },
    {
      "epoch": 2.328707986432316,
      "grad_norm": 0.1283191293478012,
      "learning_rate": 1.1301736120262326e-05,
      "loss": 0.47,
      "step": 11328
    },
    {
      "epoch": 2.3289135574056945,
      "grad_norm": 0.22146999835968018,
      "learning_rate": 1.1295117440453219e-05,
      "loss": 0.3917,
      "step": 11329
    },
    {
      "epoch": 2.329119128379073,
      "grad_norm": 0.22980590164661407,
      "learning_rate": 1.1288500421126137e-05,
      "loss": 0.3876,
      "step": 11330
    },
    {
      "epoch": 2.3293246993524512,
      "grad_norm": 0.22274045646190643,
      "learning_rate": 1.1281885062607072e-05,
      "loss": 0.3849,
      "step": 11331
    },
    {
      "epoch": 2.32953027032583,
      "grad_norm": 0.22919537127017975,
      "learning_rate": 1.1275271365221938e-05,
      "loss": 0.3906,
      "step": 11332
    },
    {
      "epoch": 2.3297358412992084,
      "grad_norm": 0.1261204034090042,
      "learning_rate": 1.1268659329296534e-05,
      "loss": 0.444,
      "step": 11333
    },
    {
      "epoch": 2.329941412272587,
      "grad_norm": 0.2240409255027771,
      "learning_rate": 1.1262048955156643e-05,
      "loss": 0.3987,
      "step": 11334
    },
    {
      "epoch": 2.3301469832459656,
      "grad_norm": 0.125702366232872,
      "learning_rate": 1.1255440243127906e-05,
      "loss": 0.4473,
      "step": 11335
    },
    {
      "epoch": 2.330352554219344,
      "grad_norm": 0.22843293845653534,
      "learning_rate": 1.1248833193535898e-05,
      "loss": 0.4213,
      "step": 11336
    },
    {
      "epoch": 2.3305581251927228,
      "grad_norm": 0.23132173717021942,
      "learning_rate": 1.1242227806706137e-05,
      "loss": 0.3878,
      "step": 11337
    },
    {
      "epoch": 2.3307636961661014,
      "grad_norm": 0.23673327267169952,
      "learning_rate": 1.1235624082964025e-05,
      "loss": 0.3987,
      "step": 11338
    },
    {
      "epoch": 2.33096926713948,
      "grad_norm": 0.23916591703891754,
      "learning_rate": 1.1229022022634903e-05,
      "loss": 0.4045,
      "step": 11339
    },
    {
      "epoch": 2.3311748381128585,
      "grad_norm": 0.12463133037090302,
      "learning_rate": 1.122242162604402e-05,
      "loss": 0.4453,
      "step": 11340
    },
    {
      "epoch": 2.331380409086237,
      "grad_norm": 0.23358865082263947,
      "learning_rate": 1.1215822893516539e-05,
      "loss": 0.3772,
      "step": 11341
    },
    {
      "epoch": 2.3315859800596157,
      "grad_norm": 0.2250611037015915,
      "learning_rate": 1.1209225825377565e-05,
      "loss": 0.4015,
      "step": 11342
    },
    {
      "epoch": 2.3317915510329943,
      "grad_norm": 0.12012235075235367,
      "learning_rate": 1.1202630421952097e-05,
      "loss": 0.446,
      "step": 11343
    },
    {
      "epoch": 2.331997122006373,
      "grad_norm": 0.12716658413410187,
      "learning_rate": 1.1196036683565063e-05,
      "loss": 0.4522,
      "step": 11344
    },
    {
      "epoch": 2.3322026929797515,
      "grad_norm": 0.12125218659639359,
      "learning_rate": 1.11894446105413e-05,
      "loss": 0.4634,
      "step": 11345
    },
    {
      "epoch": 2.3324082639531296,
      "grad_norm": 0.23313722014427185,
      "learning_rate": 1.1182854203205569e-05,
      "loss": 0.4123,
      "step": 11346
    },
    {
      "epoch": 2.332613834926508,
      "grad_norm": 0.22456228733062744,
      "learning_rate": 1.1176265461882556e-05,
      "loss": 0.3851,
      "step": 11347
    },
    {
      "epoch": 2.332819405899887,
      "grad_norm": 0.22414372861385345,
      "learning_rate": 1.1169678386896833e-05,
      "loss": 0.4027,
      "step": 11348
    },
    {
      "epoch": 2.3330249768732654,
      "grad_norm": 0.2482268065214157,
      "learning_rate": 1.116309297857295e-05,
      "loss": 0.3893,
      "step": 11349
    },
    {
      "epoch": 2.333230547846644,
      "grad_norm": 0.2372516393661499,
      "learning_rate": 1.1156509237235325e-05,
      "loss": 0.3884,
      "step": 11350
    },
    {
      "epoch": 2.3334361188200226,
      "grad_norm": 0.23063679039478302,
      "learning_rate": 1.1149927163208297e-05,
      "loss": 0.3853,
      "step": 11351
    },
    {
      "epoch": 2.333641689793401,
      "grad_norm": 0.12314844876527786,
      "learning_rate": 1.114334675681615e-05,
      "loss": 0.4468,
      "step": 11352
    },
    {
      "epoch": 2.3338472607667797,
      "grad_norm": 0.22128140926361084,
      "learning_rate": 1.1136768018383064e-05,
      "loss": 0.3851,
      "step": 11353
    },
    {
      "epoch": 2.3340528317401583,
      "grad_norm": 0.22692500054836273,
      "learning_rate": 1.1130190948233133e-05,
      "loss": 0.3878,
      "step": 11354
    },
    {
      "epoch": 2.334258402713537,
      "grad_norm": 0.2241378277540207,
      "learning_rate": 1.1123615546690383e-05,
      "loss": 0.3838,
      "step": 11355
    },
    {
      "epoch": 2.3344639736869155,
      "grad_norm": 0.22740109264850616,
      "learning_rate": 1.1117041814078769e-05,
      "loss": 0.3741,
      "step": 11356
    },
    {
      "epoch": 2.334669544660294,
      "grad_norm": 0.25140267610549927,
      "learning_rate": 1.1110469750722118e-05,
      "loss": 0.3816,
      "step": 11357
    },
    {
      "epoch": 2.3348751156336727,
      "grad_norm": 0.22210964560508728,
      "learning_rate": 1.1103899356944239e-05,
      "loss": 0.3815,
      "step": 11358
    },
    {
      "epoch": 2.3350806866070513,
      "grad_norm": 0.2357717901468277,
      "learning_rate": 1.1097330633068806e-05,
      "loss": 0.3867,
      "step": 11359
    },
    {
      "epoch": 2.33528625758043,
      "grad_norm": 0.23202987015247345,
      "learning_rate": 1.1090763579419436e-05,
      "loss": 0.4003,
      "step": 11360
    },
    {
      "epoch": 2.335491828553808,
      "grad_norm": 0.2323846071958542,
      "learning_rate": 1.1084198196319653e-05,
      "loss": 0.3845,
      "step": 11361
    },
    {
      "epoch": 2.3356973995271866,
      "grad_norm": 0.22971893846988678,
      "learning_rate": 1.1077634484092887e-05,
      "loss": 0.3897,
      "step": 11362
    },
    {
      "epoch": 2.335902970500565,
      "grad_norm": 0.23653818666934967,
      "learning_rate": 1.1071072443062531e-05,
      "loss": 0.416,
      "step": 11363
    },
    {
      "epoch": 2.3361085414739438,
      "grad_norm": 0.21813298761844635,
      "learning_rate": 1.1064512073551854e-05,
      "loss": 0.3926,
      "step": 11364
    },
    {
      "epoch": 2.3363141124473223,
      "grad_norm": 0.24081604182720184,
      "learning_rate": 1.1057953375884053e-05,
      "loss": 0.3823,
      "step": 11365
    },
    {
      "epoch": 2.336519683420701,
      "grad_norm": 0.12533682584762573,
      "learning_rate": 1.1051396350382246e-05,
      "loss": 0.4627,
      "step": 11366
    },
    {
      "epoch": 2.3367252543940795,
      "grad_norm": 0.23893719911575317,
      "learning_rate": 1.104484099736946e-05,
      "loss": 0.3698,
      "step": 11367
    },
    {
      "epoch": 2.336930825367458,
      "grad_norm": 0.1185644194483757,
      "learning_rate": 1.1038287317168643e-05,
      "loss": 0.4715,
      "step": 11368
    },
    {
      "epoch": 2.3371363963408367,
      "grad_norm": 0.22912783920764923,
      "learning_rate": 1.1031735310102686e-05,
      "loss": 0.3963,
      "step": 11369
    },
    {
      "epoch": 2.3373419673142153,
      "grad_norm": 0.23992134630680084,
      "learning_rate": 1.1025184976494363e-05,
      "loss": 0.3906,
      "step": 11370
    },
    {
      "epoch": 2.337547538287594,
      "grad_norm": 0.2348276525735855,
      "learning_rate": 1.1018636316666378e-05,
      "loss": 0.4119,
      "step": 11371
    },
    {
      "epoch": 2.3377531092609725,
      "grad_norm": 0.23046445846557617,
      "learning_rate": 1.101208933094135e-05,
      "loss": 0.3736,
      "step": 11372
    },
    {
      "epoch": 2.337958680234351,
      "grad_norm": 0.22680574655532837,
      "learning_rate": 1.1005544019641824e-05,
      "loss": 0.3675,
      "step": 11373
    },
    {
      "epoch": 2.3381642512077296,
      "grad_norm": 0.22704631090164185,
      "learning_rate": 1.0999000383090255e-05,
      "loss": 0.4037,
      "step": 11374
    },
    {
      "epoch": 2.3383698221811082,
      "grad_norm": 0.23311007022857666,
      "learning_rate": 1.0992458421609007e-05,
      "loss": 0.3913,
      "step": 11375
    },
    {
      "epoch": 2.3385753931544864,
      "grad_norm": 0.23383252322673798,
      "learning_rate": 1.098591813552039e-05,
      "loss": 0.3879,
      "step": 11376
    },
    {
      "epoch": 2.338780964127865,
      "grad_norm": 0.2401203066110611,
      "learning_rate": 1.0979379525146603e-05,
      "loss": 0.4057,
      "step": 11377
    },
    {
      "epoch": 2.3389865351012435,
      "grad_norm": 0.23543764650821686,
      "learning_rate": 1.0972842590809783e-05,
      "loss": 0.3725,
      "step": 11378
    },
    {
      "epoch": 2.339192106074622,
      "grad_norm": 0.22404974699020386,
      "learning_rate": 1.0966307332831947e-05,
      "loss": 0.3833,
      "step": 11379
    },
    {
      "epoch": 2.3393976770480007,
      "grad_norm": 0.23188042640686035,
      "learning_rate": 1.0959773751535091e-05,
      "loss": 0.3922,
      "step": 11380
    },
    {
      "epoch": 2.3396032480213793,
      "grad_norm": 0.23337653279304504,
      "learning_rate": 1.0953241847241078e-05,
      "loss": 0.3864,
      "step": 11381
    },
    {
      "epoch": 2.339808818994758,
      "grad_norm": 0.2359674870967865,
      "learning_rate": 1.0946711620271692e-05,
      "loss": 0.4073,
      "step": 11382
    },
    {
      "epoch": 2.3400143899681365,
      "grad_norm": 0.22280322015285492,
      "learning_rate": 1.0940183070948668e-05,
      "loss": 0.3692,
      "step": 11383
    },
    {
      "epoch": 2.340219960941515,
      "grad_norm": 0.2298697531223297,
      "learning_rate": 1.0933656199593635e-05,
      "loss": 0.3965,
      "step": 11384
    },
    {
      "epoch": 2.3404255319148937,
      "grad_norm": 0.12579971551895142,
      "learning_rate": 1.0927131006528134e-05,
      "loss": 0.4416,
      "step": 11385
    },
    {
      "epoch": 2.3406311028882723,
      "grad_norm": 0.22117015719413757,
      "learning_rate": 1.0920607492073632e-05,
      "loss": 0.3884,
      "step": 11386
    },
    {
      "epoch": 2.340836673861651,
      "grad_norm": 0.22283059358596802,
      "learning_rate": 1.0914085656551514e-05,
      "loss": 0.3971,
      "step": 11387
    },
    {
      "epoch": 2.3410422448350294,
      "grad_norm": 0.2289050966501236,
      "learning_rate": 1.0907565500283078e-05,
      "loss": 0.4027,
      "step": 11388
    },
    {
      "epoch": 2.341247815808408,
      "grad_norm": 0.22611112892627716,
      "learning_rate": 1.0901047023589525e-05,
      "loss": 0.4097,
      "step": 11389
    },
    {
      "epoch": 2.3414533867817866,
      "grad_norm": 0.23010249435901642,
      "learning_rate": 1.0894530226792024e-05,
      "loss": 0.3971,
      "step": 11390
    },
    {
      "epoch": 2.3416589577551647,
      "grad_norm": 0.2295684963464737,
      "learning_rate": 1.088801511021161e-05,
      "loss": 0.371,
      "step": 11391
    },
    {
      "epoch": 2.3418645287285433,
      "grad_norm": 0.221123605966568,
      "learning_rate": 1.0881501674169247e-05,
      "loss": 0.3816,
      "step": 11392
    },
    {
      "epoch": 2.342070099701922,
      "grad_norm": 0.12064526975154877,
      "learning_rate": 1.0874989918985833e-05,
      "loss": 0.4318,
      "step": 11393
    },
    {
      "epoch": 2.3422756706753005,
      "grad_norm": 0.23293597996234894,
      "learning_rate": 1.0868479844982164e-05,
      "loss": 0.3857,
      "step": 11394
    },
    {
      "epoch": 2.342481241648679,
      "grad_norm": 0.22393792867660522,
      "learning_rate": 1.0861971452478966e-05,
      "loss": 0.3969,
      "step": 11395
    },
    {
      "epoch": 2.3426868126220577,
      "grad_norm": 0.12383504956960678,
      "learning_rate": 1.0855464741796857e-05,
      "loss": 0.4518,
      "step": 11396
    },
    {
      "epoch": 2.3428923835954363,
      "grad_norm": 0.2288213074207306,
      "learning_rate": 1.0848959713256421e-05,
      "loss": 0.3848,
      "step": 11397
    },
    {
      "epoch": 2.343097954568815,
      "grad_norm": 0.23577377200126648,
      "learning_rate": 1.0842456367178123e-05,
      "loss": 0.4115,
      "step": 11398
    },
    {
      "epoch": 2.3433035255421935,
      "grad_norm": 0.22047261893749237,
      "learning_rate": 1.0835954703882345e-05,
      "loss": 0.3738,
      "step": 11399
    },
    {
      "epoch": 2.343509096515572,
      "grad_norm": 0.22211310267448425,
      "learning_rate": 1.0829454723689383e-05,
      "loss": 0.4006,
      "step": 11400
    },
    {
      "epoch": 2.3437146674889506,
      "grad_norm": 0.23019267618656158,
      "learning_rate": 1.0822956426919487e-05,
      "loss": 0.3988,
      "step": 11401
    },
    {
      "epoch": 2.343920238462329,
      "grad_norm": 0.23312908411026,
      "learning_rate": 1.0816459813892787e-05,
      "loss": 0.3799,
      "step": 11402
    },
    {
      "epoch": 2.344125809435708,
      "grad_norm": 0.2296217679977417,
      "learning_rate": 1.0809964884929325e-05,
      "loss": 0.3731,
      "step": 11403
    },
    {
      "epoch": 2.3443313804090864,
      "grad_norm": 0.12692473828792572,
      "learning_rate": 1.08034716403491e-05,
      "loss": 0.4605,
      "step": 11404
    },
    {
      "epoch": 2.344536951382465,
      "grad_norm": 0.2210485190153122,
      "learning_rate": 1.0796980080471993e-05,
      "loss": 0.3822,
      "step": 11405
    },
    {
      "epoch": 2.344742522355843,
      "grad_norm": 0.22621271014213562,
      "learning_rate": 1.0790490205617812e-05,
      "loss": 0.3743,
      "step": 11406
    },
    {
      "epoch": 2.344948093329222,
      "grad_norm": 0.22742587327957153,
      "learning_rate": 1.0784002016106287e-05,
      "loss": 0.4062,
      "step": 11407
    },
    {
      "epoch": 2.3451536643026003,
      "grad_norm": 0.2238548845052719,
      "learning_rate": 1.0777515512257057e-05,
      "loss": 0.3738,
      "step": 11408
    },
    {
      "epoch": 2.345359235275979,
      "grad_norm": 0.2274450659751892,
      "learning_rate": 1.077103069438968e-05,
      "loss": 0.4024,
      "step": 11409
    },
    {
      "epoch": 2.3455648062493575,
      "grad_norm": 0.2332809865474701,
      "learning_rate": 1.0764547562823627e-05,
      "loss": 0.4046,
      "step": 11410
    },
    {
      "epoch": 2.345770377222736,
      "grad_norm": 0.1246822252869606,
      "learning_rate": 1.0758066117878307e-05,
      "loss": 0.4457,
      "step": 11411
    },
    {
      "epoch": 2.3459759481961147,
      "grad_norm": 0.2296641618013382,
      "learning_rate": 1.0751586359873026e-05,
      "loss": 0.394,
      "step": 11412
    },
    {
      "epoch": 2.3461815191694932,
      "grad_norm": 0.2302107959985733,
      "learning_rate": 1.0745108289127006e-05,
      "loss": 0.4005,
      "step": 11413
    },
    {
      "epoch": 2.346387090142872,
      "grad_norm": 0.12304381281137466,
      "learning_rate": 1.0738631905959397e-05,
      "loss": 0.4551,
      "step": 11414
    },
    {
      "epoch": 2.3465926611162504,
      "grad_norm": 0.23445133864879608,
      "learning_rate": 1.0732157210689257e-05,
      "loss": 0.3921,
      "step": 11415
    },
    {
      "epoch": 2.346798232089629,
      "grad_norm": 0.22406600415706635,
      "learning_rate": 1.0725684203635556e-05,
      "loss": 0.3952,
      "step": 11416
    },
    {
      "epoch": 2.3470038030630076,
      "grad_norm": 0.2265467792749405,
      "learning_rate": 1.0719212885117194e-05,
      "loss": 0.3897,
      "step": 11417
    },
    {
      "epoch": 2.347209374036386,
      "grad_norm": 0.22809205949306488,
      "learning_rate": 1.0712743255452993e-05,
      "loss": 0.3919,
      "step": 11418
    },
    {
      "epoch": 2.3474149450097648,
      "grad_norm": 0.12264318019151688,
      "learning_rate": 1.0706275314961672e-05,
      "loss": 0.4388,
      "step": 11419
    },
    {
      "epoch": 2.3476205159831434,
      "grad_norm": 0.26397988200187683,
      "learning_rate": 1.0699809063961879e-05,
      "loss": 0.3855,
      "step": 11420
    },
    {
      "epoch": 2.3478260869565215,
      "grad_norm": 0.2346579134464264,
      "learning_rate": 1.0693344502772162e-05,
      "loss": 0.397,
      "step": 11421
    },
    {
      "epoch": 2.3480316579299005,
      "grad_norm": 0.23680004477500916,
      "learning_rate": 1.0686881631711023e-05,
      "loss": 0.4192,
      "step": 11422
    },
    {
      "epoch": 2.3482372289032787,
      "grad_norm": 0.22599753737449646,
      "learning_rate": 1.0680420451096852e-05,
      "loss": 0.4073,
      "step": 11423
    },
    {
      "epoch": 2.3484427998766573,
      "grad_norm": 0.23660646378993988,
      "learning_rate": 1.0673960961247943e-05,
      "loss": 0.3879,
      "step": 11424
    },
    {
      "epoch": 2.348648370850036,
      "grad_norm": 0.26729151606559753,
      "learning_rate": 1.0667503162482548e-05,
      "loss": 0.3812,
      "step": 11425
    },
    {
      "epoch": 2.3488539418234144,
      "grad_norm": 0.22443972527980804,
      "learning_rate": 1.06610470551188e-05,
      "loss": 0.4038,
      "step": 11426
    },
    {
      "epoch": 2.349059512796793,
      "grad_norm": 0.22883421182632446,
      "learning_rate": 1.0654592639474768e-05,
      "loss": 0.3976,
      "step": 11427
    },
    {
      "epoch": 2.3492650837701716,
      "grad_norm": 0.23285576701164246,
      "learning_rate": 1.0648139915868425e-05,
      "loss": 0.3958,
      "step": 11428
    },
    {
      "epoch": 2.34947065474355,
      "grad_norm": 0.22203494608402252,
      "learning_rate": 1.0641688884617673e-05,
      "loss": 0.391,
      "step": 11429
    },
    {
      "epoch": 2.349676225716929,
      "grad_norm": 0.21967896819114685,
      "learning_rate": 1.0635239546040312e-05,
      "loss": 0.3793,
      "step": 11430
    },
    {
      "epoch": 2.3498817966903074,
      "grad_norm": 0.23787444829940796,
      "learning_rate": 1.062879190045407e-05,
      "loss": 0.3829,
      "step": 11431
    },
    {
      "epoch": 2.350087367663686,
      "grad_norm": 0.2242104560136795,
      "learning_rate": 1.0622345948176609e-05,
      "loss": 0.3986,
      "step": 11432
    },
    {
      "epoch": 2.3502929386370646,
      "grad_norm": 0.12349691241979599,
      "learning_rate": 1.0615901689525487e-05,
      "loss": 0.4521,
      "step": 11433
    },
    {
      "epoch": 2.350498509610443,
      "grad_norm": 0.2340456247329712,
      "learning_rate": 1.0609459124818177e-05,
      "loss": 0.419,
      "step": 11434
    },
    {
      "epoch": 2.3507040805838217,
      "grad_norm": 0.12247911095619202,
      "learning_rate": 1.0603018254372072e-05,
      "loss": 0.4609,
      "step": 11435
    },
    {
      "epoch": 2.3509096515572,
      "grad_norm": 0.2385515421628952,
      "learning_rate": 1.0596579078504486e-05,
      "loss": 0.3997,
      "step": 11436
    },
    {
      "epoch": 2.351115222530579,
      "grad_norm": 0.12545520067214966,
      "learning_rate": 1.0590141597532653e-05,
      "loss": 0.4411,
      "step": 11437
    },
    {
      "epoch": 2.351320793503957,
      "grad_norm": 0.23046888411045074,
      "learning_rate": 1.0583705811773695e-05,
      "loss": 0.3795,
      "step": 11438
    },
    {
      "epoch": 2.3515263644773357,
      "grad_norm": 0.12221966683864594,
      "learning_rate": 1.0577271721544703e-05,
      "loss": 0.4572,
      "step": 11439
    },
    {
      "epoch": 2.3517319354507142,
      "grad_norm": 0.22688139975070953,
      "learning_rate": 1.0570839327162644e-05,
      "loss": 0.3925,
      "step": 11440
    },
    {
      "epoch": 2.351937506424093,
      "grad_norm": 0.23011426627635956,
      "learning_rate": 1.056440862894441e-05,
      "loss": 0.3921,
      "step": 11441
    },
    {
      "epoch": 2.3521430773974714,
      "grad_norm": 0.2639561891555786,
      "learning_rate": 1.0557979627206812e-05,
      "loss": 0.3734,
      "step": 11442
    },
    {
      "epoch": 2.35234864837085,
      "grad_norm": 0.2354530692100525,
      "learning_rate": 1.055155232226656e-05,
      "loss": 0.3819,
      "step": 11443
    },
    {
      "epoch": 2.3525542193442286,
      "grad_norm": 0.23552900552749634,
      "learning_rate": 1.0545126714440329e-05,
      "loss": 0.3951,
      "step": 11444
    },
    {
      "epoch": 2.352759790317607,
      "grad_norm": 0.12986932694911957,
      "learning_rate": 1.0538702804044648e-05,
      "loss": 0.4338,
      "step": 11445
    },
    {
      "epoch": 2.3529653612909858,
      "grad_norm": 0.22587868571281433,
      "learning_rate": 1.0532280591396021e-05,
      "loss": 0.388,
      "step": 11446
    },
    {
      "epoch": 2.3531709322643644,
      "grad_norm": 0.22547636926174164,
      "learning_rate": 1.0525860076810829e-05,
      "loss": 0.3929,
      "step": 11447
    },
    {
      "epoch": 2.353376503237743,
      "grad_norm": 0.24222803115844727,
      "learning_rate": 1.0519441260605384e-05,
      "loss": 0.3973,
      "step": 11448
    },
    {
      "epoch": 2.3535820742111215,
      "grad_norm": 0.2281145453453064,
      "learning_rate": 1.0513024143095896e-05,
      "loss": 0.3693,
      "step": 11449
    },
    {
      "epoch": 2.3537876451845,
      "grad_norm": 0.22498665750026703,
      "learning_rate": 1.0506608724598525e-05,
      "loss": 0.3781,
      "step": 11450
    },
    {
      "epoch": 2.3539932161578783,
      "grad_norm": 0.12150565534830093,
      "learning_rate": 1.0500195005429303e-05,
      "loss": 0.4532,
      "step": 11451
    },
    {
      "epoch": 2.3541987871312573,
      "grad_norm": 0.23014621436595917,
      "learning_rate": 1.0493782985904235e-05,
      "loss": 0.3878,
      "step": 11452
    },
    {
      "epoch": 2.3544043581046354,
      "grad_norm": 0.2346828430891037,
      "learning_rate": 1.04873726663392e-05,
      "loss": 0.4009,
      "step": 11453
    },
    {
      "epoch": 2.354609929078014,
      "grad_norm": 0.21988657116889954,
      "learning_rate": 1.0480964047050002e-05,
      "loss": 0.3942,
      "step": 11454
    },
    {
      "epoch": 2.3548155000513926,
      "grad_norm": 0.12439004331827164,
      "learning_rate": 1.0474557128352365e-05,
      "loss": 0.4566,
      "step": 11455
    },
    {
      "epoch": 2.355021071024771,
      "grad_norm": 0.12461668252944946,
      "learning_rate": 1.0468151910561923e-05,
      "loss": 0.4609,
      "step": 11456
    },
    {
      "epoch": 2.35522664199815,
      "grad_norm": 0.11804953217506409,
      "learning_rate": 1.0461748393994234e-05,
      "loss": 0.4588,
      "step": 11457
    },
    {
      "epoch": 2.3554322129715284,
      "grad_norm": 0.2295227199792862,
      "learning_rate": 1.045534657896476e-05,
      "loss": 0.3971,
      "step": 11458
    },
    {
      "epoch": 2.355637783944907,
      "grad_norm": 0.22749020159244537,
      "learning_rate": 1.0448946465788915e-05,
      "loss": 0.4247,
      "step": 11459
    },
    {
      "epoch": 2.3558433549182856,
      "grad_norm": 0.22229236364364624,
      "learning_rate": 1.044254805478198e-05,
      "loss": 0.3964,
      "step": 11460
    },
    {
      "epoch": 2.356048925891664,
      "grad_norm": 0.2296680361032486,
      "learning_rate": 1.0436151346259184e-05,
      "loss": 0.402,
      "step": 11461
    },
    {
      "epoch": 2.3562544968650427,
      "grad_norm": 0.12308470159769058,
      "learning_rate": 1.0429756340535659e-05,
      "loss": 0.4583,
      "step": 11462
    },
    {
      "epoch": 2.3564600678384213,
      "grad_norm": 0.12049432843923569,
      "learning_rate": 1.0423363037926464e-05,
      "loss": 0.4624,
      "step": 11463
    },
    {
      "epoch": 2.3566656388118,
      "grad_norm": 0.12415426224470139,
      "learning_rate": 1.0416971438746542e-05,
      "loss": 0.4517,
      "step": 11464
    },
    {
      "epoch": 2.3568712097851785,
      "grad_norm": 0.2221984714269638,
      "learning_rate": 1.041058154331081e-05,
      "loss": 0.3924,
      "step": 11465
    },
    {
      "epoch": 2.3570767807585566,
      "grad_norm": 0.22418946027755737,
      "learning_rate": 1.0404193351934057e-05,
      "loss": 0.3781,
      "step": 11466
    },
    {
      "epoch": 2.3572823517319357,
      "grad_norm": 0.2208791971206665,
      "learning_rate": 1.0397806864930983e-05,
      "loss": 0.3731,
      "step": 11467
    },
    {
      "epoch": 2.357487922705314,
      "grad_norm": 0.23673607409000397,
      "learning_rate": 1.0391422082616247e-05,
      "loss": 0.3809,
      "step": 11468
    },
    {
      "epoch": 2.3576934936786924,
      "grad_norm": 0.22379258275032043,
      "learning_rate": 1.0385039005304386e-05,
      "loss": 0.401,
      "step": 11469
    },
    {
      "epoch": 2.357899064652071,
      "grad_norm": 0.2308909147977829,
      "learning_rate": 1.0378657633309862e-05,
      "loss": 0.3777,
      "step": 11470
    },
    {
      "epoch": 2.3581046356254496,
      "grad_norm": 0.12026369571685791,
      "learning_rate": 1.0372277966947059e-05,
      "loss": 0.4592,
      "step": 11471
    },
    {
      "epoch": 2.358310206598828,
      "grad_norm": 0.12578755617141724,
      "learning_rate": 1.036590000653026e-05,
      "loss": 0.4422,
      "step": 11472
    },
    {
      "epoch": 2.3585157775722068,
      "grad_norm": 0.23081423342227936,
      "learning_rate": 1.0359523752373694e-05,
      "loss": 0.3895,
      "step": 11473
    },
    {
      "epoch": 2.3587213485455854,
      "grad_norm": 0.1233346238732338,
      "learning_rate": 1.035314920479149e-05,
      "loss": 0.4362,
      "step": 11474
    },
    {
      "epoch": 2.358926919518964,
      "grad_norm": 0.23306210339069366,
      "learning_rate": 1.0346776364097683e-05,
      "loss": 0.3826,
      "step": 11475
    },
    {
      "epoch": 2.3591324904923425,
      "grad_norm": 0.23711657524108887,
      "learning_rate": 1.0340405230606235e-05,
      "loss": 0.3861,
      "step": 11476
    },
    {
      "epoch": 2.359338061465721,
      "grad_norm": 0.24400153756141663,
      "learning_rate": 1.0334035804631026e-05,
      "loss": 0.3896,
      "step": 11477
    },
    {
      "epoch": 2.3595436324390997,
      "grad_norm": 0.1271253228187561,
      "learning_rate": 1.0327668086485842e-05,
      "loss": 0.4421,
      "step": 11478
    },
    {
      "epoch": 2.3597492034124783,
      "grad_norm": 0.23349100351333618,
      "learning_rate": 1.0321302076484381e-05,
      "loss": 0.3748,
      "step": 11479
    },
    {
      "epoch": 2.359954774385857,
      "grad_norm": 0.22339333593845367,
      "learning_rate": 1.031493777494029e-05,
      "loss": 0.392,
      "step": 11480
    },
    {
      "epoch": 2.3601603453592355,
      "grad_norm": 0.23393899202346802,
      "learning_rate": 1.03085751821671e-05,
      "loss": 0.3857,
      "step": 11481
    },
    {
      "epoch": 2.360365916332614,
      "grad_norm": 0.22653932869434357,
      "learning_rate": 1.0302214298478262e-05,
      "loss": 0.3752,
      "step": 11482
    },
    {
      "epoch": 2.360571487305992,
      "grad_norm": 0.2276255041360855,
      "learning_rate": 1.0295855124187149e-05,
      "loss": 0.3894,
      "step": 11483
    },
    {
      "epoch": 2.360777058279371,
      "grad_norm": 0.22504010796546936,
      "learning_rate": 1.0289497659607049e-05,
      "loss": 0.355,
      "step": 11484
    },
    {
      "epoch": 2.3609826292527494,
      "grad_norm": 0.2319696694612503,
      "learning_rate": 1.0283141905051145e-05,
      "loss": 0.4006,
      "step": 11485
    },
    {
      "epoch": 2.361188200226128,
      "grad_norm": 0.23021776974201202,
      "learning_rate": 1.0276787860832589e-05,
      "loss": 0.3885,
      "step": 11486
    },
    {
      "epoch": 2.3613937711995066,
      "grad_norm": 0.22840525209903717,
      "learning_rate": 1.0270435527264398e-05,
      "loss": 0.3885,
      "step": 11487
    },
    {
      "epoch": 2.361599342172885,
      "grad_norm": 0.23946824669837952,
      "learning_rate": 1.0264084904659514e-05,
      "loss": 0.3887,
      "step": 11488
    },
    {
      "epoch": 2.3618049131462637,
      "grad_norm": 0.23394089937210083,
      "learning_rate": 1.025773599333082e-05,
      "loss": 0.387,
      "step": 11489
    },
    {
      "epoch": 2.3620104841196423,
      "grad_norm": 0.2347833514213562,
      "learning_rate": 1.0251388793591093e-05,
      "loss": 0.3909,
      "step": 11490
    },
    {
      "epoch": 2.362216055093021,
      "grad_norm": 0.24539333581924438,
      "learning_rate": 1.024504330575302e-05,
      "loss": 0.3911,
      "step": 11491
    },
    {
      "epoch": 2.3624216260663995,
      "grad_norm": 0.2272724211215973,
      "learning_rate": 1.0238699530129222e-05,
      "loss": 0.3899,
      "step": 11492
    },
    {
      "epoch": 2.362627197039778,
      "grad_norm": 0.1279131919145584,
      "learning_rate": 1.0232357467032217e-05,
      "loss": 0.4453,
      "step": 11493
    },
    {
      "epoch": 2.3628327680131567,
      "grad_norm": 0.21736563742160797,
      "learning_rate": 1.0226017116774459e-05,
      "loss": 0.3957,
      "step": 11494
    },
    {
      "epoch": 2.3630383389865353,
      "grad_norm": 0.22350220382213593,
      "learning_rate": 1.0219678479668308e-05,
      "loss": 0.38,
      "step": 11495
    },
    {
      "epoch": 2.363243909959914,
      "grad_norm": 0.22701016068458557,
      "learning_rate": 1.0213341556026038e-05,
      "loss": 0.3937,
      "step": 11496
    },
    {
      "epoch": 2.3634494809332924,
      "grad_norm": 0.23441599309444427,
      "learning_rate": 1.0207006346159835e-05,
      "loss": 0.3887,
      "step": 11497
    },
    {
      "epoch": 2.3636550519066706,
      "grad_norm": 0.2203342318534851,
      "learning_rate": 1.0200672850381808e-05,
      "loss": 0.3824,
      "step": 11498
    },
    {
      "epoch": 2.363860622880049,
      "grad_norm": 0.12455693632364273,
      "learning_rate": 1.0194341069003977e-05,
      "loss": 0.4432,
      "step": 11499
    },
    {
      "epoch": 2.3640661938534278,
      "grad_norm": 0.22522957623004913,
      "learning_rate": 1.0188011002338268e-05,
      "loss": 0.376,
      "step": 11500
    },
    {
      "epoch": 2.3642717648268063,
      "grad_norm": 0.23828500509262085,
      "learning_rate": 1.0181682650696563e-05,
      "loss": 0.394,
      "step": 11501
    },
    {
      "epoch": 2.364477335800185,
      "grad_norm": 0.1210595965385437,
      "learning_rate": 1.0175356014390606e-05,
      "loss": 0.4444,
      "step": 11502
    },
    {
      "epoch": 2.3646829067735635,
      "grad_norm": 0.22295325994491577,
      "learning_rate": 1.0169031093732092e-05,
      "loss": 0.3968,
      "step": 11503
    },
    {
      "epoch": 2.364888477746942,
      "grad_norm": 0.21954037249088287,
      "learning_rate": 1.016270788903262e-05,
      "loss": 0.3821,
      "step": 11504
    },
    {
      "epoch": 2.3650940487203207,
      "grad_norm": 0.23098480701446533,
      "learning_rate": 1.0156386400603697e-05,
      "loss": 0.3838,
      "step": 11505
    },
    {
      "epoch": 2.3652996196936993,
      "grad_norm": 0.2265908420085907,
      "learning_rate": 1.0150066628756741e-05,
      "loss": 0.4052,
      "step": 11506
    },
    {
      "epoch": 2.365505190667078,
      "grad_norm": 0.2330310344696045,
      "learning_rate": 1.0143748573803133e-05,
      "loss": 0.4044,
      "step": 11507
    },
    {
      "epoch": 2.3657107616404565,
      "grad_norm": 0.22517040371894836,
      "learning_rate": 1.0137432236054111e-05,
      "loss": 0.4007,
      "step": 11508
    },
    {
      "epoch": 2.365916332613835,
      "grad_norm": 0.22092685103416443,
      "learning_rate": 1.0131117615820847e-05,
      "loss": 0.395,
      "step": 11509
    },
    {
      "epoch": 2.3661219035872136,
      "grad_norm": 0.22711274027824402,
      "learning_rate": 1.0124804713414453e-05,
      "loss": 0.3662,
      "step": 11510
    },
    {
      "epoch": 2.3663274745605922,
      "grad_norm": 0.2373218983411789,
      "learning_rate": 1.011849352914592e-05,
      "loss": 0.3841,
      "step": 11511
    },
    {
      "epoch": 2.366533045533971,
      "grad_norm": 0.23521727323532104,
      "learning_rate": 1.011218406332618e-05,
      "loss": 0.3878,
      "step": 11512
    },
    {
      "epoch": 2.366738616507349,
      "grad_norm": 0.12808705866336823,
      "learning_rate": 1.0105876316266065e-05,
      "loss": 0.4659,
      "step": 11513
    },
    {
      "epoch": 2.3669441874807275,
      "grad_norm": 0.11950518935918808,
      "learning_rate": 1.0099570288276317e-05,
      "loss": 0.4365,
      "step": 11514
    },
    {
      "epoch": 2.367149758454106,
      "grad_norm": 0.24219125509262085,
      "learning_rate": 1.0093265979667625e-05,
      "loss": 0.3793,
      "step": 11515
    },
    {
      "epoch": 2.3673553294274847,
      "grad_norm": 0.22846874594688416,
      "learning_rate": 1.0086963390750568e-05,
      "loss": 0.3735,
      "step": 11516
    },
    {
      "epoch": 2.3675609004008633,
      "grad_norm": 0.23134097456932068,
      "learning_rate": 1.0080662521835643e-05,
      "loss": 0.3869,
      "step": 11517
    },
    {
      "epoch": 2.367766471374242,
      "grad_norm": 0.21544456481933594,
      "learning_rate": 1.0074363373233259e-05,
      "loss": 0.393,
      "step": 11518
    },
    {
      "epoch": 2.3679720423476205,
      "grad_norm": 0.22806456685066223,
      "learning_rate": 1.0068065945253753e-05,
      "loss": 0.3971,
      "step": 11519
    },
    {
      "epoch": 2.368177613320999,
      "grad_norm": 0.22958514094352722,
      "learning_rate": 1.0061770238207364e-05,
      "loss": 0.4065,
      "step": 11520
    },
    {
      "epoch": 2.3683831842943777,
      "grad_norm": 0.229364275932312,
      "learning_rate": 1.0055476252404244e-05,
      "loss": 0.394,
      "step": 11521
    },
    {
      "epoch": 2.3685887552677563,
      "grad_norm": 0.23312440514564514,
      "learning_rate": 1.0049183988154493e-05,
      "loss": 0.4033,
      "step": 11522
    },
    {
      "epoch": 2.368794326241135,
      "grad_norm": 0.22797515988349915,
      "learning_rate": 1.0042893445768084e-05,
      "loss": 0.3912,
      "step": 11523
    },
    {
      "epoch": 2.3689998972145134,
      "grad_norm": 0.23262247443199158,
      "learning_rate": 1.0036604625554923e-05,
      "loss": 0.3907,
      "step": 11524
    },
    {
      "epoch": 2.369205468187892,
      "grad_norm": 0.22848542034626007,
      "learning_rate": 1.003031752782484e-05,
      "loss": 0.3972,
      "step": 11525
    },
    {
      "epoch": 2.3694110391612706,
      "grad_norm": 0.21729277074337006,
      "learning_rate": 1.002403215288756e-05,
      "loss": 0.4045,
      "step": 11526
    },
    {
      "epoch": 2.369616610134649,
      "grad_norm": 0.22861436009407043,
      "learning_rate": 1.001774850105273e-05,
      "loss": 0.4129,
      "step": 11527
    },
    {
      "epoch": 2.3698221811080273,
      "grad_norm": 0.22693173587322235,
      "learning_rate": 1.0011466572629933e-05,
      "loss": 0.3786,
      "step": 11528
    },
    {
      "epoch": 2.370027752081406,
      "grad_norm": 0.23766165971755981,
      "learning_rate": 1.0005186367928648e-05,
      "loss": 0.406,
      "step": 11529
    },
    {
      "epoch": 2.3702333230547845,
      "grad_norm": 0.12284702807664871,
      "learning_rate": 9.998907887258245e-06,
      "loss": 0.4393,
      "step": 11530
    },
    {
      "epoch": 2.370438894028163,
      "grad_norm": 0.12830850481987,
      "learning_rate": 9.992631130928073e-06,
      "loss": 0.4596,
      "step": 11531
    },
    {
      "epoch": 2.3706444650015417,
      "grad_norm": 0.21804697811603546,
      "learning_rate": 9.986356099247343e-06,
      "loss": 0.3676,
      "step": 11532
    },
    {
      "epoch": 2.3708500359749203,
      "grad_norm": 0.12258250266313553,
      "learning_rate": 9.98008279252519e-06,
      "loss": 0.4617,
      "step": 11533
    },
    {
      "epoch": 2.371055606948299,
      "grad_norm": 0.22385314106941223,
      "learning_rate": 9.973811211070666e-06,
      "loss": 0.3938,
      "step": 11534
    },
    {
      "epoch": 2.3712611779216775,
      "grad_norm": 0.12500827014446259,
      "learning_rate": 9.967541355192763e-06,
      "loss": 0.4385,
      "step": 11535
    },
    {
      "epoch": 2.371466748895056,
      "grad_norm": 0.23873549699783325,
      "learning_rate": 9.961273225200353e-06,
      "loss": 0.3857,
      "step": 11536
    },
    {
      "epoch": 2.3716723198684346,
      "grad_norm": 0.23228701949119568,
      "learning_rate": 9.955006821402244e-06,
      "loss": 0.3898,
      "step": 11537
    },
    {
      "epoch": 2.371877890841813,
      "grad_norm": 0.12291909754276276,
      "learning_rate": 9.948742144107149e-06,
      "loss": 0.4612,
      "step": 11538
    },
    {
      "epoch": 2.372083461815192,
      "grad_norm": 0.23434710502624512,
      "learning_rate": 9.942479193623696e-06,
      "loss": 0.3871,
      "step": 11539
    },
    {
      "epoch": 2.3722890327885704,
      "grad_norm": 0.23191601037979126,
      "learning_rate": 9.936217970260437e-06,
      "loss": 0.4079,
      "step": 11540
    },
    {
      "epoch": 2.372494603761949,
      "grad_norm": 0.21654024720191956,
      "learning_rate": 9.929958474325821e-06,
      "loss": 0.387,
      "step": 11541
    },
    {
      "epoch": 2.3727001747353276,
      "grad_norm": 0.2176521271467209,
      "learning_rate": 9.923700706128245e-06,
      "loss": 0.4028,
      "step": 11542
    },
    {
      "epoch": 2.3729057457087057,
      "grad_norm": 0.22292236983776093,
      "learning_rate": 9.917444665975987e-06,
      "loss": 0.3789,
      "step": 11543
    },
    {
      "epoch": 2.3731113166820843,
      "grad_norm": 0.23066085577011108,
      "learning_rate": 9.911190354177257e-06,
      "loss": 0.3781,
      "step": 11544
    },
    {
      "epoch": 2.373316887655463,
      "grad_norm": 0.1312110424041748,
      "learning_rate": 9.904937771040172e-06,
      "loss": 0.4353,
      "step": 11545
    },
    {
      "epoch": 2.3735224586288415,
      "grad_norm": 0.22334595024585724,
      "learning_rate": 9.89868691687277e-06,
      "loss": 0.3894,
      "step": 11546
    },
    {
      "epoch": 2.37372802960222,
      "grad_norm": 0.2200348973274231,
      "learning_rate": 9.892437791983002e-06,
      "loss": 0.3792,
      "step": 11547
    },
    {
      "epoch": 2.3739336005755987,
      "grad_norm": 0.2263760268688202,
      "learning_rate": 9.886190396678715e-06,
      "loss": 0.3948,
      "step": 11548
    },
    {
      "epoch": 2.3741391715489772,
      "grad_norm": 0.22932201623916626,
      "learning_rate": 9.879944731267723e-06,
      "loss": 0.3927,
      "step": 11549
    },
    {
      "epoch": 2.374344742522356,
      "grad_norm": 0.22899407148361206,
      "learning_rate": 9.873700796057702e-06,
      "loss": 0.4045,
      "step": 11550
    },
    {
      "epoch": 2.3745503134957344,
      "grad_norm": 0.23349648714065552,
      "learning_rate": 9.867458591356262e-06,
      "loss": 0.3858,
      "step": 11551
    },
    {
      "epoch": 2.374755884469113,
      "grad_norm": 0.2280297577381134,
      "learning_rate": 9.861218117470914e-06,
      "loss": 0.3987,
      "step": 11552
    },
    {
      "epoch": 2.3749614554424916,
      "grad_norm": 0.11944809556007385,
      "learning_rate": 9.854979374709125e-06,
      "loss": 0.44,
      "step": 11553
    },
    {
      "epoch": 2.37516702641587,
      "grad_norm": 0.2443980574607849,
      "learning_rate": 9.848742363378233e-06,
      "loss": 0.3749,
      "step": 11554
    },
    {
      "epoch": 2.3753725973892488,
      "grad_norm": 0.224415123462677,
      "learning_rate": 9.8425070837855e-06,
      "loss": 0.4007,
      "step": 11555
    },
    {
      "epoch": 2.3755781683626274,
      "grad_norm": 0.23538914322853088,
      "learning_rate": 9.836273536238125e-06,
      "loss": 0.4024,
      "step": 11556
    },
    {
      "epoch": 2.375783739336006,
      "grad_norm": 0.2267664521932602,
      "learning_rate": 9.830041721043201e-06,
      "loss": 0.3676,
      "step": 11557
    },
    {
      "epoch": 2.375989310309384,
      "grad_norm": 0.2350446581840515,
      "learning_rate": 9.823811638507738e-06,
      "loss": 0.3737,
      "step": 11558
    },
    {
      "epoch": 2.3761948812827627,
      "grad_norm": 0.23056869208812714,
      "learning_rate": 9.81758328893866e-06,
      "loss": 0.3897,
      "step": 11559
    },
    {
      "epoch": 2.3764004522561413,
      "grad_norm": 0.22713732719421387,
      "learning_rate": 9.811356672642816e-06,
      "loss": 0.3669,
      "step": 11560
    },
    {
      "epoch": 2.37660602322952,
      "grad_norm": 0.22514687478542328,
      "learning_rate": 9.805131789926953e-06,
      "loss": 0.3922,
      "step": 11561
    },
    {
      "epoch": 2.3768115942028984,
      "grad_norm": 0.2302553504705429,
      "learning_rate": 9.798908641097734e-06,
      "loss": 0.3878,
      "step": 11562
    },
    {
      "epoch": 2.377017165176277,
      "grad_norm": 0.22958478331565857,
      "learning_rate": 9.792687226461768e-06,
      "loss": 0.3946,
      "step": 11563
    },
    {
      "epoch": 2.3772227361496556,
      "grad_norm": 0.2399352788925171,
      "learning_rate": 9.786467546325548e-06,
      "loss": 0.3835,
      "step": 11564
    },
    {
      "epoch": 2.377428307123034,
      "grad_norm": 0.22785188257694244,
      "learning_rate": 9.780249600995484e-06,
      "loss": 0.383,
      "step": 11565
    },
    {
      "epoch": 2.377633878096413,
      "grad_norm": 0.23024681210517883,
      "learning_rate": 9.774033390777902e-06,
      "loss": 0.379,
      "step": 11566
    },
    {
      "epoch": 2.3778394490697914,
      "grad_norm": 0.12375470250844955,
      "learning_rate": 9.767818915979052e-06,
      "loss": 0.4333,
      "step": 11567
    },
    {
      "epoch": 2.37804502004317,
      "grad_norm": 0.23087210953235626,
      "learning_rate": 9.761606176905089e-06,
      "loss": 0.3899,
      "step": 11568
    },
    {
      "epoch": 2.3782505910165486,
      "grad_norm": 0.12501628696918488,
      "learning_rate": 9.755395173862072e-06,
      "loss": 0.4761,
      "step": 11569
    },
    {
      "epoch": 2.378456161989927,
      "grad_norm": 0.23227210342884064,
      "learning_rate": 9.749185907156014e-06,
      "loss": 0.3867,
      "step": 11570
    },
    {
      "epoch": 2.3786617329633057,
      "grad_norm": 0.11980535089969635,
      "learning_rate": 9.742978377092805e-06,
      "loss": 0.4406,
      "step": 11571
    },
    {
      "epoch": 2.3788673039366843,
      "grad_norm": 0.23125259578227997,
      "learning_rate": 9.736772583978261e-06,
      "loss": 0.3782,
      "step": 11572
    },
    {
      "epoch": 2.3790728749100625,
      "grad_norm": 0.237775981426239,
      "learning_rate": 9.730568528118097e-06,
      "loss": 0.4088,
      "step": 11573
    },
    {
      "epoch": 2.3792784458834415,
      "grad_norm": 0.22310465574264526,
      "learning_rate": 9.724366209817991e-06,
      "loss": 0.3875,
      "step": 11574
    },
    {
      "epoch": 2.3794840168568197,
      "grad_norm": 0.23160605132579803,
      "learning_rate": 9.71816562938348e-06,
      "loss": 0.3908,
      "step": 11575
    },
    {
      "epoch": 2.3796895878301982,
      "grad_norm": 0.21605071425437927,
      "learning_rate": 9.711966787120025e-06,
      "loss": 0.3931,
      "step": 11576
    },
    {
      "epoch": 2.379895158803577,
      "grad_norm": 0.23142650723457336,
      "learning_rate": 9.705769683333049e-06,
      "loss": 0.3814,
      "step": 11577
    },
    {
      "epoch": 2.3801007297769554,
      "grad_norm": 0.2322172224521637,
      "learning_rate": 9.699574318327836e-06,
      "loss": 0.4077,
      "step": 11578
    },
    {
      "epoch": 2.380306300750334,
      "grad_norm": 0.23128941655158997,
      "learning_rate": 9.693380692409598e-06,
      "loss": 0.4085,
      "step": 11579
    },
    {
      "epoch": 2.3805118717237126,
      "grad_norm": 0.22898712754249573,
      "learning_rate": 9.687188805883475e-06,
      "loss": 0.3729,
      "step": 11580
    },
    {
      "epoch": 2.380717442697091,
      "grad_norm": 0.2282625287771225,
      "learning_rate": 9.680998659054504e-06,
      "loss": 0.3726,
      "step": 11581
    },
    {
      "epoch": 2.3809230136704698,
      "grad_norm": 0.2424619495868683,
      "learning_rate": 9.674810252227655e-06,
      "loss": 0.4017,
      "step": 11582
    },
    {
      "epoch": 2.3811285846438484,
      "grad_norm": 0.12431478500366211,
      "learning_rate": 9.668623585707774e-06,
      "loss": 0.4515,
      "step": 11583
    },
    {
      "epoch": 2.381334155617227,
      "grad_norm": 0.2225092202425003,
      "learning_rate": 9.662438659799689e-06,
      "loss": 0.3965,
      "step": 11584
    },
    {
      "epoch": 2.3815397265906055,
      "grad_norm": 0.2349206954240799,
      "learning_rate": 9.656255474808082e-06,
      "loss": 0.3851,
      "step": 11585
    },
    {
      "epoch": 2.381745297563984,
      "grad_norm": 0.22471074759960175,
      "learning_rate": 9.650074031037576e-06,
      "loss": 0.396,
      "step": 11586
    },
    {
      "epoch": 2.3819508685373627,
      "grad_norm": 0.13233044743537903,
      "learning_rate": 9.643894328792692e-06,
      "loss": 0.4617,
      "step": 11587
    },
    {
      "epoch": 2.382156439510741,
      "grad_norm": 0.11851814389228821,
      "learning_rate": 9.637716368377883e-06,
      "loss": 0.4364,
      "step": 11588
    },
    {
      "epoch": 2.38236201048412,
      "grad_norm": 0.22143539786338806,
      "learning_rate": 9.631540150097501e-06,
      "loss": 0.4004,
      "step": 11589
    },
    {
      "epoch": 2.382567581457498,
      "grad_norm": 0.2365567684173584,
      "learning_rate": 9.625365674255817e-06,
      "loss": 0.4103,
      "step": 11590
    },
    {
      "epoch": 2.3827731524308766,
      "grad_norm": 0.22856760025024414,
      "learning_rate": 9.619192941157033e-06,
      "loss": 0.3897,
      "step": 11591
    },
    {
      "epoch": 2.382978723404255,
      "grad_norm": 0.12115172296762466,
      "learning_rate": 9.613021951105246e-06,
      "loss": 0.456,
      "step": 11592
    },
    {
      "epoch": 2.383184294377634,
      "grad_norm": 0.2271842509508133,
      "learning_rate": 9.606852704404472e-06,
      "loss": 0.3896,
      "step": 11593
    },
    {
      "epoch": 2.3833898653510124,
      "grad_norm": 0.232135608792305,
      "learning_rate": 9.600685201358626e-06,
      "loss": 0.3863,
      "step": 11594
    },
    {
      "epoch": 2.383595436324391,
      "grad_norm": 0.23168498277664185,
      "learning_rate": 9.594519442271568e-06,
      "loss": 0.4031,
      "step": 11595
    },
    {
      "epoch": 2.3838010072977696,
      "grad_norm": 0.22451357543468475,
      "learning_rate": 9.588355427447062e-06,
      "loss": 0.3845,
      "step": 11596
    },
    {
      "epoch": 2.384006578271148,
      "grad_norm": 0.23108120262622833,
      "learning_rate": 9.582193157188753e-06,
      "loss": 0.3817,
      "step": 11597
    },
    {
      "epoch": 2.3842121492445267,
      "grad_norm": 0.2306068241596222,
      "learning_rate": 9.576032631800258e-06,
      "loss": 0.3839,
      "step": 11598
    },
    {
      "epoch": 2.3844177202179053,
      "grad_norm": 0.23344826698303223,
      "learning_rate": 9.569873851585067e-06,
      "loss": 0.3873,
      "step": 11599
    },
    {
      "epoch": 2.384623291191284,
      "grad_norm": 0.12073783576488495,
      "learning_rate": 9.563716816846585e-06,
      "loss": 0.4482,
      "step": 11600
    },
    {
      "epoch": 2.3848288621646625,
      "grad_norm": 0.23118554055690765,
      "learning_rate": 9.557561527888153e-06,
      "loss": 0.3992,
      "step": 11601
    },
    {
      "epoch": 2.385034433138041,
      "grad_norm": 0.24189937114715576,
      "learning_rate": 9.551407985013004e-06,
      "loss": 0.3896,
      "step": 11602
    },
    {
      "epoch": 2.3852400041114192,
      "grad_norm": 0.22828659415245056,
      "learning_rate": 9.545256188524287e-06,
      "loss": 0.3812,
      "step": 11603
    },
    {
      "epoch": 2.3854455750847983,
      "grad_norm": 0.22598852217197418,
      "learning_rate": 9.53910613872509e-06,
      "loss": 0.3918,
      "step": 11604
    },
    {
      "epoch": 2.3856511460581764,
      "grad_norm": 0.2214164286851883,
      "learning_rate": 9.532957835918392e-06,
      "loss": 0.3615,
      "step": 11605
    },
    {
      "epoch": 2.385856717031555,
      "grad_norm": 0.2324512004852295,
      "learning_rate": 9.526811280407091e-06,
      "loss": 0.3832,
      "step": 11606
    },
    {
      "epoch": 2.3860622880049336,
      "grad_norm": 0.22195158898830414,
      "learning_rate": 9.520666472493996e-06,
      "loss": 0.3767,
      "step": 11607
    },
    {
      "epoch": 2.386267858978312,
      "grad_norm": 0.23884356021881104,
      "learning_rate": 9.514523412481835e-06,
      "loss": 0.3979,
      "step": 11608
    },
    {
      "epoch": 2.3864734299516908,
      "grad_norm": 0.2285103052854538,
      "learning_rate": 9.508382100673247e-06,
      "loss": 0.3877,
      "step": 11609
    },
    {
      "epoch": 2.3866790009250693,
      "grad_norm": 0.24297171831130981,
      "learning_rate": 9.502242537370767e-06,
      "loss": 0.3847,
      "step": 11610
    },
    {
      "epoch": 2.386884571898448,
      "grad_norm": 0.23993346095085144,
      "learning_rate": 9.4961047228769e-06,
      "loss": 0.3909,
      "step": 11611
    },
    {
      "epoch": 2.3870901428718265,
      "grad_norm": 0.24006116390228271,
      "learning_rate": 9.489968657494006e-06,
      "loss": 0.3865,
      "step": 11612
    },
    {
      "epoch": 2.387295713845205,
      "grad_norm": 0.23091156780719757,
      "learning_rate": 9.483834341524384e-06,
      "loss": 0.3936,
      "step": 11613
    },
    {
      "epoch": 2.3875012848185837,
      "grad_norm": 0.12236663699150085,
      "learning_rate": 9.477701775270241e-06,
      "loss": 0.4518,
      "step": 11614
    },
    {
      "epoch": 2.3877068557919623,
      "grad_norm": 0.2414701133966446,
      "learning_rate": 9.471570959033699e-06,
      "loss": 0.3928,
      "step": 11615
    },
    {
      "epoch": 2.387912426765341,
      "grad_norm": 0.22857315838336945,
      "learning_rate": 9.465441893116786e-06,
      "loss": 0.3743,
      "step": 11616
    },
    {
      "epoch": 2.3881179977387195,
      "grad_norm": 0.2300974428653717,
      "learning_rate": 9.459314577821475e-06,
      "loss": 0.3847,
      "step": 11617
    },
    {
      "epoch": 2.3883235687120976,
      "grad_norm": 0.12099748104810715,
      "learning_rate": 9.453189013449605e-06,
      "loss": 0.4291,
      "step": 11618
    },
    {
      "epoch": 2.3885291396854766,
      "grad_norm": 0.1314156949520111,
      "learning_rate": 9.44706520030298e-06,
      "loss": 0.4537,
      "step": 11619
    },
    {
      "epoch": 2.388734710658855,
      "grad_norm": 0.22321221232414246,
      "learning_rate": 9.44094313868328e-06,
      "loss": 0.3879,
      "step": 11620
    },
    {
      "epoch": 2.3889402816322334,
      "grad_norm": 0.22418095171451569,
      "learning_rate": 9.434822828892105e-06,
      "loss": 0.37,
      "step": 11621
    },
    {
      "epoch": 2.389145852605612,
      "grad_norm": 0.23237618803977966,
      "learning_rate": 9.428704271230982e-06,
      "loss": 0.4108,
      "step": 11622
    },
    {
      "epoch": 2.3893514235789906,
      "grad_norm": 0.235540971159935,
      "learning_rate": 9.42258746600134e-06,
      "loss": 0.3878,
      "step": 11623
    },
    {
      "epoch": 2.389556994552369,
      "grad_norm": 0.2259136289358139,
      "learning_rate": 9.41647241350451e-06,
      "loss": 0.385,
      "step": 11624
    },
    {
      "epoch": 2.3897625655257477,
      "grad_norm": 0.2294631153345108,
      "learning_rate": 9.41035911404178e-06,
      "loss": 0.3939,
      "step": 11625
    },
    {
      "epoch": 2.3899681364991263,
      "grad_norm": 0.23754870891571045,
      "learning_rate": 9.404247567914311e-06,
      "loss": 0.3749,
      "step": 11626
    },
    {
      "epoch": 2.390173707472505,
      "grad_norm": 0.2304736226797104,
      "learning_rate": 9.398137775423193e-06,
      "loss": 0.4073,
      "step": 11627
    },
    {
      "epoch": 2.3903792784458835,
      "grad_norm": 0.22400328516960144,
      "learning_rate": 9.392029736869421e-06,
      "loss": 0.4066,
      "step": 11628
    },
    {
      "epoch": 2.390584849419262,
      "grad_norm": 0.2297855019569397,
      "learning_rate": 9.385923452553912e-06,
      "loss": 0.3995,
      "step": 11629
    },
    {
      "epoch": 2.3907904203926407,
      "grad_norm": 0.22708038985729218,
      "learning_rate": 9.379818922777499e-06,
      "loss": 0.3896,
      "step": 11630
    },
    {
      "epoch": 2.3909959913660193,
      "grad_norm": 0.22796861827373505,
      "learning_rate": 9.373716147840904e-06,
      "loss": 0.3939,
      "step": 11631
    },
    {
      "epoch": 2.391201562339398,
      "grad_norm": 0.2315075695514679,
      "learning_rate": 9.367615128044811e-06,
      "loss": 0.3848,
      "step": 11632
    },
    {
      "epoch": 2.391407133312776,
      "grad_norm": 0.23707285523414612,
      "learning_rate": 9.361515863689775e-06,
      "loss": 0.3923,
      "step": 11633
    },
    {
      "epoch": 2.391612704286155,
      "grad_norm": 0.2384757399559021,
      "learning_rate": 9.355418355076277e-06,
      "loss": 0.362,
      "step": 11634
    },
    {
      "epoch": 2.391818275259533,
      "grad_norm": 0.23415617644786835,
      "learning_rate": 9.349322602504717e-06,
      "loss": 0.4033,
      "step": 11635
    },
    {
      "epoch": 2.3920238462329118,
      "grad_norm": 0.23738761246204376,
      "learning_rate": 9.343228606275398e-06,
      "loss": 0.389,
      "step": 11636
    },
    {
      "epoch": 2.3922294172062903,
      "grad_norm": 0.2381700575351715,
      "learning_rate": 9.337136366688534e-06,
      "loss": 0.396,
      "step": 11637
    },
    {
      "epoch": 2.392434988179669,
      "grad_norm": 0.22899046540260315,
      "learning_rate": 9.331045884044288e-06,
      "loss": 0.3902,
      "step": 11638
    },
    {
      "epoch": 2.3926405591530475,
      "grad_norm": 0.24088416993618011,
      "learning_rate": 9.324957158642698e-06,
      "loss": 0.4191,
      "step": 11639
    },
    {
      "epoch": 2.392846130126426,
      "grad_norm": 0.12892726063728333,
      "learning_rate": 9.318870190783708e-06,
      "loss": 0.4628,
      "step": 11640
    },
    {
      "epoch": 2.3930517010998047,
      "grad_norm": 0.23511195182800293,
      "learning_rate": 9.312784980767221e-06,
      "loss": 0.4036,
      "step": 11641
    },
    {
      "epoch": 2.3932572720731833,
      "grad_norm": 0.1235305592417717,
      "learning_rate": 9.306701528893022e-06,
      "loss": 0.4505,
      "step": 11642
    },
    {
      "epoch": 2.393462843046562,
      "grad_norm": 0.33878177404403687,
      "learning_rate": 9.300619835460804e-06,
      "loss": 0.3857,
      "step": 11643
    },
    {
      "epoch": 2.3936684140199405,
      "grad_norm": 0.1223718672990799,
      "learning_rate": 9.294539900770187e-06,
      "loss": 0.4886,
      "step": 11644
    },
    {
      "epoch": 2.393873984993319,
      "grad_norm": 0.2304789125919342,
      "learning_rate": 9.288461725120694e-06,
      "loss": 0.3925,
      "step": 11645
    },
    {
      "epoch": 2.3940795559666976,
      "grad_norm": 0.2310042679309845,
      "learning_rate": 9.282385308811784e-06,
      "loss": 0.3862,
      "step": 11646
    },
    {
      "epoch": 2.3942851269400762,
      "grad_norm": 0.22798366844654083,
      "learning_rate": 9.276310652142813e-06,
      "loss": 0.3814,
      "step": 11647
    },
    {
      "epoch": 2.394490697913455,
      "grad_norm": 0.23074646294116974,
      "learning_rate": 9.270237755413042e-06,
      "loss": 0.3983,
      "step": 11648
    },
    {
      "epoch": 2.3946962688868334,
      "grad_norm": 0.11851920187473297,
      "learning_rate": 9.264166618921649e-06,
      "loss": 0.4514,
      "step": 11649
    },
    {
      "epoch": 2.3949018398602115,
      "grad_norm": 0.22583618760108948,
      "learning_rate": 9.258097242967744e-06,
      "loss": 0.3941,
      "step": 11650
    },
    {
      "epoch": 2.39510741083359,
      "grad_norm": 0.23350371420383453,
      "learning_rate": 9.252029627850334e-06,
      "loss": 0.3911,
      "step": 11651
    },
    {
      "epoch": 2.3953129818069687,
      "grad_norm": 0.22866030037403107,
      "learning_rate": 9.245963773868321e-06,
      "loss": 0.3851,
      "step": 11652
    },
    {
      "epoch": 2.3955185527803473,
      "grad_norm": 0.23153471946716309,
      "learning_rate": 9.239899681320573e-06,
      "loss": 0.3953,
      "step": 11653
    },
    {
      "epoch": 2.395724123753726,
      "grad_norm": 0.24449722468852997,
      "learning_rate": 9.233837350505824e-06,
      "loss": 0.3887,
      "step": 11654
    },
    {
      "epoch": 2.3959296947271045,
      "grad_norm": 0.23732249438762665,
      "learning_rate": 9.22777678172274e-06,
      "loss": 0.3858,
      "step": 11655
    },
    {
      "epoch": 2.396135265700483,
      "grad_norm": 0.231268510222435,
      "learning_rate": 9.221717975269895e-06,
      "loss": 0.3985,
      "step": 11656
    },
    {
      "epoch": 2.3963408366738617,
      "grad_norm": 0.23087145388126373,
      "learning_rate": 9.215660931445777e-06,
      "loss": 0.4104,
      "step": 11657
    },
    {
      "epoch": 2.3965464076472403,
      "grad_norm": 0.12480619549751282,
      "learning_rate": 9.209605650548777e-06,
      "loss": 0.4454,
      "step": 11658
    },
    {
      "epoch": 2.396751978620619,
      "grad_norm": 0.2369321882724762,
      "learning_rate": 9.203552132877233e-06,
      "loss": 0.3862,
      "step": 11659
    },
    {
      "epoch": 2.3969575495939974,
      "grad_norm": 0.22652901709079742,
      "learning_rate": 9.197500378729366e-06,
      "loss": 0.3744,
      "step": 11660
    },
    {
      "epoch": 2.397163120567376,
      "grad_norm": 0.2313791662454605,
      "learning_rate": 9.191450388403304e-06,
      "loss": 0.3994,
      "step": 11661
    },
    {
      "epoch": 2.3973686915407546,
      "grad_norm": 0.22537241876125336,
      "learning_rate": 9.18540216219712e-06,
      "loss": 0.3834,
      "step": 11662
    },
    {
      "epoch": 2.397574262514133,
      "grad_norm": 0.12685616314411163,
      "learning_rate": 9.17935570040878e-06,
      "loss": 0.4568,
      "step": 11663
    },
    {
      "epoch": 2.397779833487512,
      "grad_norm": 0.24072742462158203,
      "learning_rate": 9.173311003336157e-06,
      "loss": 0.3874,
      "step": 11664
    },
    {
      "epoch": 2.39798540446089,
      "grad_norm": 0.2216685265302658,
      "learning_rate": 9.167268071277045e-06,
      "loss": 0.4017,
      "step": 11665
    },
    {
      "epoch": 2.3981909754342685,
      "grad_norm": 0.1218583807349205,
      "learning_rate": 9.161226904529145e-06,
      "loss": 0.4435,
      "step": 11666
    },
    {
      "epoch": 2.398396546407647,
      "grad_norm": 0.23101285099983215,
      "learning_rate": 9.155187503390094e-06,
      "loss": 0.3781,
      "step": 11667
    },
    {
      "epoch": 2.3986021173810257,
      "grad_norm": 0.23491719365119934,
      "learning_rate": 9.14914986815742e-06,
      "loss": 0.394,
      "step": 11668
    },
    {
      "epoch": 2.3988076883544043,
      "grad_norm": 0.23189514875411987,
      "learning_rate": 9.143113999128563e-06,
      "loss": 0.3847,
      "step": 11669
    },
    {
      "epoch": 2.399013259327783,
      "grad_norm": 0.2177298218011856,
      "learning_rate": 9.137079896600887e-06,
      "loss": 0.3886,
      "step": 11670
    },
    {
      "epoch": 2.3992188303011615,
      "grad_norm": 0.1185031533241272,
      "learning_rate": 9.131047560871658e-06,
      "loss": 0.4323,
      "step": 11671
    },
    {
      "epoch": 2.39942440127454,
      "grad_norm": 0.22408519685268402,
      "learning_rate": 9.12501699223807e-06,
      "loss": 0.3679,
      "step": 11672
    },
    {
      "epoch": 2.3996299722479186,
      "grad_norm": 0.23200219869613647,
      "learning_rate": 9.118988190997197e-06,
      "loss": 0.3909,
      "step": 11673
    },
    {
      "epoch": 2.399835543221297,
      "grad_norm": 0.22250621020793915,
      "learning_rate": 9.112961157446087e-06,
      "loss": 0.3789,
      "step": 11674
    },
    {
      "epoch": 2.400041114194676,
      "grad_norm": 0.2219180166721344,
      "learning_rate": 9.106935891881641e-06,
      "loss": 0.3725,
      "step": 11675
    },
    {
      "epoch": 2.4002466851680544,
      "grad_norm": 0.2245936095714569,
      "learning_rate": 9.1009123946007e-06,
      "loss": 0.401,
      "step": 11676
    },
    {
      "epoch": 2.400452256141433,
      "grad_norm": 0.2297823131084442,
      "learning_rate": 9.094890665900018e-06,
      "loss": 0.3871,
      "step": 11677
    },
    {
      "epoch": 2.4006578271148116,
      "grad_norm": 0.2330087423324585,
      "learning_rate": 9.088870706076245e-06,
      "loss": 0.4198,
      "step": 11678
    },
    {
      "epoch": 2.40086339808819,
      "grad_norm": 0.23439383506774902,
      "learning_rate": 9.08285251542596e-06,
      "loss": 0.3966,
      "step": 11679
    },
    {
      "epoch": 2.4010689690615683,
      "grad_norm": 0.12889879941940308,
      "learning_rate": 9.076836094245659e-06,
      "loss": 0.4475,
      "step": 11680
    },
    {
      "epoch": 2.401274540034947,
      "grad_norm": 0.22724612057209015,
      "learning_rate": 9.070821442831747e-06,
      "loss": 0.3952,
      "step": 11681
    },
    {
      "epoch": 2.4014801110083255,
      "grad_norm": 0.22570443153381348,
      "learning_rate": 9.064808561480513e-06,
      "loss": 0.3949,
      "step": 11682
    },
    {
      "epoch": 2.401685681981704,
      "grad_norm": 0.22554244101047516,
      "learning_rate": 9.058797450488212e-06,
      "loss": 0.4023,
      "step": 11683
    },
    {
      "epoch": 2.4018912529550827,
      "grad_norm": 0.12734529376029968,
      "learning_rate": 9.052788110150975e-06,
      "loss": 0.4305,
      "step": 11684
    },
    {
      "epoch": 2.4020968239284612,
      "grad_norm": 0.23667073249816895,
      "learning_rate": 9.046780540764853e-06,
      "loss": 0.3961,
      "step": 11685
    },
    {
      "epoch": 2.40230239490184,
      "grad_norm": 0.12144028395414352,
      "learning_rate": 9.040774742625795e-06,
      "loss": 0.4524,
      "step": 11686
    },
    {
      "epoch": 2.4025079658752184,
      "grad_norm": 0.2276497334241867,
      "learning_rate": 9.034770716029703e-06,
      "loss": 0.3837,
      "step": 11687
    },
    {
      "epoch": 2.402713536848597,
      "grad_norm": 0.23129193484783173,
      "learning_rate": 9.028768461272352e-06,
      "loss": 0.384,
      "step": 11688
    },
    {
      "epoch": 2.4029191078219756,
      "grad_norm": 0.21576765179634094,
      "learning_rate": 9.022767978649457e-06,
      "loss": 0.4049,
      "step": 11689
    },
    {
      "epoch": 2.403124678795354,
      "grad_norm": 0.2269795835018158,
      "learning_rate": 9.016769268456623e-06,
      "loss": 0.3741,
      "step": 11690
    },
    {
      "epoch": 2.4033302497687328,
      "grad_norm": 0.22810319066047668,
      "learning_rate": 9.010772330989387e-06,
      "loss": 0.4111,
      "step": 11691
    },
    {
      "epoch": 2.4035358207421114,
      "grad_norm": 0.23659124970436096,
      "learning_rate": 9.00477716654318e-06,
      "loss": 0.4142,
      "step": 11692
    },
    {
      "epoch": 2.40374139171549,
      "grad_norm": 0.21605411171913147,
      "learning_rate": 8.998783775413351e-06,
      "loss": 0.3838,
      "step": 11693
    },
    {
      "epoch": 2.4039469626888685,
      "grad_norm": 0.23164892196655273,
      "learning_rate": 8.992792157895186e-06,
      "loss": 0.3911,
      "step": 11694
    },
    {
      "epoch": 2.4041525336622467,
      "grad_norm": 0.23304125666618347,
      "learning_rate": 8.986802314283856e-06,
      "loss": 0.3949,
      "step": 11695
    },
    {
      "epoch": 2.4043581046356253,
      "grad_norm": 0.2246290147304535,
      "learning_rate": 8.980814244874447e-06,
      "loss": 0.373,
      "step": 11696
    },
    {
      "epoch": 2.404563675609004,
      "grad_norm": 0.23660001158714294,
      "learning_rate": 8.974827949961973e-06,
      "loss": 0.3805,
      "step": 11697
    },
    {
      "epoch": 2.4047692465823824,
      "grad_norm": 0.2283889651298523,
      "learning_rate": 8.968843429841342e-06,
      "loss": 0.3934,
      "step": 11698
    },
    {
      "epoch": 2.404974817555761,
      "grad_norm": 0.22905899584293365,
      "learning_rate": 8.962860684807384e-06,
      "loss": 0.3994,
      "step": 11699
    },
    {
      "epoch": 2.4051803885291396,
      "grad_norm": 0.21978145837783813,
      "learning_rate": 8.956879715154832e-06,
      "loss": 0.3818,
      "step": 11700
    },
    {
      "epoch": 2.405385959502518,
      "grad_norm": 0.2412233203649521,
      "learning_rate": 8.950900521178367e-06,
      "loss": 0.3827,
      "step": 11701
    },
    {
      "epoch": 2.405591530475897,
      "grad_norm": 0.2382228821516037,
      "learning_rate": 8.944923103172537e-06,
      "loss": 0.3949,
      "step": 11702
    },
    {
      "epoch": 2.4057971014492754,
      "grad_norm": 0.24121670424938202,
      "learning_rate": 8.938947461431813e-06,
      "loss": 0.3916,
      "step": 11703
    },
    {
      "epoch": 2.406002672422654,
      "grad_norm": 0.12582828104496002,
      "learning_rate": 8.932973596250607e-06,
      "loss": 0.4566,
      "step": 11704
    },
    {
      "epoch": 2.4062082433960326,
      "grad_norm": 0.11934048682451248,
      "learning_rate": 8.927001507923221e-06,
      "loss": 0.456,
      "step": 11705
    },
    {
      "epoch": 2.406413814369411,
      "grad_norm": 0.22901131212711334,
      "learning_rate": 8.921031196743864e-06,
      "loss": 0.374,
      "step": 11706
    },
    {
      "epoch": 2.4066193853427897,
      "grad_norm": 0.23406758904457092,
      "learning_rate": 8.915062663006655e-06,
      "loss": 0.3698,
      "step": 11707
    },
    {
      "epoch": 2.4068249563161683,
      "grad_norm": 0.23848240077495575,
      "learning_rate": 8.909095907005659e-06,
      "loss": 0.3978,
      "step": 11708
    },
    {
      "epoch": 2.407030527289547,
      "grad_norm": 0.22418878972530365,
      "learning_rate": 8.903130929034822e-06,
      "loss": 0.3848,
      "step": 11709
    },
    {
      "epoch": 2.407236098262925,
      "grad_norm": 0.22299997508525848,
      "learning_rate": 8.897167729388002e-06,
      "loss": 0.3901,
      "step": 11710
    },
    {
      "epoch": 2.4074416692363036,
      "grad_norm": 0.22833383083343506,
      "learning_rate": 8.89120630835899e-06,
      "loss": 0.3744,
      "step": 11711
    },
    {
      "epoch": 2.4076472402096822,
      "grad_norm": 0.2442595660686493,
      "learning_rate": 8.885246666241468e-06,
      "loss": 0.3829,
      "step": 11712
    },
    {
      "epoch": 2.407852811183061,
      "grad_norm": 0.23331138491630554,
      "learning_rate": 8.879288803329043e-06,
      "loss": 0.4022,
      "step": 11713
    },
    {
      "epoch": 2.4080583821564394,
      "grad_norm": 0.22748929262161255,
      "learning_rate": 8.87333271991522e-06,
      "loss": 0.4032,
      "step": 11714
    },
    {
      "epoch": 2.408263953129818,
      "grad_norm": 0.23111633956432343,
      "learning_rate": 8.867378416293447e-06,
      "loss": 0.3815,
      "step": 11715
    },
    {
      "epoch": 2.4084695241031966,
      "grad_norm": 0.23724834620952606,
      "learning_rate": 8.861425892757058e-06,
      "loss": 0.384,
      "step": 11716
    },
    {
      "epoch": 2.408675095076575,
      "grad_norm": 0.22605331242084503,
      "learning_rate": 8.855475149599309e-06,
      "loss": 0.3709,
      "step": 11717
    },
    {
      "epoch": 2.4088806660499538,
      "grad_norm": 0.2561459541320801,
      "learning_rate": 8.849526187113354e-06,
      "loss": 0.3945,
      "step": 11718
    },
    {
      "epoch": 2.4090862370233324,
      "grad_norm": 0.2261964976787567,
      "learning_rate": 8.843579005592281e-06,
      "loss": 0.399,
      "step": 11719
    },
    {
      "epoch": 2.409291807996711,
      "grad_norm": 0.23060794174671173,
      "learning_rate": 8.837633605329074e-06,
      "loss": 0.4068,
      "step": 11720
    },
    {
      "epoch": 2.4094973789700895,
      "grad_norm": 0.2191411554813385,
      "learning_rate": 8.831689986616623e-06,
      "loss": 0.3823,
      "step": 11721
    },
    {
      "epoch": 2.409702949943468,
      "grad_norm": 0.2240157574415207,
      "learning_rate": 8.82574814974777e-06,
      "loss": 0.3942,
      "step": 11722
    },
    {
      "epoch": 2.4099085209168467,
      "grad_norm": 0.22615095973014832,
      "learning_rate": 8.819808095015225e-06,
      "loss": 0.3915,
      "step": 11723
    },
    {
      "epoch": 2.4101140918902253,
      "grad_norm": 0.12168576568365097,
      "learning_rate": 8.81386982271163e-06,
      "loss": 0.4526,
      "step": 11724
    },
    {
      "epoch": 2.4103196628636034,
      "grad_norm": 0.12334515154361725,
      "learning_rate": 8.807933333129526e-06,
      "loss": 0.4541,
      "step": 11725
    },
    {
      "epoch": 2.410525233836982,
      "grad_norm": 0.2267734259366989,
      "learning_rate": 8.801998626561397e-06,
      "loss": 0.3867,
      "step": 11726
    },
    {
      "epoch": 2.4107308048103606,
      "grad_norm": 0.23022069036960602,
      "learning_rate": 8.796065703299608e-06,
      "loss": 0.4002,
      "step": 11727
    },
    {
      "epoch": 2.410936375783739,
      "grad_norm": 0.2284584641456604,
      "learning_rate": 8.79013456363643e-06,
      "loss": 0.3759,
      "step": 11728
    },
    {
      "epoch": 2.411141946757118,
      "grad_norm": 0.24320749938488007,
      "learning_rate": 8.78420520786409e-06,
      "loss": 0.3935,
      "step": 11729
    },
    {
      "epoch": 2.4113475177304964,
      "grad_norm": 0.2366839051246643,
      "learning_rate": 8.778277636274688e-06,
      "loss": 0.399,
      "step": 11730
    },
    {
      "epoch": 2.411553088703875,
      "grad_norm": 0.22736315429210663,
      "learning_rate": 8.772351849160245e-06,
      "loss": 0.3755,
      "step": 11731
    },
    {
      "epoch": 2.4117586596772536,
      "grad_norm": 0.23666198551654816,
      "learning_rate": 8.766427846812702e-06,
      "loss": 0.3967,
      "step": 11732
    },
    {
      "epoch": 2.411964230650632,
      "grad_norm": 0.2277197241783142,
      "learning_rate": 8.760505629523901e-06,
      "loss": 0.3715,
      "step": 11733
    },
    {
      "epoch": 2.4121698016240107,
      "grad_norm": 0.21670396625995636,
      "learning_rate": 8.754585197585605e-06,
      "loss": 0.3729,
      "step": 11734
    },
    {
      "epoch": 2.4123753725973893,
      "grad_norm": 0.22383198142051697,
      "learning_rate": 8.748666551289474e-06,
      "loss": 0.39,
      "step": 11735
    },
    {
      "epoch": 2.412580943570768,
      "grad_norm": 0.232547789812088,
      "learning_rate": 8.742749690927115e-06,
      "loss": 0.3888,
      "step": 11736
    },
    {
      "epoch": 2.4127865145441465,
      "grad_norm": 0.23317821323871613,
      "learning_rate": 8.736834616790018e-06,
      "loss": 0.4036,
      "step": 11737
    },
    {
      "epoch": 2.412992085517525,
      "grad_norm": 0.12473565340042114,
      "learning_rate": 8.73092132916958e-06,
      "loss": 0.4348,
      "step": 11738
    },
    {
      "epoch": 2.4131976564909037,
      "grad_norm": 0.22561731934547424,
      "learning_rate": 8.72500982835713e-06,
      "loss": 0.3938,
      "step": 11739
    },
    {
      "epoch": 2.413403227464282,
      "grad_norm": 0.22302691638469696,
      "learning_rate": 8.719100114643891e-06,
      "loss": 0.3842,
      "step": 11740
    },
    {
      "epoch": 2.413608798437661,
      "grad_norm": 0.2331441342830658,
      "learning_rate": 8.71319218832102e-06,
      "loss": 0.3891,
      "step": 11741
    },
    {
      "epoch": 2.413814369411039,
      "grad_norm": 0.22362032532691956,
      "learning_rate": 8.70728604967955e-06,
      "loss": 0.3858,
      "step": 11742
    },
    {
      "epoch": 2.4140199403844176,
      "grad_norm": 0.23315522074699402,
      "learning_rate": 8.701381699010476e-06,
      "loss": 0.3939,
      "step": 11743
    },
    {
      "epoch": 2.414225511357796,
      "grad_norm": 0.11996540427207947,
      "learning_rate": 8.69547913660467e-06,
      "loss": 0.4391,
      "step": 11744
    },
    {
      "epoch": 2.4144310823311748,
      "grad_norm": 0.12286810576915741,
      "learning_rate": 8.689578362752919e-06,
      "loss": 0.4379,
      "step": 11745
    },
    {
      "epoch": 2.4146366533045533,
      "grad_norm": 0.229908749461174,
      "learning_rate": 8.683679377745915e-06,
      "loss": 0.3843,
      "step": 11746
    },
    {
      "epoch": 2.414842224277932,
      "grad_norm": 0.2293166071176529,
      "learning_rate": 8.677782181874295e-06,
      "loss": 0.3845,
      "step": 11747
    },
    {
      "epoch": 2.4150477952513105,
      "grad_norm": 0.22157427668571472,
      "learning_rate": 8.671886775428584e-06,
      "loss": 0.3857,
      "step": 11748
    },
    {
      "epoch": 2.415253366224689,
      "grad_norm": 0.22539031505584717,
      "learning_rate": 8.665993158699197e-06,
      "loss": 0.3803,
      "step": 11749
    },
    {
      "epoch": 2.4154589371980677,
      "grad_norm": 0.23554009199142456,
      "learning_rate": 8.660101331976515e-06,
      "loss": 0.3964,
      "step": 11750
    },
    {
      "epoch": 2.4156645081714463,
      "grad_norm": 0.11748301237821579,
      "learning_rate": 8.654211295550791e-06,
      "loss": 0.4473,
      "step": 11751
    },
    {
      "epoch": 2.415870079144825,
      "grad_norm": 0.12662889063358307,
      "learning_rate": 8.648323049712192e-06,
      "loss": 0.4615,
      "step": 11752
    },
    {
      "epoch": 2.4160756501182035,
      "grad_norm": 0.23614639043807983,
      "learning_rate": 8.642436594750813e-06,
      "loss": 0.3832,
      "step": 11753
    },
    {
      "epoch": 2.416281221091582,
      "grad_norm": 0.23256917297840118,
      "learning_rate": 8.636551930956645e-06,
      "loss": 0.4061,
      "step": 11754
    },
    {
      "epoch": 2.41648679206496,
      "grad_norm": 0.23401497304439545,
      "learning_rate": 8.630669058619595e-06,
      "loss": 0.4095,
      "step": 11755
    },
    {
      "epoch": 2.4166923630383392,
      "grad_norm": 0.12618698179721832,
      "learning_rate": 8.624787978029495e-06,
      "loss": 0.4405,
      "step": 11756
    },
    {
      "epoch": 2.4168979340117174,
      "grad_norm": 0.22936862707138062,
      "learning_rate": 8.61890868947608e-06,
      "loss": 0.391,
      "step": 11757
    },
    {
      "epoch": 2.417103504985096,
      "grad_norm": 0.2273116260766983,
      "learning_rate": 8.613031193248985e-06,
      "loss": 0.4034,
      "step": 11758
    },
    {
      "epoch": 2.4173090759584746,
      "grad_norm": 0.23834945261478424,
      "learning_rate": 8.607155489637773e-06,
      "loss": 0.3938,
      "step": 11759
    },
    {
      "epoch": 2.417514646931853,
      "grad_norm": 0.22730384767055511,
      "learning_rate": 8.601281578931908e-06,
      "loss": 0.4146,
      "step": 11760
    },
    {
      "epoch": 2.4177202179052317,
      "grad_norm": 0.23497353494167328,
      "learning_rate": 8.595409461420778e-06,
      "loss": 0.3847,
      "step": 11761
    },
    {
      "epoch": 2.4179257888786103,
      "grad_norm": 0.23128505051136017,
      "learning_rate": 8.589539137393653e-06,
      "loss": 0.3937,
      "step": 11762
    },
    {
      "epoch": 2.418131359851989,
      "grad_norm": 0.22396472096443176,
      "learning_rate": 8.583670607139764e-06,
      "loss": 0.3887,
      "step": 11763
    },
    {
      "epoch": 2.4183369308253675,
      "grad_norm": 0.2318245768547058,
      "learning_rate": 8.577803870948217e-06,
      "loss": 0.3752,
      "step": 11764
    },
    {
      "epoch": 2.418542501798746,
      "grad_norm": 0.12530356645584106,
      "learning_rate": 8.571938929108033e-06,
      "loss": 0.4542,
      "step": 11765
    },
    {
      "epoch": 2.4187480727721247,
      "grad_norm": 0.23986156284809113,
      "learning_rate": 8.566075781908158e-06,
      "loss": 0.3791,
      "step": 11766
    },
    {
      "epoch": 2.4189536437455033,
      "grad_norm": 0.2401699423789978,
      "learning_rate": 8.56021442963742e-06,
      "loss": 0.3889,
      "step": 11767
    },
    {
      "epoch": 2.419159214718882,
      "grad_norm": 0.12486173957586288,
      "learning_rate": 8.554354872584612e-06,
      "loss": 0.4482,
      "step": 11768
    },
    {
      "epoch": 2.4193647856922604,
      "grad_norm": 0.12108970433473587,
      "learning_rate": 8.5484971110384e-06,
      "loss": 0.4339,
      "step": 11769
    },
    {
      "epoch": 2.4195703566656386,
      "grad_norm": 0.2209288775920868,
      "learning_rate": 8.542641145287342e-06,
      "loss": 0.3695,
      "step": 11770
    },
    {
      "epoch": 2.4197759276390176,
      "grad_norm": 0.22829124331474304,
      "learning_rate": 8.536786975619966e-06,
      "loss": 0.3876,
      "step": 11771
    },
    {
      "epoch": 2.4199814986123958,
      "grad_norm": 0.24268139898777008,
      "learning_rate": 8.53093460232467e-06,
      "loss": 0.3802,
      "step": 11772
    },
    {
      "epoch": 2.4201870695857743,
      "grad_norm": 0.23681510984897614,
      "learning_rate": 8.525084025689766e-06,
      "loss": 0.3856,
      "step": 11773
    },
    {
      "epoch": 2.420392640559153,
      "grad_norm": 0.23241069912910461,
      "learning_rate": 8.519235246003491e-06,
      "loss": 0.3781,
      "step": 11774
    },
    {
      "epoch": 2.4205982115325315,
      "grad_norm": 0.21853965520858765,
      "learning_rate": 8.513388263553982e-06,
      "loss": 0.3835,
      "step": 11775
    },
    {
      "epoch": 2.42080378250591,
      "grad_norm": 0.23458018898963928,
      "learning_rate": 8.507543078629288e-06,
      "loss": 0.3982,
      "step": 11776
    },
    {
      "epoch": 2.4210093534792887,
      "grad_norm": 0.23409396409988403,
      "learning_rate": 8.501699691517392e-06,
      "loss": 0.3817,
      "step": 11777
    },
    {
      "epoch": 2.4212149244526673,
      "grad_norm": 0.23286281526088715,
      "learning_rate": 8.49585810250616e-06,
      "loss": 0.4137,
      "step": 11778
    },
    {
      "epoch": 2.421420495426046,
      "grad_norm": 0.11904696375131607,
      "learning_rate": 8.49001831188338e-06,
      "loss": 0.453,
      "step": 11779
    },
    {
      "epoch": 2.4216260663994245,
      "grad_norm": 0.24150028824806213,
      "learning_rate": 8.484180319936748e-06,
      "loss": 0.3943,
      "step": 11780
    },
    {
      "epoch": 2.421831637372803,
      "grad_norm": 0.2359628528356552,
      "learning_rate": 8.478344126953874e-06,
      "loss": 0.3806,
      "step": 11781
    },
    {
      "epoch": 2.4220372083461816,
      "grad_norm": 0.12654449045658112,
      "learning_rate": 8.472509733222289e-06,
      "loss": 0.4553,
      "step": 11782
    },
    {
      "epoch": 2.4222427793195602,
      "grad_norm": 0.23700331151485443,
      "learning_rate": 8.466677139029405e-06,
      "loss": 0.4043,
      "step": 11783
    },
    {
      "epoch": 2.422448350292939,
      "grad_norm": 0.24000297486782074,
      "learning_rate": 8.460846344662597e-06,
      "loss": 0.396,
      "step": 11784
    },
    {
      "epoch": 2.422653921266317,
      "grad_norm": 0.12040732055902481,
      "learning_rate": 8.455017350409105e-06,
      "loss": 0.4522,
      "step": 11785
    },
    {
      "epoch": 2.422859492239696,
      "grad_norm": 0.21814100444316864,
      "learning_rate": 8.449190156556098e-06,
      "loss": 0.3766,
      "step": 11786
    },
    {
      "epoch": 2.423065063213074,
      "grad_norm": 0.12481694668531418,
      "learning_rate": 8.443364763390649e-06,
      "loss": 0.4527,
      "step": 11787
    },
    {
      "epoch": 2.4232706341864527,
      "grad_norm": 0.22663375735282898,
      "learning_rate": 8.43754117119976e-06,
      "loss": 0.383,
      "step": 11788
    },
    {
      "epoch": 2.4234762051598313,
      "grad_norm": 0.12208539247512817,
      "learning_rate": 8.431719380270307e-06,
      "loss": 0.4564,
      "step": 11789
    },
    {
      "epoch": 2.42368177613321,
      "grad_norm": 0.22577068209648132,
      "learning_rate": 8.425899390889138e-06,
      "loss": 0.3758,
      "step": 11790
    },
    {
      "epoch": 2.4238873471065885,
      "grad_norm": 0.12725965678691864,
      "learning_rate": 8.420081203342941e-06,
      "loss": 0.435,
      "step": 11791
    },
    {
      "epoch": 2.424092918079967,
      "grad_norm": 0.23309412598609924,
      "learning_rate": 8.414264817918385e-06,
      "loss": 0.3846,
      "step": 11792
    },
    {
      "epoch": 2.4242984890533457,
      "grad_norm": 0.2279675304889679,
      "learning_rate": 8.408450234901998e-06,
      "loss": 0.3934,
      "step": 11793
    },
    {
      "epoch": 2.4245040600267243,
      "grad_norm": 0.22592322528362274,
      "learning_rate": 8.402637454580244e-06,
      "loss": 0.3864,
      "step": 11794
    },
    {
      "epoch": 2.424709631000103,
      "grad_norm": 0.22809530794620514,
      "learning_rate": 8.396826477239479e-06,
      "loss": 0.3911,
      "step": 11795
    },
    {
      "epoch": 2.4249152019734814,
      "grad_norm": 0.23382043838500977,
      "learning_rate": 8.391017303165995e-06,
      "loss": 0.392,
      "step": 11796
    },
    {
      "epoch": 2.42512077294686,
      "grad_norm": 0.23308755457401276,
      "learning_rate": 8.38520993264597e-06,
      "loss": 0.4044,
      "step": 11797
    },
    {
      "epoch": 2.4253263439202386,
      "grad_norm": 0.22026486694812775,
      "learning_rate": 8.379404365965524e-06,
      "loss": 0.3994,
      "step": 11798
    },
    {
      "epoch": 2.425531914893617,
      "grad_norm": 0.12114302068948746,
      "learning_rate": 8.373600603410658e-06,
      "loss": 0.4553,
      "step": 11799
    },
    {
      "epoch": 2.4257374858669953,
      "grad_norm": 0.23082832992076874,
      "learning_rate": 8.367798645267303e-06,
      "loss": 0.3775,
      "step": 11800
    },
    {
      "epoch": 2.4259430568403744,
      "grad_norm": 0.2422942817211151,
      "learning_rate": 8.361998491821289e-06,
      "loss": 0.3988,
      "step": 11801
    },
    {
      "epoch": 2.4261486278137525,
      "grad_norm": 0.23066774010658264,
      "learning_rate": 8.356200143358363e-06,
      "loss": 0.3964,
      "step": 11802
    },
    {
      "epoch": 2.426354198787131,
      "grad_norm": 0.1255854219198227,
      "learning_rate": 8.35040360016418e-06,
      "loss": 0.4471,
      "step": 11803
    },
    {
      "epoch": 2.4265597697605097,
      "grad_norm": 0.23167705535888672,
      "learning_rate": 8.344608862524306e-06,
      "loss": 0.3935,
      "step": 11804
    },
    {
      "epoch": 2.4267653407338883,
      "grad_norm": 0.22956189513206482,
      "learning_rate": 8.338815930724234e-06,
      "loss": 0.3887,
      "step": 11805
    },
    {
      "epoch": 2.426970911707267,
      "grad_norm": 0.23048321902751923,
      "learning_rate": 8.33302480504935e-06,
      "loss": 0.3993,
      "step": 11806
    },
    {
      "epoch": 2.4271764826806455,
      "grad_norm": 0.22636932134628296,
      "learning_rate": 8.327235485784948e-06,
      "loss": 0.3955,
      "step": 11807
    },
    {
      "epoch": 2.427382053654024,
      "grad_norm": 0.2231477051973343,
      "learning_rate": 8.321447973216248e-06,
      "loss": 0.3885,
      "step": 11808
    },
    {
      "epoch": 2.4275876246274026,
      "grad_norm": 0.23407144844532013,
      "learning_rate": 8.315662267628374e-06,
      "loss": 0.3875,
      "step": 11809
    },
    {
      "epoch": 2.427793195600781,
      "grad_norm": 0.22613434493541718,
      "learning_rate": 8.309878369306348e-06,
      "loss": 0.3699,
      "step": 11810
    },
    {
      "epoch": 2.42799876657416,
      "grad_norm": 0.244913712143898,
      "learning_rate": 8.30409627853513e-06,
      "loss": 0.3858,
      "step": 11811
    },
    {
      "epoch": 2.4282043375475384,
      "grad_norm": 0.24289999902248383,
      "learning_rate": 8.298315995599578e-06,
      "loss": 0.3877,
      "step": 11812
    },
    {
      "epoch": 2.428409908520917,
      "grad_norm": 0.2354183942079544,
      "learning_rate": 8.292537520784438e-06,
      "loss": 0.3713,
      "step": 11813
    },
    {
      "epoch": 2.4286154794942956,
      "grad_norm": 0.22426529228687286,
      "learning_rate": 8.286760854374421e-06,
      "loss": 0.374,
      "step": 11814
    },
    {
      "epoch": 2.428821050467674,
      "grad_norm": 0.12690819799900055,
      "learning_rate": 8.280985996654097e-06,
      "loss": 0.4512,
      "step": 11815
    },
    {
      "epoch": 2.4290266214410527,
      "grad_norm": 0.12370403110980988,
      "learning_rate": 8.275212947907967e-06,
      "loss": 0.4472,
      "step": 11816
    },
    {
      "epoch": 2.429232192414431,
      "grad_norm": 0.237684965133667,
      "learning_rate": 8.26944170842044e-06,
      "loss": 0.3942,
      "step": 11817
    },
    {
      "epoch": 2.4294377633878095,
      "grad_norm": 0.23901338875293732,
      "learning_rate": 8.26367227847584e-06,
      "loss": 0.3923,
      "step": 11818
    },
    {
      "epoch": 2.429643334361188,
      "grad_norm": 0.22691085934638977,
      "learning_rate": 8.257904658358407e-06,
      "loss": 0.3927,
      "step": 11819
    },
    {
      "epoch": 2.4298489053345667,
      "grad_norm": 0.2229507714509964,
      "learning_rate": 8.25213884835228e-06,
      "loss": 0.3897,
      "step": 11820
    },
    {
      "epoch": 2.4300544763079452,
      "grad_norm": 0.2375117689371109,
      "learning_rate": 8.246374848741511e-06,
      "loss": 0.3892,
      "step": 11821
    },
    {
      "epoch": 2.430260047281324,
      "grad_norm": 0.23138779401779175,
      "learning_rate": 8.24061265981007e-06,
      "loss": 0.3968,
      "step": 11822
    },
    {
      "epoch": 2.4304656182547024,
      "grad_norm": 0.23814985156059265,
      "learning_rate": 8.234852281841833e-06,
      "loss": 0.3955,
      "step": 11823
    },
    {
      "epoch": 2.430671189228081,
      "grad_norm": 0.2250833660364151,
      "learning_rate": 8.229093715120578e-06,
      "loss": 0.37,
      "step": 11824
    },
    {
      "epoch": 2.4308767602014596,
      "grad_norm": 0.11755650490522385,
      "learning_rate": 8.223336959930003e-06,
      "loss": 0.4526,
      "step": 11825
    },
    {
      "epoch": 2.431082331174838,
      "grad_norm": 0.11656010895967484,
      "learning_rate": 8.217582016553732e-06,
      "loss": 0.427,
      "step": 11826
    },
    {
      "epoch": 2.4312879021482168,
      "grad_norm": 0.2219466120004654,
      "learning_rate": 8.211828885275272e-06,
      "loss": 0.3825,
      "step": 11827
    },
    {
      "epoch": 2.4314934731215954,
      "grad_norm": 0.22404515743255615,
      "learning_rate": 8.206077566378058e-06,
      "loss": 0.3639,
      "step": 11828
    },
    {
      "epoch": 2.431699044094974,
      "grad_norm": 0.22220522165298462,
      "learning_rate": 8.200328060145428e-06,
      "loss": 0.3856,
      "step": 11829
    },
    {
      "epoch": 2.4319046150683525,
      "grad_norm": 0.22566857933998108,
      "learning_rate": 8.194580366860628e-06,
      "loss": 0.394,
      "step": 11830
    },
    {
      "epoch": 2.432110186041731,
      "grad_norm": 0.2224518358707428,
      "learning_rate": 8.18883448680682e-06,
      "loss": 0.3692,
      "step": 11831
    },
    {
      "epoch": 2.4323157570151093,
      "grad_norm": 0.23694801330566406,
      "learning_rate": 8.18309042026709e-06,
      "loss": 0.4063,
      "step": 11832
    },
    {
      "epoch": 2.432521327988488,
      "grad_norm": 0.1398681104183197,
      "learning_rate": 8.177348167524418e-06,
      "loss": 0.4508,
      "step": 11833
    },
    {
      "epoch": 2.4327268989618664,
      "grad_norm": 0.2483135610818863,
      "learning_rate": 8.171607728861677e-06,
      "loss": 0.3977,
      "step": 11834
    },
    {
      "epoch": 2.432932469935245,
      "grad_norm": 0.23512189090251923,
      "learning_rate": 8.165869104561702e-06,
      "loss": 0.3918,
      "step": 11835
    },
    {
      "epoch": 2.4331380409086236,
      "grad_norm": 0.11742374300956726,
      "learning_rate": 8.16013229490719e-06,
      "loss": 0.4327,
      "step": 11836
    },
    {
      "epoch": 2.433343611882002,
      "grad_norm": 0.24561573565006256,
      "learning_rate": 8.154397300180771e-06,
      "loss": 0.409,
      "step": 11837
    },
    {
      "epoch": 2.433549182855381,
      "grad_norm": 0.22359338402748108,
      "learning_rate": 8.148664120664973e-06,
      "loss": 0.3741,
      "step": 11838
    },
    {
      "epoch": 2.4337547538287594,
      "grad_norm": 0.22402852773666382,
      "learning_rate": 8.142932756642262e-06,
      "loss": 0.3976,
      "step": 11839
    },
    {
      "epoch": 2.433960324802138,
      "grad_norm": 0.22858619689941406,
      "learning_rate": 8.137203208394986e-06,
      "loss": 0.3971,
      "step": 11840
    },
    {
      "epoch": 2.4341658957755166,
      "grad_norm": 0.1260390430688858,
      "learning_rate": 8.13147547620541e-06,
      "loss": 0.449,
      "step": 11841
    },
    {
      "epoch": 2.434371466748895,
      "grad_norm": 0.2319212555885315,
      "learning_rate": 8.12574956035571e-06,
      "loss": 0.3938,
      "step": 11842
    },
    {
      "epoch": 2.4345770377222737,
      "grad_norm": 0.2238619327545166,
      "learning_rate": 8.120025461127984e-06,
      "loss": 0.3789,
      "step": 11843
    },
    {
      "epoch": 2.4347826086956523,
      "grad_norm": 0.2239915281534195,
      "learning_rate": 8.114303178804226e-06,
      "loss": 0.3881,
      "step": 11844
    },
    {
      "epoch": 2.434988179669031,
      "grad_norm": 0.2295527458190918,
      "learning_rate": 8.108582713666335e-06,
      "loss": 0.3872,
      "step": 11845
    },
    {
      "epoch": 2.4351937506424095,
      "grad_norm": 0.22697387635707855,
      "learning_rate": 8.102864065996159e-06,
      "loss": 0.3928,
      "step": 11846
    },
    {
      "epoch": 2.4353993216157876,
      "grad_norm": 0.12275702506303787,
      "learning_rate": 8.09714723607541e-06,
      "loss": 0.4449,
      "step": 11847
    },
    {
      "epoch": 2.4356048925891662,
      "grad_norm": 0.2424585521221161,
      "learning_rate": 8.09143222418573e-06,
      "loss": 0.4016,
      "step": 11848
    },
    {
      "epoch": 2.435810463562545,
      "grad_norm": 0.23178981244564056,
      "learning_rate": 8.085719030608682e-06,
      "loss": 0.3917,
      "step": 11849
    },
    {
      "epoch": 2.4360160345359234,
      "grad_norm": 0.23368723690509796,
      "learning_rate": 8.080007655625715e-06,
      "loss": 0.3883,
      "step": 11850
    },
    {
      "epoch": 2.436221605509302,
      "grad_norm": 0.23010197281837463,
      "learning_rate": 8.074298099518207e-06,
      "loss": 0.4075,
      "step": 11851
    },
    {
      "epoch": 2.4364271764826806,
      "grad_norm": 0.23444652557373047,
      "learning_rate": 8.068590362567436e-06,
      "loss": 0.3887,
      "step": 11852
    },
    {
      "epoch": 2.436632747456059,
      "grad_norm": 0.22641274333000183,
      "learning_rate": 8.062884445054602e-06,
      "loss": 0.3826,
      "step": 11853
    },
    {
      "epoch": 2.4368383184294378,
      "grad_norm": 0.12579554319381714,
      "learning_rate": 8.057180347260816e-06,
      "loss": 0.4397,
      "step": 11854
    },
    {
      "epoch": 2.4370438894028164,
      "grad_norm": 0.2325548529624939,
      "learning_rate": 8.05147806946707e-06,
      "loss": 0.3948,
      "step": 11855
    },
    {
      "epoch": 2.437249460376195,
      "grad_norm": 0.22521813213825226,
      "learning_rate": 8.045777611954315e-06,
      "loss": 0.3773,
      "step": 11856
    },
    {
      "epoch": 2.4374550313495735,
      "grad_norm": 0.22666728496551514,
      "learning_rate": 8.040078975003372e-06,
      "loss": 0.3918,
      "step": 11857
    },
    {
      "epoch": 2.437660602322952,
      "grad_norm": 0.2205967754125595,
      "learning_rate": 8.03438215889499e-06,
      "loss": 0.3929,
      "step": 11858
    },
    {
      "epoch": 2.4378661732963307,
      "grad_norm": 0.23035195469856262,
      "learning_rate": 8.028687163909804e-06,
      "loss": 0.3795,
      "step": 11859
    },
    {
      "epoch": 2.4380717442697093,
      "grad_norm": 0.22747023403644562,
      "learning_rate": 8.022993990328418e-06,
      "loss": 0.3908,
      "step": 11860
    },
    {
      "epoch": 2.438277315243088,
      "grad_norm": 0.2318742722272873,
      "learning_rate": 8.017302638431285e-06,
      "loss": 0.3972,
      "step": 11861
    },
    {
      "epoch": 2.438482886216466,
      "grad_norm": 0.22795268893241882,
      "learning_rate": 8.011613108498795e-06,
      "loss": 0.3828,
      "step": 11862
    },
    {
      "epoch": 2.4386884571898446,
      "grad_norm": 0.23046202957630157,
      "learning_rate": 8.00592540081124e-06,
      "loss": 0.391,
      "step": 11863
    },
    {
      "epoch": 2.438894028163223,
      "grad_norm": 0.23023991286754608,
      "learning_rate": 8.000239515648832e-06,
      "loss": 0.3984,
      "step": 11864
    },
    {
      "epoch": 2.439099599136602,
      "grad_norm": 0.2348402440547943,
      "learning_rate": 7.994555453291689e-06,
      "loss": 0.4067,
      "step": 11865
    },
    {
      "epoch": 2.4393051701099804,
      "grad_norm": 0.22935132682323456,
      "learning_rate": 7.98887321401982e-06,
      "loss": 0.3785,
      "step": 11866
    },
    {
      "epoch": 2.439510741083359,
      "grad_norm": 0.23405125737190247,
      "learning_rate": 7.983192798113195e-06,
      "loss": 0.3775,
      "step": 11867
    },
    {
      "epoch": 2.4397163120567376,
      "grad_norm": 0.22010092437267303,
      "learning_rate": 7.977514205851645e-06,
      "loss": 0.3812,
      "step": 11868
    },
    {
      "epoch": 2.439921883030116,
      "grad_norm": 0.22667670249938965,
      "learning_rate": 7.97183743751492e-06,
      "loss": 0.3858,
      "step": 11869
    },
    {
      "epoch": 2.4401274540034947,
      "grad_norm": 0.22953353822231293,
      "learning_rate": 7.966162493382703e-06,
      "loss": 0.3841,
      "step": 11870
    },
    {
      "epoch": 2.4403330249768733,
      "grad_norm": 0.2351302206516266,
      "learning_rate": 7.960489373734561e-06,
      "loss": 0.3691,
      "step": 11871
    },
    {
      "epoch": 2.440538595950252,
      "grad_norm": 0.22747184336185455,
      "learning_rate": 7.954818078849988e-06,
      "loss": 0.3671,
      "step": 11872
    },
    {
      "epoch": 2.4407441669236305,
      "grad_norm": 0.2345331311225891,
      "learning_rate": 7.949148609008362e-06,
      "loss": 0.3826,
      "step": 11873
    },
    {
      "epoch": 2.440949737897009,
      "grad_norm": 0.23388345539569855,
      "learning_rate": 7.943480964489024e-06,
      "loss": 0.3909,
      "step": 11874
    },
    {
      "epoch": 2.4411553088703877,
      "grad_norm": 0.2370399385690689,
      "learning_rate": 7.937815145571177e-06,
      "loss": 0.384,
      "step": 11875
    },
    {
      "epoch": 2.4413608798437663,
      "grad_norm": 0.23738206923007965,
      "learning_rate": 7.93215115253394e-06,
      "loss": 0.4001,
      "step": 11876
    },
    {
      "epoch": 2.4415664508171444,
      "grad_norm": 0.2355763465166092,
      "learning_rate": 7.926488985656372e-06,
      "loss": 0.3872,
      "step": 11877
    },
    {
      "epoch": 2.441772021790523,
      "grad_norm": 0.2362237423658371,
      "learning_rate": 7.920828645217405e-06,
      "loss": 0.3833,
      "step": 11878
    },
    {
      "epoch": 2.4419775927639016,
      "grad_norm": 0.22331978380680084,
      "learning_rate": 7.915170131495912e-06,
      "loss": 0.3734,
      "step": 11879
    },
    {
      "epoch": 2.44218316373728,
      "grad_norm": 0.23111921548843384,
      "learning_rate": 7.909513444770636e-06,
      "loss": 0.3911,
      "step": 11880
    },
    {
      "epoch": 2.4423887347106588,
      "grad_norm": 0.12303854525089264,
      "learning_rate": 7.90385858532028e-06,
      "loss": 0.4474,
      "step": 11881
    },
    {
      "epoch": 2.4425943056840373,
      "grad_norm": 0.23098498582839966,
      "learning_rate": 7.89820555342343e-06,
      "loss": 0.3952,
      "step": 11882
    },
    {
      "epoch": 2.442799876657416,
      "grad_norm": 0.2414843589067459,
      "learning_rate": 7.89255434935858e-06,
      "loss": 0.3926,
      "step": 11883
    },
    {
      "epoch": 2.4430054476307945,
      "grad_norm": 0.22785574197769165,
      "learning_rate": 7.886904973404134e-06,
      "loss": 0.3836,
      "step": 11884
    },
    {
      "epoch": 2.443211018604173,
      "grad_norm": 0.13417033851146698,
      "learning_rate": 7.881257425838412e-06,
      "loss": 0.4613,
      "step": 11885
    },
    {
      "epoch": 2.4434165895775517,
      "grad_norm": 0.11953188478946686,
      "learning_rate": 7.875611706939649e-06,
      "loss": 0.4594,
      "step": 11886
    },
    {
      "epoch": 2.4436221605509303,
      "grad_norm": 0.22670340538024902,
      "learning_rate": 7.869967816985965e-06,
      "loss": 0.3894,
      "step": 11887
    },
    {
      "epoch": 2.443827731524309,
      "grad_norm": 0.24815067648887634,
      "learning_rate": 7.86432575625543e-06,
      "loss": 0.3958,
      "step": 11888
    },
    {
      "epoch": 2.4440333024976875,
      "grad_norm": 0.22556784749031067,
      "learning_rate": 7.858685525025997e-06,
      "loss": 0.3895,
      "step": 11889
    },
    {
      "epoch": 2.444238873471066,
      "grad_norm": 0.2190685123205185,
      "learning_rate": 7.85304712357553e-06,
      "loss": 0.3572,
      "step": 11890
    },
    {
      "epoch": 2.4444444444444446,
      "grad_norm": 0.22397378087043762,
      "learning_rate": 7.847410552181804e-06,
      "loss": 0.3832,
      "step": 11891
    },
    {
      "epoch": 2.444650015417823,
      "grad_norm": 0.22663743793964386,
      "learning_rate": 7.841775811122514e-06,
      "loss": 0.3838,
      "step": 11892
    },
    {
      "epoch": 2.4448555863912014,
      "grad_norm": 0.22418825328350067,
      "learning_rate": 7.83614290067525e-06,
      "loss": 0.4074,
      "step": 11893
    },
    {
      "epoch": 2.44506115736458,
      "grad_norm": 0.2320103645324707,
      "learning_rate": 7.83051182111751e-06,
      "loss": 0.393,
      "step": 11894
    },
    {
      "epoch": 2.4452667283379586,
      "grad_norm": 0.2413313090801239,
      "learning_rate": 7.824882572726734e-06,
      "loss": 0.3944,
      "step": 11895
    },
    {
      "epoch": 2.445472299311337,
      "grad_norm": 0.24407535791397095,
      "learning_rate": 7.81925515578024e-06,
      "loss": 0.4004,
      "step": 11896
    },
    {
      "epoch": 2.4456778702847157,
      "grad_norm": 0.24596747756004333,
      "learning_rate": 7.81362957055526e-06,
      "loss": 0.4299,
      "step": 11897
    },
    {
      "epoch": 2.4458834412580943,
      "grad_norm": 0.23562337458133698,
      "learning_rate": 7.808005817328927e-06,
      "loss": 0.4011,
      "step": 11898
    },
    {
      "epoch": 2.446089012231473,
      "grad_norm": 0.1234961450099945,
      "learning_rate": 7.80238389637833e-06,
      "loss": 0.4573,
      "step": 11899
    },
    {
      "epoch": 2.4462945832048515,
      "grad_norm": 0.22207041084766388,
      "learning_rate": 7.796763807980414e-06,
      "loss": 0.3856,
      "step": 11900
    },
    {
      "epoch": 2.44650015417823,
      "grad_norm": 0.23153123259544373,
      "learning_rate": 7.79114555241205e-06,
      "loss": 0.3985,
      "step": 11901
    },
    {
      "epoch": 2.4467057251516087,
      "grad_norm": 0.2310320883989334,
      "learning_rate": 7.785529129950038e-06,
      "loss": 0.379,
      "step": 11902
    },
    {
      "epoch": 2.4469112961249873,
      "grad_norm": 0.23394078016281128,
      "learning_rate": 7.779914540871065e-06,
      "loss": 0.3878,
      "step": 11903
    },
    {
      "epoch": 2.447116867098366,
      "grad_norm": 0.24129053950309753,
      "learning_rate": 7.774301785451743e-06,
      "loss": 0.4019,
      "step": 11904
    },
    {
      "epoch": 2.4473224380717444,
      "grad_norm": 0.23060935735702515,
      "learning_rate": 7.768690863968575e-06,
      "loss": 0.383,
      "step": 11905
    },
    {
      "epoch": 2.447528009045123,
      "grad_norm": 0.2250318080186844,
      "learning_rate": 7.763081776697986e-06,
      "loss": 0.3917,
      "step": 11906
    },
    {
      "epoch": 2.447733580018501,
      "grad_norm": 0.2283366620540619,
      "learning_rate": 7.75747452391632e-06,
      "loss": 0.3753,
      "step": 11907
    },
    {
      "epoch": 2.4479391509918798,
      "grad_norm": 0.12568168342113495,
      "learning_rate": 7.751869105899797e-06,
      "loss": 0.4482,
      "step": 11908
    },
    {
      "epoch": 2.4481447219652583,
      "grad_norm": 0.2254650741815567,
      "learning_rate": 7.746265522924599e-06,
      "loss": 0.3705,
      "step": 11909
    },
    {
      "epoch": 2.448350292938637,
      "grad_norm": 0.22300590574741364,
      "learning_rate": 7.740663775266774e-06,
      "loss": 0.3809,
      "step": 11910
    },
    {
      "epoch": 2.4485558639120155,
      "grad_norm": 0.12381377071142197,
      "learning_rate": 7.735063863202297e-06,
      "loss": 0.4679,
      "step": 11911
    },
    {
      "epoch": 2.448761434885394,
      "grad_norm": 0.2367285043001175,
      "learning_rate": 7.729465787007045e-06,
      "loss": 0.4062,
      "step": 11912
    },
    {
      "epoch": 2.4489670058587727,
      "grad_norm": 0.23108406364917755,
      "learning_rate": 7.723869546956815e-06,
      "loss": 0.3886,
      "step": 11913
    },
    {
      "epoch": 2.4491725768321513,
      "grad_norm": 0.1295919418334961,
      "learning_rate": 7.71827514332729e-06,
      "loss": 0.4521,
      "step": 11914
    },
    {
      "epoch": 2.44937814780553,
      "grad_norm": 0.21976915001869202,
      "learning_rate": 7.71268257639411e-06,
      "loss": 0.3668,
      "step": 11915
    },
    {
      "epoch": 2.4495837187789085,
      "grad_norm": 0.22710269689559937,
      "learning_rate": 7.707091846432775e-06,
      "loss": 0.4069,
      "step": 11916
    },
    {
      "epoch": 2.449789289752287,
      "grad_norm": 0.23662005364894867,
      "learning_rate": 7.70150295371872e-06,
      "loss": 0.3951,
      "step": 11917
    },
    {
      "epoch": 2.4499948607256656,
      "grad_norm": 0.2241106480360031,
      "learning_rate": 7.695915898527278e-06,
      "loss": 0.3844,
      "step": 11918
    },
    {
      "epoch": 2.4502004316990442,
      "grad_norm": 0.22867663204669952,
      "learning_rate": 7.690330681133695e-06,
      "loss": 0.398,
      "step": 11919
    },
    {
      "epoch": 2.450406002672423,
      "grad_norm": 0.22373969852924347,
      "learning_rate": 7.684747301813141e-06,
      "loss": 0.3871,
      "step": 11920
    },
    {
      "epoch": 2.4506115736458014,
      "grad_norm": 0.21344764530658722,
      "learning_rate": 7.679165760840676e-06,
      "loss": 0.3814,
      "step": 11921
    },
    {
      "epoch": 2.4508171446191795,
      "grad_norm": 0.2301592379808426,
      "learning_rate": 7.67358605849127e-06,
      "loss": 0.3842,
      "step": 11922
    },
    {
      "epoch": 2.4510227155925586,
      "grad_norm": 0.23212045431137085,
      "learning_rate": 7.668008195039828e-06,
      "loss": 0.4049,
      "step": 11923
    },
    {
      "epoch": 2.4512282865659367,
      "grad_norm": 0.2373504489660263,
      "learning_rate": 7.662432170761128e-06,
      "loss": 0.371,
      "step": 11924
    },
    {
      "epoch": 2.4514338575393153,
      "grad_norm": 0.22908884286880493,
      "learning_rate": 7.65685798592988e-06,
      "loss": 0.3902,
      "step": 11925
    },
    {
      "epoch": 2.451639428512694,
      "grad_norm": 0.22210195660591125,
      "learning_rate": 7.6512856408207e-06,
      "loss": 0.4091,
      "step": 11926
    },
    {
      "epoch": 2.4518449994860725,
      "grad_norm": 0.22548379004001617,
      "learning_rate": 7.645715135708107e-06,
      "loss": 0.3848,
      "step": 11927
    },
    {
      "epoch": 2.452050570459451,
      "grad_norm": 0.23502108454704285,
      "learning_rate": 7.640146470866528e-06,
      "loss": 0.4022,
      "step": 11928
    },
    {
      "epoch": 2.4522561414328297,
      "grad_norm": 0.23006172478199005,
      "learning_rate": 7.634579646570319e-06,
      "loss": 0.3856,
      "step": 11929
    },
    {
      "epoch": 2.4524617124062082,
      "grad_norm": 0.23742365837097168,
      "learning_rate": 7.629014663093729e-06,
      "loss": 0.3902,
      "step": 11930
    },
    {
      "epoch": 2.452667283379587,
      "grad_norm": 0.1288265436887741,
      "learning_rate": 7.623451520710911e-06,
      "loss": 0.4406,
      "step": 11931
    },
    {
      "epoch": 2.4528728543529654,
      "grad_norm": 0.23163823783397675,
      "learning_rate": 7.617890219695945e-06,
      "loss": 0.4061,
      "step": 11932
    },
    {
      "epoch": 2.453078425326344,
      "grad_norm": 0.23114512860774994,
      "learning_rate": 7.612330760322799e-06,
      "loss": 0.3849,
      "step": 11933
    },
    {
      "epoch": 2.4532839962997226,
      "grad_norm": 0.2422575205564499,
      "learning_rate": 7.606773142865368e-06,
      "loss": 0.4076,
      "step": 11934
    },
    {
      "epoch": 2.453489567273101,
      "grad_norm": 0.23896224796772003,
      "learning_rate": 7.601217367597442e-06,
      "loss": 0.3913,
      "step": 11935
    },
    {
      "epoch": 2.45369513824648,
      "grad_norm": 0.22641202807426453,
      "learning_rate": 7.595663434792739e-06,
      "loss": 0.3782,
      "step": 11936
    },
    {
      "epoch": 2.453900709219858,
      "grad_norm": 0.2328871637582779,
      "learning_rate": 7.590111344724879e-06,
      "loss": 0.3799,
      "step": 11937
    },
    {
      "epoch": 2.454106280193237,
      "grad_norm": 0.22412602603435516,
      "learning_rate": 7.584561097667373e-06,
      "loss": 0.3826,
      "step": 11938
    },
    {
      "epoch": 2.454311851166615,
      "grad_norm": 0.21876847743988037,
      "learning_rate": 7.579012693893668e-06,
      "loss": 0.3916,
      "step": 11939
    },
    {
      "epoch": 2.4545174221399937,
      "grad_norm": 0.11912833899259567,
      "learning_rate": 7.5734661336770845e-06,
      "loss": 0.4674,
      "step": 11940
    },
    {
      "epoch": 2.4547229931133723,
      "grad_norm": 0.2307884842157364,
      "learning_rate": 7.56792141729091e-06,
      "loss": 0.3924,
      "step": 11941
    },
    {
      "epoch": 2.454928564086751,
      "grad_norm": 0.24826078116893768,
      "learning_rate": 7.562378545008289e-06,
      "loss": 0.3996,
      "step": 11942
    },
    {
      "epoch": 2.4551341350601295,
      "grad_norm": 0.22318950295448303,
      "learning_rate": 7.556837517102281e-06,
      "loss": 0.3761,
      "step": 11943
    },
    {
      "epoch": 2.455339706033508,
      "grad_norm": 0.22398589551448822,
      "learning_rate": 7.55129833384589e-06,
      "loss": 0.4049,
      "step": 11944
    },
    {
      "epoch": 2.4555452770068866,
      "grad_norm": 0.2238241583108902,
      "learning_rate": 7.545760995512e-06,
      "loss": 0.3946,
      "step": 11945
    },
    {
      "epoch": 2.455750847980265,
      "grad_norm": 0.23885828256607056,
      "learning_rate": 7.540225502373406e-06,
      "loss": 0.374,
      "step": 11946
    },
    {
      "epoch": 2.455956418953644,
      "grad_norm": 0.22806625068187714,
      "learning_rate": 7.53469185470281e-06,
      "loss": 0.3872,
      "step": 11947
    },
    {
      "epoch": 2.4561619899270224,
      "grad_norm": 0.2306099683046341,
      "learning_rate": 7.529160052772834e-06,
      "loss": 0.3904,
      "step": 11948
    },
    {
      "epoch": 2.456367560900401,
      "grad_norm": 0.22906504571437836,
      "learning_rate": 7.523630096855996e-06,
      "loss": 0.3872,
      "step": 11949
    },
    {
      "epoch": 2.4565731318737796,
      "grad_norm": 0.2300928682088852,
      "learning_rate": 7.518101987224747e-06,
      "loss": 0.3774,
      "step": 11950
    },
    {
      "epoch": 2.456778702847158,
      "grad_norm": 0.22766615450382233,
      "learning_rate": 7.512575724151425e-06,
      "loss": 0.375,
      "step": 11951
    },
    {
      "epoch": 2.4569842738205363,
      "grad_norm": 0.2307845950126648,
      "learning_rate": 7.507051307908282e-06,
      "loss": 0.4087,
      "step": 11952
    },
    {
      "epoch": 2.4571898447939153,
      "grad_norm": 0.23094025254249573,
      "learning_rate": 7.5015287387674745e-06,
      "loss": 0.4023,
      "step": 11953
    },
    {
      "epoch": 2.4573954157672935,
      "grad_norm": 0.25428444147109985,
      "learning_rate": 7.4960080170010855e-06,
      "loss": 0.3832,
      "step": 11954
    },
    {
      "epoch": 2.457600986740672,
      "grad_norm": 0.2268911600112915,
      "learning_rate": 7.490489142881082e-06,
      "loss": 0.3697,
      "step": 11955
    },
    {
      "epoch": 2.4578065577140507,
      "grad_norm": 0.23061802983283997,
      "learning_rate": 7.484972116679353e-06,
      "loss": 0.3872,
      "step": 11956
    },
    {
      "epoch": 2.4580121286874292,
      "grad_norm": 0.22644907236099243,
      "learning_rate": 7.479456938667715e-06,
      "loss": 0.4041,
      "step": 11957
    },
    {
      "epoch": 2.458217699660808,
      "grad_norm": 0.22928078472614288,
      "learning_rate": 7.473943609117859e-06,
      "loss": 0.3757,
      "step": 11958
    },
    {
      "epoch": 2.4584232706341864,
      "grad_norm": 0.23315031826496124,
      "learning_rate": 7.468432128301406e-06,
      "loss": 0.3962,
      "step": 11959
    },
    {
      "epoch": 2.458628841607565,
      "grad_norm": 0.23016057908535004,
      "learning_rate": 7.462922496489881e-06,
      "loss": 0.3948,
      "step": 11960
    },
    {
      "epoch": 2.4588344125809436,
      "grad_norm": 0.23658603429794312,
      "learning_rate": 7.457414713954714e-06,
      "loss": 0.358,
      "step": 11961
    },
    {
      "epoch": 2.459039983554322,
      "grad_norm": 0.23030193150043488,
      "learning_rate": 7.451908780967242e-06,
      "loss": 0.3848,
      "step": 11962
    },
    {
      "epoch": 2.4592455545277008,
      "grad_norm": 0.23462505638599396,
      "learning_rate": 7.446404697798738e-06,
      "loss": 0.3856,
      "step": 11963
    },
    {
      "epoch": 2.4594511255010794,
      "grad_norm": 0.23800687491893768,
      "learning_rate": 7.4409024647203344e-06,
      "loss": 0.3833,
      "step": 11964
    },
    {
      "epoch": 2.459656696474458,
      "grad_norm": 0.22501428425312042,
      "learning_rate": 7.43540208200313e-06,
      "loss": 0.4078,
      "step": 11965
    },
    {
      "epoch": 2.4598622674478365,
      "grad_norm": 0.23494026064872742,
      "learning_rate": 7.429903549918089e-06,
      "loss": 0.382,
      "step": 11966
    },
    {
      "epoch": 2.4600678384212147,
      "grad_norm": 0.23382548987865448,
      "learning_rate": 7.424406868736093e-06,
      "loss": 0.3714,
      "step": 11967
    },
    {
      "epoch": 2.4602734093945937,
      "grad_norm": 0.2336670309305191,
      "learning_rate": 7.418912038727947e-06,
      "loss": 0.386,
      "step": 11968
    },
    {
      "epoch": 2.460478980367972,
      "grad_norm": 0.1252935528755188,
      "learning_rate": 7.413419060164348e-06,
      "loss": 0.4512,
      "step": 11969
    },
    {
      "epoch": 2.4606845513413504,
      "grad_norm": 0.12696446478366852,
      "learning_rate": 7.4079279333159054e-06,
      "loss": 0.4467,
      "step": 11970
    },
    {
      "epoch": 2.460890122314729,
      "grad_norm": 0.23147273063659668,
      "learning_rate": 7.4024386584531574e-06,
      "loss": 0.3854,
      "step": 11971
    },
    {
      "epoch": 2.4610956932881076,
      "grad_norm": 0.23505190014839172,
      "learning_rate": 7.396951235846528e-06,
      "loss": 0.4034,
      "step": 11972
    },
    {
      "epoch": 2.461301264261486,
      "grad_norm": 0.22706139087677002,
      "learning_rate": 7.391465665766351e-06,
      "loss": 0.3958,
      "step": 11973
    },
    {
      "epoch": 2.461506835234865,
      "grad_norm": 0.2269429713487625,
      "learning_rate": 7.385981948482885e-06,
      "loss": 0.3912,
      "step": 11974
    },
    {
      "epoch": 2.4617124062082434,
      "grad_norm": 0.24472972750663757,
      "learning_rate": 7.380500084266274e-06,
      "loss": 0.4132,
      "step": 11975
    },
    {
      "epoch": 2.461917977181622,
      "grad_norm": 0.2306082546710968,
      "learning_rate": 7.375020073386597e-06,
      "loss": 0.3853,
      "step": 11976
    },
    {
      "epoch": 2.4621235481550006,
      "grad_norm": 0.23048275709152222,
      "learning_rate": 7.369541916113808e-06,
      "loss": 0.3962,
      "step": 11977
    },
    {
      "epoch": 2.462329119128379,
      "grad_norm": 0.22634848952293396,
      "learning_rate": 7.364065612717816e-06,
      "loss": 0.3774,
      "step": 11978
    },
    {
      "epoch": 2.4625346901017577,
      "grad_norm": 0.23332750797271729,
      "learning_rate": 7.3585911634684e-06,
      "loss": 0.4043,
      "step": 11979
    },
    {
      "epoch": 2.4627402610751363,
      "grad_norm": 0.22785905003547668,
      "learning_rate": 7.353118568635265e-06,
      "loss": 0.3758,
      "step": 11980
    },
    {
      "epoch": 2.462945832048515,
      "grad_norm": 0.23111788928508759,
      "learning_rate": 7.347647828488015e-06,
      "loss": 0.3824,
      "step": 11981
    },
    {
      "epoch": 2.463151403021893,
      "grad_norm": 0.12065623700618744,
      "learning_rate": 7.342178943296169e-06,
      "loss": 0.4432,
      "step": 11982
    },
    {
      "epoch": 2.463356973995272,
      "grad_norm": 0.22295540571212769,
      "learning_rate": 7.336711913329146e-06,
      "loss": 0.3759,
      "step": 11983
    },
    {
      "epoch": 2.4635625449686502,
      "grad_norm": 0.2377004474401474,
      "learning_rate": 7.331246738856297e-06,
      "loss": 0.3769,
      "step": 11984
    },
    {
      "epoch": 2.463768115942029,
      "grad_norm": 0.23187273740768433,
      "learning_rate": 7.325783420146861e-06,
      "loss": 0.3655,
      "step": 11985
    },
    {
      "epoch": 2.4639736869154074,
      "grad_norm": 0.12513786554336548,
      "learning_rate": 7.320321957469973e-06,
      "loss": 0.4312,
      "step": 11986
    },
    {
      "epoch": 2.464179257888786,
      "grad_norm": 0.23012107610702515,
      "learning_rate": 7.3148623510947215e-06,
      "loss": 0.3841,
      "step": 11987
    },
    {
      "epoch": 2.4643848288621646,
      "grad_norm": 0.23000621795654297,
      "learning_rate": 7.309404601290058e-06,
      "loss": 0.3997,
      "step": 11988
    },
    {
      "epoch": 2.464590399835543,
      "grad_norm": 0.23168058693408966,
      "learning_rate": 7.3039487083248665e-06,
      "loss": 0.3756,
      "step": 11989
    },
    {
      "epoch": 2.4647959708089218,
      "grad_norm": 0.2302679568529129,
      "learning_rate": 7.298494672467922e-06,
      "loss": 0.3849,
      "step": 11990
    },
    {
      "epoch": 2.4650015417823004,
      "grad_norm": 0.23086369037628174,
      "learning_rate": 7.2930424939879405e-06,
      "loss": 0.3872,
      "step": 11991
    },
    {
      "epoch": 2.465207112755679,
      "grad_norm": 0.2280319184064865,
      "learning_rate": 7.28759217315351e-06,
      "loss": 0.3777,
      "step": 11992
    },
    {
      "epoch": 2.4654126837290575,
      "grad_norm": 0.2329222559928894,
      "learning_rate": 7.282143710233148e-06,
      "loss": 0.3905,
      "step": 11993
    },
    {
      "epoch": 2.465618254702436,
      "grad_norm": 0.22960495948791504,
      "learning_rate": 7.276697105495274e-06,
      "loss": 0.3776,
      "step": 11994
    },
    {
      "epoch": 2.4658238256758147,
      "grad_norm": 0.22651077806949615,
      "learning_rate": 7.271252359208212e-06,
      "loss": 0.3902,
      "step": 11995
    },
    {
      "epoch": 2.4660293966491933,
      "grad_norm": 0.23243005573749542,
      "learning_rate": 7.2658094716402e-06,
      "loss": 0.3618,
      "step": 11996
    },
    {
      "epoch": 2.466234967622572,
      "grad_norm": 0.2251901775598526,
      "learning_rate": 7.260368443059382e-06,
      "loss": 0.3856,
      "step": 11997
    },
    {
      "epoch": 2.4664405385959505,
      "grad_norm": 0.22453376650810242,
      "learning_rate": 7.254929273733824e-06,
      "loss": 0.4079,
      "step": 11998
    },
    {
      "epoch": 2.4666461095693286,
      "grad_norm": 0.2299884408712387,
      "learning_rate": 7.249491963931481e-06,
      "loss": 0.3801,
      "step": 11999
    },
    {
      "epoch": 2.466851680542707,
      "grad_norm": 0.34996315836906433,
      "learning_rate": 7.244056513920224e-06,
      "loss": 0.4597,
      "step": 12000
    },
    {
      "epoch": 2.467057251516086,
      "grad_norm": 0.22876065969467163,
      "learning_rate": 7.238622923967829e-06,
      "loss": 0.3884,
      "step": 12001
    },
    {
      "epoch": 2.4672628224894644,
      "grad_norm": 0.22702383995056152,
      "learning_rate": 7.233191194341992e-06,
      "loss": 0.3792,
      "step": 12002
    },
    {
      "epoch": 2.467468393462843,
      "grad_norm": 0.23241770267486572,
      "learning_rate": 7.2277613253102985e-06,
      "loss": 0.394,
      "step": 12003
    },
    {
      "epoch": 2.4676739644362216,
      "grad_norm": 0.124427430331707,
      "learning_rate": 7.222333317140245e-06,
      "loss": 0.4528,
      "step": 12004
    },
    {
      "epoch": 2.4678795354096,
      "grad_norm": 0.2252800464630127,
      "learning_rate": 7.216907170099272e-06,
      "loss": 0.373,
      "step": 12005
    },
    {
      "epoch": 2.4680851063829787,
      "grad_norm": 0.22926414012908936,
      "learning_rate": 7.211482884454681e-06,
      "loss": 0.3816,
      "step": 12006
    },
    {
      "epoch": 2.4682906773563573,
      "grad_norm": 0.1216835305094719,
      "learning_rate": 7.206060460473699e-06,
      "loss": 0.4481,
      "step": 12007
    },
    {
      "epoch": 2.468496248329736,
      "grad_norm": 0.22425177693367004,
      "learning_rate": 7.200639898423476e-06,
      "loss": 0.3763,
      "step": 12008
    },
    {
      "epoch": 2.4687018193031145,
      "grad_norm": 0.22090640664100647,
      "learning_rate": 7.195221198571054e-06,
      "loss": 0.3812,
      "step": 12009
    },
    {
      "epoch": 2.468907390276493,
      "grad_norm": 0.23358182609081268,
      "learning_rate": 7.1898043611833845e-06,
      "loss": 0.3889,
      "step": 12010
    },
    {
      "epoch": 2.4691129612498717,
      "grad_norm": 0.2351762056350708,
      "learning_rate": 7.184389386527319e-06,
      "loss": 0.4039,
      "step": 12011
    },
    {
      "epoch": 2.4693185322232503,
      "grad_norm": 0.22724005579948425,
      "learning_rate": 7.178976274869649e-06,
      "loss": 0.4057,
      "step": 12012
    },
    {
      "epoch": 2.469524103196629,
      "grad_norm": 0.22213922441005707,
      "learning_rate": 7.173565026477041e-06,
      "loss": 0.3853,
      "step": 12013
    },
    {
      "epoch": 2.469729674170007,
      "grad_norm": 0.22167488932609558,
      "learning_rate": 7.1681556416160875e-06,
      "loss": 0.4068,
      "step": 12014
    },
    {
      "epoch": 2.4699352451433856,
      "grad_norm": 0.23194730281829834,
      "learning_rate": 7.1627481205532795e-06,
      "loss": 0.3975,
      "step": 12015
    },
    {
      "epoch": 2.470140816116764,
      "grad_norm": 0.22849147021770477,
      "learning_rate": 7.157342463555019e-06,
      "loss": 0.398,
      "step": 12016
    },
    {
      "epoch": 2.4703463870901428,
      "grad_norm": 0.22663573920726776,
      "learning_rate": 7.1519386708876185e-06,
      "loss": 0.3678,
      "step": 12017
    },
    {
      "epoch": 2.4705519580635213,
      "grad_norm": 0.5367224216461182,
      "learning_rate": 7.14653674281729e-06,
      "loss": 0.3891,
      "step": 12018
    },
    {
      "epoch": 2.4707575290369,
      "grad_norm": 0.232307568192482,
      "learning_rate": 7.1411366796101795e-06,
      "loss": 0.3862,
      "step": 12019
    },
    {
      "epoch": 2.4709631000102785,
      "grad_norm": 0.23023979365825653,
      "learning_rate": 7.135738481532311e-06,
      "loss": 0.3982,
      "step": 12020
    },
    {
      "epoch": 2.471168670983657,
      "grad_norm": 0.2264028787612915,
      "learning_rate": 7.13034214884963e-06,
      "loss": 0.3724,
      "step": 12021
    },
    {
      "epoch": 2.4713742419570357,
      "grad_norm": 0.2289603054523468,
      "learning_rate": 7.124947681827991e-06,
      "loss": 0.3857,
      "step": 12022
    },
    {
      "epoch": 2.4715798129304143,
      "grad_norm": 0.22829623520374298,
      "learning_rate": 7.119555080733154e-06,
      "loss": 0.3647,
      "step": 12023
    },
    {
      "epoch": 2.471785383903793,
      "grad_norm": 0.22573639452457428,
      "learning_rate": 7.114164345830782e-06,
      "loss": 0.405,
      "step": 12024
    },
    {
      "epoch": 2.4719909548771715,
      "grad_norm": 0.2383507937192917,
      "learning_rate": 7.108775477386444e-06,
      "loss": 0.3742,
      "step": 12025
    },
    {
      "epoch": 2.47219652585055,
      "grad_norm": 0.2287752777338028,
      "learning_rate": 7.103388475665647e-06,
      "loss": 0.3927,
      "step": 12026
    },
    {
      "epoch": 2.4724020968239286,
      "grad_norm": 0.2402697503566742,
      "learning_rate": 7.098003340933773e-06,
      "loss": 0.3899,
      "step": 12027
    },
    {
      "epoch": 2.4726076677973072,
      "grad_norm": 0.1256427764892578,
      "learning_rate": 7.09262007345611e-06,
      "loss": 0.4296,
      "step": 12028
    },
    {
      "epoch": 2.4728132387706854,
      "grad_norm": 0.2302490770816803,
      "learning_rate": 7.0872386734978865e-06,
      "loss": 0.3799,
      "step": 12029
    },
    {
      "epoch": 2.473018809744064,
      "grad_norm": 0.12290017306804657,
      "learning_rate": 7.08185914132421e-06,
      "loss": 0.4441,
      "step": 12030
    },
    {
      "epoch": 2.4732243807174425,
      "grad_norm": 0.24090375006198883,
      "learning_rate": 7.0764814772001035e-06,
      "loss": 0.3726,
      "step": 12031
    },
    {
      "epoch": 2.473429951690821,
      "grad_norm": 0.23742975294589996,
      "learning_rate": 7.071105681390495e-06,
      "loss": 0.3915,
      "step": 12032
    },
    {
      "epoch": 2.4736355226641997,
      "grad_norm": 0.12602561712265015,
      "learning_rate": 7.065731754160233e-06,
      "loss": 0.429,
      "step": 12033
    },
    {
      "epoch": 2.4738410936375783,
      "grad_norm": 0.2322402000427246,
      "learning_rate": 7.06035969577407e-06,
      "loss": 0.3945,
      "step": 12034
    },
    {
      "epoch": 2.474046664610957,
      "grad_norm": 0.22450955212116241,
      "learning_rate": 7.05498950649665e-06,
      "loss": 0.3684,
      "step": 12035
    },
    {
      "epoch": 2.4742522355843355,
      "grad_norm": 0.23430559039115906,
      "learning_rate": 7.049621186592546e-06,
      "loss": 0.378,
      "step": 12036
    },
    {
      "epoch": 2.474457806557714,
      "grad_norm": 0.2268606424331665,
      "learning_rate": 7.044254736326227e-06,
      "loss": 0.3944,
      "step": 12037
    },
    {
      "epoch": 2.4746633775310927,
      "grad_norm": 0.22248311340808868,
      "learning_rate": 7.038890155962071e-06,
      "loss": 0.3941,
      "step": 12038
    },
    {
      "epoch": 2.4748689485044713,
      "grad_norm": 0.12058508396148682,
      "learning_rate": 7.033527445764357e-06,
      "loss": 0.4526,
      "step": 12039
    },
    {
      "epoch": 2.47507451947785,
      "grad_norm": 0.12435080856084824,
      "learning_rate": 7.028166605997302e-06,
      "loss": 0.4443,
      "step": 12040
    },
    {
      "epoch": 2.4752800904512284,
      "grad_norm": 0.22780485451221466,
      "learning_rate": 7.022807636924997e-06,
      "loss": 0.3832,
      "step": 12041
    },
    {
      "epoch": 2.475485661424607,
      "grad_norm": 0.22483399510383606,
      "learning_rate": 7.017450538811455e-06,
      "loss": 0.4114,
      "step": 12042
    },
    {
      "epoch": 2.4756912323979856,
      "grad_norm": 0.22376932203769684,
      "learning_rate": 7.012095311920595e-06,
      "loss": 0.365,
      "step": 12043
    },
    {
      "epoch": 2.4758968033713638,
      "grad_norm": 0.22798992693424225,
      "learning_rate": 7.006741956516246e-06,
      "loss": 0.3874,
      "step": 12044
    },
    {
      "epoch": 2.4761023743447423,
      "grad_norm": 0.23297694325447083,
      "learning_rate": 7.001390472862141e-06,
      "loss": 0.3908,
      "step": 12045
    },
    {
      "epoch": 2.476307945318121,
      "grad_norm": 0.22531673312187195,
      "learning_rate": 6.99604086122191e-06,
      "loss": 0.3905,
      "step": 12046
    },
    {
      "epoch": 2.4765135162914995,
      "grad_norm": 0.22847385704517365,
      "learning_rate": 6.990693121859122e-06,
      "loss": 0.3764,
      "step": 12047
    },
    {
      "epoch": 2.476719087264878,
      "grad_norm": 0.12156729400157928,
      "learning_rate": 6.985347255037237e-06,
      "loss": 0.4623,
      "step": 12048
    },
    {
      "epoch": 2.4769246582382567,
      "grad_norm": 0.12409412860870361,
      "learning_rate": 6.980003261019599e-06,
      "loss": 0.4559,
      "step": 12049
    },
    {
      "epoch": 2.4771302292116353,
      "grad_norm": 0.2400665581226349,
      "learning_rate": 6.974661140069501e-06,
      "loss": 0.3763,
      "step": 12050
    },
    {
      "epoch": 2.477335800185014,
      "grad_norm": 0.22779475152492523,
      "learning_rate": 6.969320892450124e-06,
      "loss": 0.3765,
      "step": 12051
    },
    {
      "epoch": 2.4775413711583925,
      "grad_norm": 0.22973057627677917,
      "learning_rate": 6.9639825184245524e-06,
      "loss": 0.3799,
      "step": 12052
    },
    {
      "epoch": 2.477746942131771,
      "grad_norm": 0.23675696551799774,
      "learning_rate": 6.9586460182557705e-06,
      "loss": 0.399,
      "step": 12053
    },
    {
      "epoch": 2.4779525131051496,
      "grad_norm": 0.23592104017734528,
      "learning_rate": 6.953311392206702e-06,
      "loss": 0.3764,
      "step": 12054
    },
    {
      "epoch": 2.4781580840785282,
      "grad_norm": 0.12474309653043747,
      "learning_rate": 6.947978640540154e-06,
      "loss": 0.436,
      "step": 12055
    },
    {
      "epoch": 2.478363655051907,
      "grad_norm": 0.23557905852794647,
      "learning_rate": 6.942647763518844e-06,
      "loss": 0.3961,
      "step": 12056
    },
    {
      "epoch": 2.4785692260252854,
      "grad_norm": 0.23335106670856476,
      "learning_rate": 6.937318761405399e-06,
      "loss": 0.39,
      "step": 12057
    },
    {
      "epoch": 2.478774796998664,
      "grad_norm": 1.1866546869277954,
      "learning_rate": 6.931991634462352e-06,
      "loss": 0.4177,
      "step": 12058
    },
    {
      "epoch": 2.478980367972042,
      "grad_norm": 0.12616188824176788,
      "learning_rate": 6.926666382952149e-06,
      "loss": 0.4414,
      "step": 12059
    },
    {
      "epoch": 2.4791859389454207,
      "grad_norm": 0.22257229685783386,
      "learning_rate": 6.921343007137131e-06,
      "loss": 0.3853,
      "step": 12060
    },
    {
      "epoch": 2.4793915099187993,
      "grad_norm": 0.2297201305627823,
      "learning_rate": 6.916021507279572e-06,
      "loss": 0.3891,
      "step": 12061
    },
    {
      "epoch": 2.479597080892178,
      "grad_norm": 0.12035045772790909,
      "learning_rate": 6.910701883641627e-06,
      "loss": 0.4512,
      "step": 12062
    },
    {
      "epoch": 2.4798026518655565,
      "grad_norm": 0.11942754685878754,
      "learning_rate": 6.905384136485374e-06,
      "loss": 0.4546,
      "step": 12063
    },
    {
      "epoch": 2.480008222838935,
      "grad_norm": 0.12709856033325195,
      "learning_rate": 6.900068266072795e-06,
      "loss": 0.4667,
      "step": 12064
    },
    {
      "epoch": 2.4802137938123137,
      "grad_norm": 0.22888512909412384,
      "learning_rate": 6.894754272665767e-06,
      "loss": 0.3852,
      "step": 12065
    },
    {
      "epoch": 2.4804193647856922,
      "grad_norm": 0.22018122673034668,
      "learning_rate": 6.889442156526085e-06,
      "loss": 0.3962,
      "step": 12066
    },
    {
      "epoch": 2.480624935759071,
      "grad_norm": 0.23357877135276794,
      "learning_rate": 6.884131917915471e-06,
      "loss": 0.3871,
      "step": 12067
    },
    {
      "epoch": 2.4808305067324494,
      "grad_norm": 0.22664080560207367,
      "learning_rate": 6.87882355709552e-06,
      "loss": 0.3931,
      "step": 12068
    },
    {
      "epoch": 2.481036077705828,
      "grad_norm": 0.22483284771442413,
      "learning_rate": 6.873517074327758e-06,
      "loss": 0.3701,
      "step": 12069
    },
    {
      "epoch": 2.4812416486792066,
      "grad_norm": 0.12439465522766113,
      "learning_rate": 6.868212469873605e-06,
      "loss": 0.4436,
      "step": 12070
    },
    {
      "epoch": 2.481447219652585,
      "grad_norm": 0.12237696349620819,
      "learning_rate": 6.862909743994388e-06,
      "loss": 0.4515,
      "step": 12071
    },
    {
      "epoch": 2.481652790625964,
      "grad_norm": 0.22126199305057526,
      "learning_rate": 6.857608896951367e-06,
      "loss": 0.3588,
      "step": 12072
    },
    {
      "epoch": 2.4818583615993424,
      "grad_norm": 0.23091398179531097,
      "learning_rate": 6.8523099290056645e-06,
      "loss": 0.3856,
      "step": 12073
    },
    {
      "epoch": 2.4820639325727205,
      "grad_norm": 0.22415180504322052,
      "learning_rate": 6.847012840418361e-06,
      "loss": 0.397,
      "step": 12074
    },
    {
      "epoch": 2.482269503546099,
      "grad_norm": 0.12421949952840805,
      "learning_rate": 6.8417176314504125e-06,
      "loss": 0.4434,
      "step": 12075
    },
    {
      "epoch": 2.4824750745194777,
      "grad_norm": 0.12139065563678741,
      "learning_rate": 6.83642430236268e-06,
      "loss": 0.4701,
      "step": 12076
    },
    {
      "epoch": 2.4826806454928563,
      "grad_norm": 0.24218404293060303,
      "learning_rate": 6.831132853415946e-06,
      "loss": 0.4046,
      "step": 12077
    },
    {
      "epoch": 2.482886216466235,
      "grad_norm": 0.23166660964488983,
      "learning_rate": 6.825843284870901e-06,
      "loss": 0.3861,
      "step": 12078
    },
    {
      "epoch": 2.4830917874396135,
      "grad_norm": 0.2387050986289978,
      "learning_rate": 6.820555596988127e-06,
      "loss": 0.3854,
      "step": 12079
    },
    {
      "epoch": 2.483297358412992,
      "grad_norm": 0.23468570411205292,
      "learning_rate": 6.81526979002812e-06,
      "loss": 0.3764,
      "step": 12080
    },
    {
      "epoch": 2.4835029293863706,
      "grad_norm": 0.23246009647846222,
      "learning_rate": 6.809985864251303e-06,
      "loss": 0.3896,
      "step": 12081
    },
    {
      "epoch": 2.483708500359749,
      "grad_norm": 0.24410288035869598,
      "learning_rate": 6.804703819917987e-06,
      "loss": 0.3876,
      "step": 12082
    },
    {
      "epoch": 2.483914071333128,
      "grad_norm": 0.2310299128293991,
      "learning_rate": 6.799423657288384e-06,
      "loss": 0.3816,
      "step": 12083
    },
    {
      "epoch": 2.4841196423065064,
      "grad_norm": 0.22626370191574097,
      "learning_rate": 6.794145376622635e-06,
      "loss": 0.3851,
      "step": 12084
    },
    {
      "epoch": 2.484325213279885,
      "grad_norm": 0.2305128276348114,
      "learning_rate": 6.788868978180763e-06,
      "loss": 0.4095,
      "step": 12085
    },
    {
      "epoch": 2.4845307842532636,
      "grad_norm": 0.22715520858764648,
      "learning_rate": 6.78359446222272e-06,
      "loss": 0.397,
      "step": 12086
    },
    {
      "epoch": 2.484736355226642,
      "grad_norm": 0.12447824329137802,
      "learning_rate": 6.778321829008348e-06,
      "loss": 0.4611,
      "step": 12087
    },
    {
      "epoch": 2.4849419262000207,
      "grad_norm": 0.12171711772680283,
      "learning_rate": 6.773051078797419e-06,
      "loss": 0.4459,
      "step": 12088
    },
    {
      "epoch": 2.485147497173399,
      "grad_norm": 0.12131594866514206,
      "learning_rate": 6.767782211849591e-06,
      "loss": 0.4644,
      "step": 12089
    },
    {
      "epoch": 2.485353068146778,
      "grad_norm": 0.12281377613544464,
      "learning_rate": 6.7625152284244395e-06,
      "loss": 0.4399,
      "step": 12090
    },
    {
      "epoch": 2.485558639120156,
      "grad_norm": 0.2290441393852234,
      "learning_rate": 6.75725012878144e-06,
      "loss": 0.3939,
      "step": 12091
    },
    {
      "epoch": 2.4857642100935347,
      "grad_norm": 0.22904446721076965,
      "learning_rate": 6.751986913179967e-06,
      "loss": 0.3833,
      "step": 12092
    },
    {
      "epoch": 2.4859697810669132,
      "grad_norm": 0.23602800071239471,
      "learning_rate": 6.746725581879339e-06,
      "loss": 0.3835,
      "step": 12093
    },
    {
      "epoch": 2.486175352040292,
      "grad_norm": 0.2316070944070816,
      "learning_rate": 6.74146613513875e-06,
      "loss": 0.3902,
      "step": 12094
    },
    {
      "epoch": 2.4863809230136704,
      "grad_norm": 0.22582808136940002,
      "learning_rate": 6.736208573217292e-06,
      "loss": 0.4079,
      "step": 12095
    },
    {
      "epoch": 2.486586493987049,
      "grad_norm": 0.23117490112781525,
      "learning_rate": 6.730952896374002e-06,
      "loss": 0.3945,
      "step": 12096
    },
    {
      "epoch": 2.4867920649604276,
      "grad_norm": 0.22690841555595398,
      "learning_rate": 6.725699104867799e-06,
      "loss": 0.3927,
      "step": 12097
    },
    {
      "epoch": 2.486997635933806,
      "grad_norm": 0.23165901005268097,
      "learning_rate": 6.7204471989575e-06,
      "loss": 0.4029,
      "step": 12098
    },
    {
      "epoch": 2.4872032069071848,
      "grad_norm": 0.22219586372375488,
      "learning_rate": 6.715197178901853e-06,
      "loss": 0.3776,
      "step": 12099
    },
    {
      "epoch": 2.4874087778805634,
      "grad_norm": 0.2293098270893097,
      "learning_rate": 6.709949044959502e-06,
      "loss": 0.3988,
      "step": 12100
    },
    {
      "epoch": 2.487614348853942,
      "grad_norm": 0.23303751647472382,
      "learning_rate": 6.70470279738898e-06,
      "loss": 0.3915,
      "step": 12101
    },
    {
      "epoch": 2.4878199198273205,
      "grad_norm": 0.1233496144413948,
      "learning_rate": 6.6994584364487695e-06,
      "loss": 0.4614,
      "step": 12102
    },
    {
      "epoch": 2.488025490800699,
      "grad_norm": 0.23316849768161774,
      "learning_rate": 6.694215962397225e-06,
      "loss": 0.3868,
      "step": 12103
    },
    {
      "epoch": 2.4882310617740773,
      "grad_norm": 0.22257505357265472,
      "learning_rate": 6.688975375492618e-06,
      "loss": 0.374,
      "step": 12104
    },
    {
      "epoch": 2.4884366327474563,
      "grad_norm": 0.12211709469556808,
      "learning_rate": 6.6837366759931345e-06,
      "loss": 0.4395,
      "step": 12105
    },
    {
      "epoch": 2.4886422037208344,
      "grad_norm": 0.22621026635169983,
      "learning_rate": 6.678499864156851e-06,
      "loss": 0.3922,
      "step": 12106
    },
    {
      "epoch": 2.488847774694213,
      "grad_norm": 0.2442169040441513,
      "learning_rate": 6.673264940241767e-06,
      "loss": 0.3831,
      "step": 12107
    },
    {
      "epoch": 2.4890533456675916,
      "grad_norm": 0.22115904092788696,
      "learning_rate": 6.668031904505771e-06,
      "loss": 0.39,
      "step": 12108
    },
    {
      "epoch": 2.48925891664097,
      "grad_norm": 0.12432961910963058,
      "learning_rate": 6.662800757206687e-06,
      "loss": 0.4369,
      "step": 12109
    },
    {
      "epoch": 2.489464487614349,
      "grad_norm": 0.23481737077236176,
      "learning_rate": 6.657571498602224e-06,
      "loss": 0.3807,
      "step": 12110
    },
    {
      "epoch": 2.4896700585877274,
      "grad_norm": 0.24072937667369843,
      "learning_rate": 6.65234412895e-06,
      "loss": 0.3857,
      "step": 12111
    },
    {
      "epoch": 2.489875629561106,
      "grad_norm": 0.2299319952726364,
      "learning_rate": 6.647118648507545e-06,
      "loss": 0.3725,
      "step": 12112
    },
    {
      "epoch": 2.4900812005344846,
      "grad_norm": 0.2447563111782074,
      "learning_rate": 6.641895057532282e-06,
      "loss": 0.3858,
      "step": 12113
    },
    {
      "epoch": 2.490286771507863,
      "grad_norm": 0.22545365989208221,
      "learning_rate": 6.636673356281577e-06,
      "loss": 0.3783,
      "step": 12114
    },
    {
      "epoch": 2.4904923424812417,
      "grad_norm": 0.2212546318769455,
      "learning_rate": 6.631453545012663e-06,
      "loss": 0.3906,
      "step": 12115
    },
    {
      "epoch": 2.4906979134546203,
      "grad_norm": 0.2362491488456726,
      "learning_rate": 6.626235623982693e-06,
      "loss": 0.4016,
      "step": 12116
    },
    {
      "epoch": 2.490903484427999,
      "grad_norm": 0.11933384835720062,
      "learning_rate": 6.6210195934487395e-06,
      "loss": 0.4647,
      "step": 12117
    },
    {
      "epoch": 2.4911090554013775,
      "grad_norm": 0.12910796701908112,
      "learning_rate": 6.615805453667774e-06,
      "loss": 0.4296,
      "step": 12118
    },
    {
      "epoch": 2.4913146263747556,
      "grad_norm": 0.22228464484214783,
      "learning_rate": 6.6105932048966625e-06,
      "loss": 0.3975,
      "step": 12119
    },
    {
      "epoch": 2.4915201973481347,
      "grad_norm": 0.11906154453754425,
      "learning_rate": 6.6053828473921945e-06,
      "loss": 0.4488,
      "step": 12120
    },
    {
      "epoch": 2.491725768321513,
      "grad_norm": 0.11698108166456223,
      "learning_rate": 6.600174381411054e-06,
      "loss": 0.467,
      "step": 12121
    },
    {
      "epoch": 2.4919313392948914,
      "grad_norm": 0.23555971682071686,
      "learning_rate": 6.594967807209831e-06,
      "loss": 0.3887,
      "step": 12122
    },
    {
      "epoch": 2.49213691026827,
      "grad_norm": 0.23438353836536407,
      "learning_rate": 6.589763125045056e-06,
      "loss": 0.3863,
      "step": 12123
    },
    {
      "epoch": 2.4923424812416486,
      "grad_norm": 0.22644414007663727,
      "learning_rate": 6.584560335173119e-06,
      "loss": 0.3941,
      "step": 12124
    },
    {
      "epoch": 2.492548052215027,
      "grad_norm": 0.11747743934392929,
      "learning_rate": 6.579359437850339e-06,
      "loss": 0.4527,
      "step": 12125
    },
    {
      "epoch": 2.4927536231884058,
      "grad_norm": 0.23047557473182678,
      "learning_rate": 6.574160433332946e-06,
      "loss": 0.4062,
      "step": 12126
    },
    {
      "epoch": 2.4929591941617844,
      "grad_norm": 0.22950156033039093,
      "learning_rate": 6.568963321877061e-06,
      "loss": 0.3833,
      "step": 12127
    },
    {
      "epoch": 2.493164765135163,
      "grad_norm": 0.21891199052333832,
      "learning_rate": 6.563768103738734e-06,
      "loss": 0.3736,
      "step": 12128
    },
    {
      "epoch": 2.4933703361085415,
      "grad_norm": 0.22695685923099518,
      "learning_rate": 6.558574779173884e-06,
      "loss": 0.3752,
      "step": 12129
    },
    {
      "epoch": 2.49357590708192,
      "grad_norm": 0.12211208045482635,
      "learning_rate": 6.553383348438398e-06,
      "loss": 0.4442,
      "step": 12130
    },
    {
      "epoch": 2.4937814780552987,
      "grad_norm": 0.22641681134700775,
      "learning_rate": 6.548193811788011e-06,
      "loss": 0.3864,
      "step": 12131
    },
    {
      "epoch": 2.4939870490286773,
      "grad_norm": 0.11796488612890244,
      "learning_rate": 6.543006169478392e-06,
      "loss": 0.4571,
      "step": 12132
    },
    {
      "epoch": 2.494192620002056,
      "grad_norm": 0.226291224360466,
      "learning_rate": 6.537820421765109e-06,
      "loss": 0.38,
      "step": 12133
    },
    {
      "epoch": 2.494398190975434,
      "grad_norm": 0.22466683387756348,
      "learning_rate": 6.5326365689036465e-06,
      "loss": 0.4094,
      "step": 12134
    },
    {
      "epoch": 2.494603761948813,
      "grad_norm": 0.23120231926441193,
      "learning_rate": 6.5274546111493696e-06,
      "loss": 0.3899,
      "step": 12135
    },
    {
      "epoch": 2.494809332922191,
      "grad_norm": 0.23374420404434204,
      "learning_rate": 6.5222745487576e-06,
      "loss": 0.3821,
      "step": 12136
    },
    {
      "epoch": 2.49501490389557,
      "grad_norm": 0.22625453770160675,
      "learning_rate": 6.517096381983503e-06,
      "loss": 0.3882,
      "step": 12137
    },
    {
      "epoch": 2.4952204748689484,
      "grad_norm": 0.12417057901620865,
      "learning_rate": 6.51192011108221e-06,
      "loss": 0.4423,
      "step": 12138
    },
    {
      "epoch": 2.495426045842327,
      "grad_norm": 0.2231971025466919,
      "learning_rate": 6.506745736308721e-06,
      "loss": 0.3984,
      "step": 12139
    },
    {
      "epoch": 2.4956316168157056,
      "grad_norm": 0.2350044548511505,
      "learning_rate": 6.501573257917954e-06,
      "loss": 0.3884,
      "step": 12140
    },
    {
      "epoch": 2.495837187789084,
      "grad_norm": 0.23853430151939392,
      "learning_rate": 6.496402676164734e-06,
      "loss": 0.3903,
      "step": 12141
    },
    {
      "epoch": 2.4960427587624627,
      "grad_norm": 0.23373542726039886,
      "learning_rate": 6.4912339913037815e-06,
      "loss": 0.3925,
      "step": 12142
    },
    {
      "epoch": 2.4962483297358413,
      "grad_norm": 0.2317272126674652,
      "learning_rate": 6.486067203589738e-06,
      "loss": 0.4034,
      "step": 12143
    },
    {
      "epoch": 2.49645390070922,
      "grad_norm": 0.22617876529693604,
      "learning_rate": 6.480902313277152e-06,
      "loss": 0.3891,
      "step": 12144
    },
    {
      "epoch": 2.4966594716825985,
      "grad_norm": 0.22388514876365662,
      "learning_rate": 6.475739320620478e-06,
      "loss": 0.3823,
      "step": 12145
    },
    {
      "epoch": 2.496865042655977,
      "grad_norm": 0.12233025580644608,
      "learning_rate": 6.470578225874062e-06,
      "loss": 0.459,
      "step": 12146
    },
    {
      "epoch": 2.4970706136293557,
      "grad_norm": 0.2257211059331894,
      "learning_rate": 6.4654190292921724e-06,
      "loss": 0.3908,
      "step": 12147
    },
    {
      "epoch": 2.4972761846027343,
      "grad_norm": 0.2302434891462326,
      "learning_rate": 6.460261731128975e-06,
      "loss": 0.3994,
      "step": 12148
    },
    {
      "epoch": 2.4974817555761124,
      "grad_norm": 0.2282235473394394,
      "learning_rate": 6.455106331638541e-06,
      "loss": 0.3751,
      "step": 12149
    },
    {
      "epoch": 2.4976873265494914,
      "grad_norm": 0.23330600559711456,
      "learning_rate": 6.449952831074869e-06,
      "loss": 0.3851,
      "step": 12150
    },
    {
      "epoch": 2.4978928975228696,
      "grad_norm": 0.22312867641448975,
      "learning_rate": 6.4448012296918385e-06,
      "loss": 0.3799,
      "step": 12151
    },
    {
      "epoch": 2.498098468496248,
      "grad_norm": 0.22371982038021088,
      "learning_rate": 6.439651527743244e-06,
      "loss": 0.386,
      "step": 12152
    },
    {
      "epoch": 2.4983040394696268,
      "grad_norm": 0.2417476773262024,
      "learning_rate": 6.434503725482785e-06,
      "loss": 0.3929,
      "step": 12153
    },
    {
      "epoch": 2.4985096104430053,
      "grad_norm": 0.23515672981739044,
      "learning_rate": 6.429357823164076e-06,
      "loss": 0.3886,
      "step": 12154
    },
    {
      "epoch": 2.498715181416384,
      "grad_norm": 0.22999493777751923,
      "learning_rate": 6.424213821040627e-06,
      "loss": 0.3596,
      "step": 12155
    },
    {
      "epoch": 2.4989207523897625,
      "grad_norm": 0.2299181967973709,
      "learning_rate": 6.419071719365853e-06,
      "loss": 0.3789,
      "step": 12156
    },
    {
      "epoch": 2.499126323363141,
      "grad_norm": 0.23717856407165527,
      "learning_rate": 6.4139315183930986e-06,
      "loss": 0.3868,
      "step": 12157
    },
    {
      "epoch": 2.4993318943365197,
      "grad_norm": 0.22513870894908905,
      "learning_rate": 6.408793218375587e-06,
      "loss": 0.3657,
      "step": 12158
    },
    {
      "epoch": 2.4995374653098983,
      "grad_norm": 0.22355802357196808,
      "learning_rate": 6.403656819566447e-06,
      "loss": 0.3665,
      "step": 12159
    },
    {
      "epoch": 2.499743036283277,
      "grad_norm": 0.23084700107574463,
      "learning_rate": 6.3985223222187455e-06,
      "loss": 0.3808,
      "step": 12160
    },
    {
      "epoch": 2.4999486072566555,
      "grad_norm": 0.22430096566677094,
      "learning_rate": 6.393389726585429e-06,
      "loss": 0.3874,
      "step": 12161
    },
    {
      "epoch": 2.500154178230034,
      "grad_norm": 0.23496957123279572,
      "learning_rate": 6.388259032919352e-06,
      "loss": 0.4068,
      "step": 12162
    },
    {
      "epoch": 2.5003597492034126,
      "grad_norm": 0.22846169769763947,
      "learning_rate": 6.383130241473271e-06,
      "loss": 0.3625,
      "step": 12163
    },
    {
      "epoch": 2.500565320176791,
      "grad_norm": 0.23542927205562592,
      "learning_rate": 6.37800335249988e-06,
      "loss": 0.4062,
      "step": 12164
    },
    {
      "epoch": 2.50077089115017,
      "grad_norm": 0.22982755303382874,
      "learning_rate": 6.372878366251746e-06,
      "loss": 0.3788,
      "step": 12165
    },
    {
      "epoch": 2.500976462123548,
      "grad_norm": 0.2346840351819992,
      "learning_rate": 6.3677552829813525e-06,
      "loss": 0.3856,
      "step": 12166
    },
    {
      "epoch": 2.5011820330969265,
      "grad_norm": 0.23400172591209412,
      "learning_rate": 6.362634102941088e-06,
      "loss": 0.3948,
      "step": 12167
    },
    {
      "epoch": 2.501387604070305,
      "grad_norm": 0.2556484639644623,
      "learning_rate": 6.357514826383249e-06,
      "loss": 0.4074,
      "step": 12168
    },
    {
      "epoch": 2.5015931750436837,
      "grad_norm": 0.23373647034168243,
      "learning_rate": 6.352397453560041e-06,
      "loss": 0.3774,
      "step": 12169
    },
    {
      "epoch": 2.5017987460170623,
      "grad_norm": 0.23084743320941925,
      "learning_rate": 6.347281984723565e-06,
      "loss": 0.378,
      "step": 12170
    },
    {
      "epoch": 2.502004316990441,
      "grad_norm": 0.22970278561115265,
      "learning_rate": 6.342168420125852e-06,
      "loss": 0.3945,
      "step": 12171
    },
    {
      "epoch": 2.5022098879638195,
      "grad_norm": 0.22761283814907074,
      "learning_rate": 6.337056760018814e-06,
      "loss": 0.393,
      "step": 12172
    },
    {
      "epoch": 2.502415458937198,
      "grad_norm": 0.2262086719274521,
      "learning_rate": 6.331947004654279e-06,
      "loss": 0.4013,
      "step": 12173
    },
    {
      "epoch": 2.5026210299105767,
      "grad_norm": 0.22546137869358063,
      "learning_rate": 6.326839154283977e-06,
      "loss": 0.3821,
      "step": 12174
    },
    {
      "epoch": 2.5028266008839553,
      "grad_norm": 0.12685376405715942,
      "learning_rate": 6.321733209159555e-06,
      "loss": 0.4521,
      "step": 12175
    },
    {
      "epoch": 2.503032171857334,
      "grad_norm": 0.1270647794008255,
      "learning_rate": 6.316629169532559e-06,
      "loss": 0.443,
      "step": 12176
    },
    {
      "epoch": 2.5032377428307124,
      "grad_norm": 0.23198673129081726,
      "learning_rate": 6.3115270356544265e-06,
      "loss": 0.3716,
      "step": 12177
    },
    {
      "epoch": 2.503443313804091,
      "grad_norm": 0.22710855305194855,
      "learning_rate": 6.306426807776537e-06,
      "loss": 0.3858,
      "step": 12178
    },
    {
      "epoch": 2.503648884777469,
      "grad_norm": 0.222482368350029,
      "learning_rate": 6.301328486150148e-06,
      "loss": 0.3927,
      "step": 12179
    },
    {
      "epoch": 2.503854455750848,
      "grad_norm": 0.23889537155628204,
      "learning_rate": 6.2962320710264155e-06,
      "loss": 0.4017,
      "step": 12180
    },
    {
      "epoch": 2.5040600267242263,
      "grad_norm": 0.22771425545215607,
      "learning_rate": 6.291137562656433e-06,
      "loss": 0.3956,
      "step": 12181
    },
    {
      "epoch": 2.5042655976976054,
      "grad_norm": 0.23738330602645874,
      "learning_rate": 6.286044961291184e-06,
      "loss": 0.3685,
      "step": 12182
    },
    {
      "epoch": 2.5044711686709835,
      "grad_norm": 0.23009170591831207,
      "learning_rate": 6.2809542671815495e-06,
      "loss": 0.3939,
      "step": 12183
    },
    {
      "epoch": 2.504676739644362,
      "grad_norm": 0.2258174568414688,
      "learning_rate": 6.275865480578317e-06,
      "loss": 0.3827,
      "step": 12184
    },
    {
      "epoch": 2.5048823106177407,
      "grad_norm": 0.21696443855762482,
      "learning_rate": 6.2707786017322066e-06,
      "loss": 0.3773,
      "step": 12185
    },
    {
      "epoch": 2.5050878815911193,
      "grad_norm": 0.23282679915428162,
      "learning_rate": 6.265693630893814e-06,
      "loss": 0.4009,
      "step": 12186
    },
    {
      "epoch": 2.505293452564498,
      "grad_norm": 0.2228788286447525,
      "learning_rate": 6.260610568313647e-06,
      "loss": 0.3716,
      "step": 12187
    },
    {
      "epoch": 2.5054990235378765,
      "grad_norm": 0.2226657122373581,
      "learning_rate": 6.255529414242136e-06,
      "loss": 0.3872,
      "step": 12188
    },
    {
      "epoch": 2.505704594511255,
      "grad_norm": 0.22949428856372833,
      "learning_rate": 6.250450168929597e-06,
      "loss": 0.3995,
      "step": 12189
    },
    {
      "epoch": 2.5059101654846336,
      "grad_norm": 2.063056707382202,
      "learning_rate": 6.2453728326262674e-06,
      "loss": 0.4019,
      "step": 12190
    },
    {
      "epoch": 2.506115736458012,
      "grad_norm": 0.23003017902374268,
      "learning_rate": 6.240297405582264e-06,
      "loss": 0.3975,
      "step": 12191
    },
    {
      "epoch": 2.506321307431391,
      "grad_norm": 0.23214492201805115,
      "learning_rate": 6.235223888047661e-06,
      "loss": 0.3863,
      "step": 12192
    },
    {
      "epoch": 2.5065268784047694,
      "grad_norm": 0.2411757856607437,
      "learning_rate": 6.2301522802723835e-06,
      "loss": 0.3888,
      "step": 12193
    },
    {
      "epoch": 2.5067324493781475,
      "grad_norm": 0.22843949496746063,
      "learning_rate": 6.2250825825062975e-06,
      "loss": 0.4066,
      "step": 12194
    },
    {
      "epoch": 2.5069380203515266,
      "grad_norm": 0.2403997927904129,
      "learning_rate": 6.2200147949991624e-06,
      "loss": 0.3949,
      "step": 12195
    },
    {
      "epoch": 2.5071435913249047,
      "grad_norm": 0.23729455471038818,
      "learning_rate": 6.214948918000638e-06,
      "loss": 0.3915,
      "step": 12196
    },
    {
      "epoch": 2.5073491622982838,
      "grad_norm": 0.21809126436710358,
      "learning_rate": 6.209884951760296e-06,
      "loss": 0.3535,
      "step": 12197
    },
    {
      "epoch": 2.507554733271662,
      "grad_norm": 0.22672174870967865,
      "learning_rate": 6.20482289652761e-06,
      "loss": 0.3854,
      "step": 12198
    },
    {
      "epoch": 2.5077603042450405,
      "grad_norm": 0.22650691866874695,
      "learning_rate": 6.199762752551988e-06,
      "loss": 0.3908,
      "step": 12199
    },
    {
      "epoch": 2.507965875218419,
      "grad_norm": 0.22250515222549438,
      "learning_rate": 6.194704520082694e-06,
      "loss": 0.3765,
      "step": 12200
    },
    {
      "epoch": 2.5081714461917977,
      "grad_norm": 0.2284214347600937,
      "learning_rate": 6.189648199368929e-06,
      "loss": 0.3919,
      "step": 12201
    },
    {
      "epoch": 2.5083770171651762,
      "grad_norm": 0.23341004550457,
      "learning_rate": 6.184593790659807e-06,
      "loss": 0.3923,
      "step": 12202
    },
    {
      "epoch": 2.508582588138555,
      "grad_norm": 0.24929523468017578,
      "learning_rate": 6.179541294204327e-06,
      "loss": 0.3788,
      "step": 12203
    },
    {
      "epoch": 2.5087881591119334,
      "grad_norm": 0.23570400476455688,
      "learning_rate": 6.174490710251398e-06,
      "loss": 0.3904,
      "step": 12204
    },
    {
      "epoch": 2.508993730085312,
      "grad_norm": 0.22578248381614685,
      "learning_rate": 6.169442039049831e-06,
      "loss": 0.4045,
      "step": 12205
    },
    {
      "epoch": 2.5091993010586906,
      "grad_norm": 0.2417832911014557,
      "learning_rate": 6.1643952808483726e-06,
      "loss": 0.3621,
      "step": 12206
    },
    {
      "epoch": 2.509404872032069,
      "grad_norm": 0.2352113127708435,
      "learning_rate": 6.159350435895643e-06,
      "loss": 0.3799,
      "step": 12207
    },
    {
      "epoch": 2.5096104430054478,
      "grad_norm": 0.1250247210264206,
      "learning_rate": 6.154307504440175e-06,
      "loss": 0.4474,
      "step": 12208
    },
    {
      "epoch": 2.509816013978826,
      "grad_norm": 0.22554874420166016,
      "learning_rate": 6.149266486730414e-06,
      "loss": 0.3857,
      "step": 12209
    },
    {
      "epoch": 2.510021584952205,
      "grad_norm": 0.23448492586612701,
      "learning_rate": 6.144227383014705e-06,
      "loss": 0.3939,
      "step": 12210
    },
    {
      "epoch": 2.510227155925583,
      "grad_norm": 0.23547668755054474,
      "learning_rate": 6.139190193541301e-06,
      "loss": 0.4062,
      "step": 12211
    },
    {
      "epoch": 2.510432726898962,
      "grad_norm": 0.2341381311416626,
      "learning_rate": 6.1341549185583495e-06,
      "loss": 0.3777,
      "step": 12212
    },
    {
      "epoch": 2.5106382978723403,
      "grad_norm": 0.23182830214500427,
      "learning_rate": 6.129121558313939e-06,
      "loss": 0.3878,
      "step": 12213
    },
    {
      "epoch": 2.510843868845719,
      "grad_norm": 0.23118196427822113,
      "learning_rate": 6.124090113056029e-06,
      "loss": 0.3822,
      "step": 12214
    },
    {
      "epoch": 2.5110494398190975,
      "grad_norm": 0.2306642383337021,
      "learning_rate": 6.11906058303249e-06,
      "loss": 0.3742,
      "step": 12215
    },
    {
      "epoch": 2.511255010792476,
      "grad_norm": 0.2363126426935196,
      "learning_rate": 6.114032968491108e-06,
      "loss": 0.3679,
      "step": 12216
    },
    {
      "epoch": 2.5114605817658546,
      "grad_norm": 0.22103947401046753,
      "learning_rate": 6.109007269679567e-06,
      "loss": 0.378,
      "step": 12217
    },
    {
      "epoch": 2.511666152739233,
      "grad_norm": 0.23410068452358246,
      "learning_rate": 6.1039834868454676e-06,
      "loss": 0.3637,
      "step": 12218
    },
    {
      "epoch": 2.511871723712612,
      "grad_norm": 0.2275317907333374,
      "learning_rate": 6.098961620236286e-06,
      "loss": 0.3731,
      "step": 12219
    },
    {
      "epoch": 2.5120772946859904,
      "grad_norm": 0.2327854335308075,
      "learning_rate": 6.093941670099456e-06,
      "loss": 0.3812,
      "step": 12220
    },
    {
      "epoch": 2.512282865659369,
      "grad_norm": 0.11918573826551437,
      "learning_rate": 6.088923636682273e-06,
      "loss": 0.4619,
      "step": 12221
    },
    {
      "epoch": 2.5124884366327476,
      "grad_norm": 0.11901576071977615,
      "learning_rate": 6.083907520231941e-06,
      "loss": 0.4478,
      "step": 12222
    },
    {
      "epoch": 2.512694007606126,
      "grad_norm": 0.23713438212871552,
      "learning_rate": 6.0788933209956015e-06,
      "loss": 0.3682,
      "step": 12223
    },
    {
      "epoch": 2.5128995785795043,
      "grad_norm": 0.22688627243041992,
      "learning_rate": 6.0738810392202725e-06,
      "loss": 0.3878,
      "step": 12224
    },
    {
      "epoch": 2.5131051495528833,
      "grad_norm": 0.22531628608703613,
      "learning_rate": 6.068870675152875e-06,
      "loss": 0.3921,
      "step": 12225
    },
    {
      "epoch": 2.5133107205262615,
      "grad_norm": 0.23456744849681854,
      "learning_rate": 6.063862229040268e-06,
      "loss": 0.3799,
      "step": 12226
    },
    {
      "epoch": 2.5135162914996405,
      "grad_norm": 0.22432486712932587,
      "learning_rate": 6.058855701129178e-06,
      "loss": 0.386,
      "step": 12227
    },
    {
      "epoch": 2.5137218624730187,
      "grad_norm": 0.23045605421066284,
      "learning_rate": 6.0538510916662595e-06,
      "loss": 0.3704,
      "step": 12228
    },
    {
      "epoch": 2.5139274334463972,
      "grad_norm": 0.23640716075897217,
      "learning_rate": 6.048848400898063e-06,
      "loss": 0.3814,
      "step": 12229
    },
    {
      "epoch": 2.514133004419776,
      "grad_norm": 0.23872627317905426,
      "learning_rate": 6.043847629071049e-06,
      "loss": 0.3968,
      "step": 12230
    },
    {
      "epoch": 2.5143385753931544,
      "grad_norm": 0.2490765005350113,
      "learning_rate": 6.038848776431582e-06,
      "loss": 0.396,
      "step": 12231
    },
    {
      "epoch": 2.514544146366533,
      "grad_norm": 0.24178048968315125,
      "learning_rate": 6.033851843225918e-06,
      "loss": 0.393,
      "step": 12232
    },
    {
      "epoch": 2.5147497173399116,
      "grad_norm": 0.12222810834646225,
      "learning_rate": 6.028856829700258e-06,
      "loss": 0.4281,
      "step": 12233
    },
    {
      "epoch": 2.51495528831329,
      "grad_norm": 0.23273582756519318,
      "learning_rate": 6.023863736100677e-06,
      "loss": 0.3911,
      "step": 12234
    },
    {
      "epoch": 2.5151608592866688,
      "grad_norm": 0.23268084228038788,
      "learning_rate": 6.0188725626731475e-06,
      "loss": 0.3771,
      "step": 12235
    },
    {
      "epoch": 2.5153664302600474,
      "grad_norm": 0.22554205358028412,
      "learning_rate": 6.013883309663577e-06,
      "loss": 0.3792,
      "step": 12236
    },
    {
      "epoch": 2.515572001233426,
      "grad_norm": 0.12636855244636536,
      "learning_rate": 6.00889597731775e-06,
      "loss": 0.4423,
      "step": 12237
    },
    {
      "epoch": 2.5157775722068045,
      "grad_norm": 0.22077181935310364,
      "learning_rate": 6.0039105658813745e-06,
      "loss": 0.4052,
      "step": 12238
    },
    {
      "epoch": 2.515983143180183,
      "grad_norm": 0.2312643826007843,
      "learning_rate": 5.998927075600054e-06,
      "loss": 0.3937,
      "step": 12239
    },
    {
      "epoch": 2.5161887141535617,
      "grad_norm": 0.22247177362442017,
      "learning_rate": 5.993945506719307e-06,
      "loss": 0.3851,
      "step": 12240
    },
    {
      "epoch": 2.51639428512694,
      "grad_norm": 0.12727056443691254,
      "learning_rate": 5.988965859484558e-06,
      "loss": 0.4577,
      "step": 12241
    },
    {
      "epoch": 2.516599856100319,
      "grad_norm": 0.2271554172039032,
      "learning_rate": 5.9839881341411235e-06,
      "loss": 0.3691,
      "step": 12242
    },
    {
      "epoch": 2.516805427073697,
      "grad_norm": 0.22784371674060822,
      "learning_rate": 5.97901233093423e-06,
      "loss": 0.3829,
      "step": 12243
    },
    {
      "epoch": 2.5170109980470756,
      "grad_norm": 0.235183447599411,
      "learning_rate": 5.974038450109005e-06,
      "loss": 0.399,
      "step": 12244
    },
    {
      "epoch": 2.517216569020454,
      "grad_norm": 0.22664892673492432,
      "learning_rate": 5.969066491910514e-06,
      "loss": 0.3783,
      "step": 12245
    },
    {
      "epoch": 2.517422139993833,
      "grad_norm": 0.2340896725654602,
      "learning_rate": 5.9640964565836684e-06,
      "loss": 0.3887,
      "step": 12246
    },
    {
      "epoch": 2.5176277109672114,
      "grad_norm": 0.23754270374774933,
      "learning_rate": 5.959128344373354e-06,
      "loss": 0.3782,
      "step": 12247
    },
    {
      "epoch": 2.51783328194059,
      "grad_norm": 0.2322191596031189,
      "learning_rate": 5.9541621555243055e-06,
      "loss": 0.3946,
      "step": 12248
    },
    {
      "epoch": 2.5180388529139686,
      "grad_norm": 0.24042516946792603,
      "learning_rate": 5.9491978902811915e-06,
      "loss": 0.405,
      "step": 12249
    },
    {
      "epoch": 2.518244423887347,
      "grad_norm": 0.23382358253002167,
      "learning_rate": 5.944235548888571e-06,
      "loss": 0.3944,
      "step": 12250
    },
    {
      "epoch": 2.5184499948607257,
      "grad_norm": 0.22752118110656738,
      "learning_rate": 5.939275131590924e-06,
      "loss": 0.3723,
      "step": 12251
    },
    {
      "epoch": 2.5186555658341043,
      "grad_norm": 0.23085589706897736,
      "learning_rate": 5.934316638632615e-06,
      "loss": 0.3916,
      "step": 12252
    },
    {
      "epoch": 2.518861136807483,
      "grad_norm": 0.23417700827121735,
      "learning_rate": 5.929360070257928e-06,
      "loss": 0.3699,
      "step": 12253
    },
    {
      "epoch": 2.5190667077808615,
      "grad_norm": 0.23297809064388275,
      "learning_rate": 5.924405426711064e-06,
      "loss": 0.3863,
      "step": 12254
    },
    {
      "epoch": 2.51927227875424,
      "grad_norm": 0.3042377531528473,
      "learning_rate": 5.919452708236101e-06,
      "loss": 0.3995,
      "step": 12255
    },
    {
      "epoch": 2.5194778497276182,
      "grad_norm": 0.3225111663341522,
      "learning_rate": 5.914501915077045e-06,
      "loss": 0.387,
      "step": 12256
    },
    {
      "epoch": 2.5196834207009973,
      "grad_norm": 0.12028972804546356,
      "learning_rate": 5.909553047477796e-06,
      "loss": 0.447,
      "step": 12257
    },
    {
      "epoch": 2.5198889916743754,
      "grad_norm": 0.22156624495983124,
      "learning_rate": 5.904606105682159e-06,
      "loss": 0.3813,
      "step": 12258
    },
    {
      "epoch": 2.520094562647754,
      "grad_norm": 0.23767083883285522,
      "learning_rate": 5.899661089933842e-06,
      "loss": 0.3754,
      "step": 12259
    },
    {
      "epoch": 2.5203001336211326,
      "grad_norm": 0.24094241857528687,
      "learning_rate": 5.894718000476468e-06,
      "loss": 0.3879,
      "step": 12260
    },
    {
      "epoch": 2.520505704594511,
      "grad_norm": 0.23110216856002808,
      "learning_rate": 5.889776837553565e-06,
      "loss": 0.384,
      "step": 12261
    },
    {
      "epoch": 2.5207112755678898,
      "grad_norm": 0.23392094671726227,
      "learning_rate": 5.884837601408556e-06,
      "loss": 0.3925,
      "step": 12262
    },
    {
      "epoch": 2.5209168465412684,
      "grad_norm": 0.23152020573616028,
      "learning_rate": 5.879900292284778e-06,
      "loss": 0.391,
      "step": 12263
    },
    {
      "epoch": 2.521122417514647,
      "grad_norm": 0.2314983457326889,
      "learning_rate": 5.8749649104254634e-06,
      "loss": 0.3918,
      "step": 12264
    },
    {
      "epoch": 2.5213279884880255,
      "grad_norm": 0.2333018183708191,
      "learning_rate": 5.870031456073747e-06,
      "loss": 0.3686,
      "step": 12265
    },
    {
      "epoch": 2.521533559461404,
      "grad_norm": 0.22460217773914337,
      "learning_rate": 5.8650999294727e-06,
      "loss": 0.38,
      "step": 12266
    },
    {
      "epoch": 2.5217391304347827,
      "grad_norm": 0.2324889600276947,
      "learning_rate": 5.8601703308652585e-06,
      "loss": 0.3957,
      "step": 12267
    },
    {
      "epoch": 2.5219447014081613,
      "grad_norm": 0.22676560282707214,
      "learning_rate": 5.8552426604942814e-06,
      "loss": 0.3589,
      "step": 12268
    },
    {
      "epoch": 2.52215027238154,
      "grad_norm": 0.22790227830410004,
      "learning_rate": 5.8503169186025465e-06,
      "loss": 0.3892,
      "step": 12269
    },
    {
      "epoch": 2.5223558433549185,
      "grad_norm": 0.23212410509586334,
      "learning_rate": 5.845393105432708e-06,
      "loss": 0.3854,
      "step": 12270
    },
    {
      "epoch": 2.5225614143282966,
      "grad_norm": 0.23569580912590027,
      "learning_rate": 5.8404712212273436e-06,
      "loss": 0.3756,
      "step": 12271
    },
    {
      "epoch": 2.5227669853016756,
      "grad_norm": 0.22882294654846191,
      "learning_rate": 5.835551266228932e-06,
      "loss": 0.3866,
      "step": 12272
    },
    {
      "epoch": 2.522972556275054,
      "grad_norm": 0.22856424748897552,
      "learning_rate": 5.8306332406798574e-06,
      "loss": 0.3792,
      "step": 12273
    },
    {
      "epoch": 2.5231781272484324,
      "grad_norm": 0.240891695022583,
      "learning_rate": 5.825717144822393e-06,
      "loss": 0.3868,
      "step": 12274
    },
    {
      "epoch": 2.523383698221811,
      "grad_norm": 0.23485086858272552,
      "learning_rate": 5.820802978898757e-06,
      "loss": 0.3834,
      "step": 12275
    },
    {
      "epoch": 2.5235892691951896,
      "grad_norm": 0.23131176829338074,
      "learning_rate": 5.81589074315103e-06,
      "loss": 0.3774,
      "step": 12276
    },
    {
      "epoch": 2.523794840168568,
      "grad_norm": 0.24229222536087036,
      "learning_rate": 5.810980437821223e-06,
      "loss": 0.4105,
      "step": 12277
    },
    {
      "epoch": 2.5240004111419467,
      "grad_norm": 0.23501570522785187,
      "learning_rate": 5.806072063151243e-06,
      "loss": 0.3863,
      "step": 12278
    },
    {
      "epoch": 2.5242059821153253,
      "grad_norm": 0.22483564913272858,
      "learning_rate": 5.801165619382897e-06,
      "loss": 0.3781,
      "step": 12279
    },
    {
      "epoch": 2.524411553088704,
      "grad_norm": 0.2277892529964447,
      "learning_rate": 5.7962611067579116e-06,
      "loss": 0.3625,
      "step": 12280
    },
    {
      "epoch": 2.5246171240620825,
      "grad_norm": 0.2191118448972702,
      "learning_rate": 5.791358525517887e-06,
      "loss": 0.3664,
      "step": 12281
    },
    {
      "epoch": 2.524822695035461,
      "grad_norm": 0.12358484417200089,
      "learning_rate": 5.786457875904382e-06,
      "loss": 0.4396,
      "step": 12282
    },
    {
      "epoch": 2.5250282660088397,
      "grad_norm": 0.23930969834327698,
      "learning_rate": 5.781559158158813e-06,
      "loss": 0.3858,
      "step": 12283
    },
    {
      "epoch": 2.5252338369822183,
      "grad_norm": 0.231824591755867,
      "learning_rate": 5.776662372522516e-06,
      "loss": 0.3838,
      "step": 12284
    },
    {
      "epoch": 2.525439407955597,
      "grad_norm": 0.1248823031783104,
      "learning_rate": 5.771767519236734e-06,
      "loss": 0.4319,
      "step": 12285
    },
    {
      "epoch": 2.525644978928975,
      "grad_norm": 0.22508475184440613,
      "learning_rate": 5.766874598542609e-06,
      "loss": 0.3756,
      "step": 12286
    },
    {
      "epoch": 2.525850549902354,
      "grad_norm": 0.22717183828353882,
      "learning_rate": 5.761983610681201e-06,
      "loss": 0.3744,
      "step": 12287
    },
    {
      "epoch": 2.526056120875732,
      "grad_norm": 0.22601410746574402,
      "learning_rate": 5.757094555893466e-06,
      "loss": 0.3717,
      "step": 12288
    },
    {
      "epoch": 2.5262616918491108,
      "grad_norm": 0.2239820659160614,
      "learning_rate": 5.752207434420249e-06,
      "loss": 0.3665,
      "step": 12289
    },
    {
      "epoch": 2.5264672628224893,
      "grad_norm": 0.23698551952838898,
      "learning_rate": 5.747322246502343e-06,
      "loss": 0.4048,
      "step": 12290
    },
    {
      "epoch": 2.526672833795868,
      "grad_norm": 0.22845512628555298,
      "learning_rate": 5.742438992380399e-06,
      "loss": 0.3882,
      "step": 12291
    },
    {
      "epoch": 2.5268784047692465,
      "grad_norm": 0.232425257563591,
      "learning_rate": 5.7375576722949975e-06,
      "loss": 0.3715,
      "step": 12292
    },
    {
      "epoch": 2.527083975742625,
      "grad_norm": 0.21982984244823456,
      "learning_rate": 5.732678286486614e-06,
      "loss": 0.3603,
      "step": 12293
    },
    {
      "epoch": 2.5272895467160037,
      "grad_norm": 0.22463706135749817,
      "learning_rate": 5.727800835195642e-06,
      "loss": 0.3708,
      "step": 12294
    },
    {
      "epoch": 2.5274951176893823,
      "grad_norm": 0.12560081481933594,
      "learning_rate": 5.722925318662354e-06,
      "loss": 0.4498,
      "step": 12295
    },
    {
      "epoch": 2.527700688662761,
      "grad_norm": 0.2263532429933548,
      "learning_rate": 5.718051737126963e-06,
      "loss": 0.3865,
      "step": 12296
    },
    {
      "epoch": 2.5279062596361395,
      "grad_norm": 0.23695407807826996,
      "learning_rate": 5.713180090829561e-06,
      "loss": 0.3791,
      "step": 12297
    },
    {
      "epoch": 2.528111830609518,
      "grad_norm": 0.12093336135149002,
      "learning_rate": 5.708310380010148e-06,
      "loss": 0.4565,
      "step": 12298
    },
    {
      "epoch": 2.5283174015828966,
      "grad_norm": 0.237099289894104,
      "learning_rate": 5.703442604908635e-06,
      "loss": 0.4034,
      "step": 12299
    },
    {
      "epoch": 2.5285229725562752,
      "grad_norm": 0.26493915915489197,
      "learning_rate": 5.698576765764832e-06,
      "loss": 0.3807,
      "step": 12300
    },
    {
      "epoch": 2.5287285435296534,
      "grad_norm": 0.2276540845632553,
      "learning_rate": 5.693712862818446e-06,
      "loss": 0.381,
      "step": 12301
    },
    {
      "epoch": 2.5289341145030324,
      "grad_norm": 0.22775860130786896,
      "learning_rate": 5.688850896309126e-06,
      "loss": 0.3737,
      "step": 12302
    },
    {
      "epoch": 2.5291396854764105,
      "grad_norm": 0.12160097062587738,
      "learning_rate": 5.6839908664763745e-06,
      "loss": 0.4252,
      "step": 12303
    },
    {
      "epoch": 2.529345256449789,
      "grad_norm": 0.24990959465503693,
      "learning_rate": 5.679132773559636e-06,
      "loss": 0.3963,
      "step": 12304
    },
    {
      "epoch": 2.5295508274231677,
      "grad_norm": 0.2322610765695572,
      "learning_rate": 5.674276617798239e-06,
      "loss": 0.3973,
      "step": 12305
    },
    {
      "epoch": 2.5297563983965463,
      "grad_norm": 0.23721322417259216,
      "learning_rate": 5.669422399431426e-06,
      "loss": 0.4063,
      "step": 12306
    },
    {
      "epoch": 2.529961969369925,
      "grad_norm": 0.23168140649795532,
      "learning_rate": 5.6645701186983416e-06,
      "loss": 0.3688,
      "step": 12307
    },
    {
      "epoch": 2.5301675403433035,
      "grad_norm": 0.22344398498535156,
      "learning_rate": 5.65971977583802e-06,
      "loss": 0.3936,
      "step": 12308
    },
    {
      "epoch": 2.530373111316682,
      "grad_norm": 0.2303028702735901,
      "learning_rate": 5.6548713710894444e-06,
      "loss": 0.3847,
      "step": 12309
    },
    {
      "epoch": 2.5305786822900607,
      "grad_norm": 0.23595616221427917,
      "learning_rate": 5.650024904691443e-06,
      "loss": 0.3789,
      "step": 12310
    },
    {
      "epoch": 2.5307842532634393,
      "grad_norm": 0.23113130033016205,
      "learning_rate": 5.645180376882806e-06,
      "loss": 0.3743,
      "step": 12311
    },
    {
      "epoch": 2.530989824236818,
      "grad_norm": 0.23540560901165009,
      "learning_rate": 5.640337787902188e-06,
      "loss": 0.3958,
      "step": 12312
    },
    {
      "epoch": 2.5311953952101964,
      "grad_norm": 0.23868121206760406,
      "learning_rate": 5.635497137988157e-06,
      "loss": 0.3984,
      "step": 12313
    },
    {
      "epoch": 2.531400966183575,
      "grad_norm": 0.23868517577648163,
      "learning_rate": 5.6306584273791965e-06,
      "loss": 0.4009,
      "step": 12314
    },
    {
      "epoch": 2.5316065371569536,
      "grad_norm": 0.1235857680439949,
      "learning_rate": 5.625821656313673e-06,
      "loss": 0.456,
      "step": 12315
    },
    {
      "epoch": 2.5318121081303318,
      "grad_norm": 0.22929808497428894,
      "learning_rate": 5.620986825029889e-06,
      "loss": 0.3708,
      "step": 12316
    },
    {
      "epoch": 2.532017679103711,
      "grad_norm": 0.22845540940761566,
      "learning_rate": 5.6161539337660305e-06,
      "loss": 0.3914,
      "step": 12317
    },
    {
      "epoch": 2.532223250077089,
      "grad_norm": 0.22550463676452637,
      "learning_rate": 5.611322982760191e-06,
      "loss": 0.3671,
      "step": 12318
    },
    {
      "epoch": 2.5324288210504675,
      "grad_norm": 0.223682701587677,
      "learning_rate": 5.606493972250359e-06,
      "loss": 0.3678,
      "step": 12319
    },
    {
      "epoch": 2.532634392023846,
      "grad_norm": 0.22872628271579742,
      "learning_rate": 5.601666902474447e-06,
      "loss": 0.3995,
      "step": 12320
    },
    {
      "epoch": 2.5328399629972247,
      "grad_norm": 0.12020973116159439,
      "learning_rate": 5.596841773670258e-06,
      "loss": 0.4526,
      "step": 12321
    },
    {
      "epoch": 2.5330455339706033,
      "grad_norm": 0.22796304523944855,
      "learning_rate": 5.592018586075498e-06,
      "loss": 0.3907,
      "step": 12322
    },
    {
      "epoch": 2.533251104943982,
      "grad_norm": 0.22996073961257935,
      "learning_rate": 5.5871973399278e-06,
      "loss": 0.3912,
      "step": 12323
    },
    {
      "epoch": 2.5334566759173605,
      "grad_norm": 0.23374302685260773,
      "learning_rate": 5.582378035464671e-06,
      "loss": 0.3796,
      "step": 12324
    },
    {
      "epoch": 2.533662246890739,
      "grad_norm": 0.12319260090589523,
      "learning_rate": 5.577560672923539e-06,
      "loss": 0.4496,
      "step": 12325
    },
    {
      "epoch": 2.5338678178641176,
      "grad_norm": 0.23780031502246857,
      "learning_rate": 5.572745252541736e-06,
      "loss": 0.3854,
      "step": 12326
    },
    {
      "epoch": 2.534073388837496,
      "grad_norm": 0.22886228561401367,
      "learning_rate": 5.567931774556487e-06,
      "loss": 0.3914,
      "step": 12327
    },
    {
      "epoch": 2.534278959810875,
      "grad_norm": 0.2357688695192337,
      "learning_rate": 5.563120239204937e-06,
      "loss": 0.3849,
      "step": 12328
    },
    {
      "epoch": 2.5344845307842534,
      "grad_norm": 0.23062871396541595,
      "learning_rate": 5.558310646724115e-06,
      "loss": 0.369,
      "step": 12329
    },
    {
      "epoch": 2.534690101757632,
      "grad_norm": 0.24323634803295135,
      "learning_rate": 5.553502997350989e-06,
      "loss": 0.3802,
      "step": 12330
    },
    {
      "epoch": 2.53489567273101,
      "grad_norm": 0.24064137041568756,
      "learning_rate": 5.548697291322398e-06,
      "loss": 0.3606,
      "step": 12331
    },
    {
      "epoch": 2.535101243704389,
      "grad_norm": 0.22627827525138855,
      "learning_rate": 5.543893528875087e-06,
      "loss": 0.3883,
      "step": 12332
    },
    {
      "epoch": 2.5353068146777673,
      "grad_norm": 0.2306792140007019,
      "learning_rate": 5.539091710245729e-06,
      "loss": 0.3751,
      "step": 12333
    },
    {
      "epoch": 2.535512385651146,
      "grad_norm": 0.12287892401218414,
      "learning_rate": 5.534291835670888e-06,
      "loss": 0.4274,
      "step": 12334
    },
    {
      "epoch": 2.5357179566245245,
      "grad_norm": 0.22518762946128845,
      "learning_rate": 5.529493905387025e-06,
      "loss": 0.3907,
      "step": 12335
    },
    {
      "epoch": 2.535923527597903,
      "grad_norm": 0.2295764982700348,
      "learning_rate": 5.524697919630501e-06,
      "loss": 0.3925,
      "step": 12336
    },
    {
      "epoch": 2.5361290985712817,
      "grad_norm": 0.23383575677871704,
      "learning_rate": 5.519903878637617e-06,
      "loss": 0.4127,
      "step": 12337
    },
    {
      "epoch": 2.5363346695446602,
      "grad_norm": 0.24502725899219513,
      "learning_rate": 5.515111782644535e-06,
      "loss": 0.3995,
      "step": 12338
    },
    {
      "epoch": 2.536540240518039,
      "grad_norm": 0.24664360284805298,
      "learning_rate": 5.510321631887345e-06,
      "loss": 0.3686,
      "step": 12339
    },
    {
      "epoch": 2.5367458114914174,
      "grad_norm": 0.12008702009916306,
      "learning_rate": 5.505533426602033e-06,
      "loss": 0.4564,
      "step": 12340
    },
    {
      "epoch": 2.536951382464796,
      "grad_norm": 0.23110847175121307,
      "learning_rate": 5.500747167024496e-06,
      "loss": 0.3741,
      "step": 12341
    },
    {
      "epoch": 2.5371569534381746,
      "grad_norm": 0.22447291016578674,
      "learning_rate": 5.495962853390521e-06,
      "loss": 0.3598,
      "step": 12342
    },
    {
      "epoch": 2.537362524411553,
      "grad_norm": 0.11924073100090027,
      "learning_rate": 5.491180485935813e-06,
      "loss": 0.4384,
      "step": 12343
    },
    {
      "epoch": 2.5375680953849318,
      "grad_norm": 0.22318169474601746,
      "learning_rate": 5.48640006489598e-06,
      "loss": 0.3797,
      "step": 12344
    },
    {
      "epoch": 2.5377736663583104,
      "grad_norm": 0.22601492702960968,
      "learning_rate": 5.4816215905065375e-06,
      "loss": 0.3914,
      "step": 12345
    },
    {
      "epoch": 2.5379792373316885,
      "grad_norm": 0.23133814334869385,
      "learning_rate": 5.476845063002888e-06,
      "loss": 0.3854,
      "step": 12346
    },
    {
      "epoch": 2.5381848083050675,
      "grad_norm": 0.12131541967391968,
      "learning_rate": 5.472070482620347e-06,
      "loss": 0.4463,
      "step": 12347
    },
    {
      "epoch": 2.5383903792784457,
      "grad_norm": 0.23147541284561157,
      "learning_rate": 5.467297849594143e-06,
      "loss": 0.3833,
      "step": 12348
    },
    {
      "epoch": 2.5385959502518247,
      "grad_norm": 0.23673370480537415,
      "learning_rate": 5.462527164159402e-06,
      "loss": 0.381,
      "step": 12349
    },
    {
      "epoch": 2.538801521225203,
      "grad_norm": 0.22934825718402863,
      "learning_rate": 5.457758426551136e-06,
      "loss": 0.3894,
      "step": 12350
    },
    {
      "epoch": 2.5390070921985815,
      "grad_norm": 0.22352683544158936,
      "learning_rate": 5.4529916370043065e-06,
      "loss": 0.3989,
      "step": 12351
    },
    {
      "epoch": 2.53921266317196,
      "grad_norm": 0.23551110923290253,
      "learning_rate": 5.448226795753732e-06,
      "loss": 0.4017,
      "step": 12352
    },
    {
      "epoch": 2.5394182341453386,
      "grad_norm": 0.23551301658153534,
      "learning_rate": 5.443463903034154e-06,
      "loss": 0.3999,
      "step": 12353
    },
    {
      "epoch": 2.539623805118717,
      "grad_norm": 0.22477497160434723,
      "learning_rate": 5.43870295908023e-06,
      "loss": 0.3857,
      "step": 12354
    },
    {
      "epoch": 2.539829376092096,
      "grad_norm": 0.222430020570755,
      "learning_rate": 5.433943964126501e-06,
      "loss": 0.3661,
      "step": 12355
    },
    {
      "epoch": 2.5400349470654744,
      "grad_norm": 0.23153965175151825,
      "learning_rate": 5.429186918407423e-06,
      "loss": 0.3748,
      "step": 12356
    },
    {
      "epoch": 2.540240518038853,
      "grad_norm": 0.12405303865671158,
      "learning_rate": 5.4244318221573395e-06,
      "loss": 0.4329,
      "step": 12357
    },
    {
      "epoch": 2.5404460890122316,
      "grad_norm": 0.22384849190711975,
      "learning_rate": 5.419678675610535e-06,
      "loss": 0.3792,
      "step": 12358
    },
    {
      "epoch": 2.54065165998561,
      "grad_norm": 0.2254662811756134,
      "learning_rate": 5.414927479001167e-06,
      "loss": 0.3913,
      "step": 12359
    },
    {
      "epoch": 2.5408572309589887,
      "grad_norm": 0.23094946146011353,
      "learning_rate": 5.410178232563299e-06,
      "loss": 0.3677,
      "step": 12360
    },
    {
      "epoch": 2.541062801932367,
      "grad_norm": 0.23447729647159576,
      "learning_rate": 5.405430936530908e-06,
      "loss": 0.3659,
      "step": 12361
    },
    {
      "epoch": 2.541268372905746,
      "grad_norm": 0.23138076066970825,
      "learning_rate": 5.400685591137871e-06,
      "loss": 0.375,
      "step": 12362
    },
    {
      "epoch": 2.541473943879124,
      "grad_norm": 0.12305799126625061,
      "learning_rate": 5.395942196617968e-06,
      "loss": 0.4492,
      "step": 12363
    },
    {
      "epoch": 2.541679514852503,
      "grad_norm": 0.1181621253490448,
      "learning_rate": 5.391200753204876e-06,
      "loss": 0.4415,
      "step": 12364
    },
    {
      "epoch": 2.5418850858258812,
      "grad_norm": 0.12282504886388779,
      "learning_rate": 5.386461261132198e-06,
      "loss": 0.4412,
      "step": 12365
    },
    {
      "epoch": 2.54209065679926,
      "grad_norm": 0.23556901514530182,
      "learning_rate": 5.381723720633422e-06,
      "loss": 0.3947,
      "step": 12366
    },
    {
      "epoch": 2.5422962277726384,
      "grad_norm": 0.1227208599448204,
      "learning_rate": 5.376988131941943e-06,
      "loss": 0.4529,
      "step": 12367
    },
    {
      "epoch": 2.542501798746017,
      "grad_norm": 0.2223055213689804,
      "learning_rate": 5.3722544952910625e-06,
      "loss": 0.3783,
      "step": 12368
    },
    {
      "epoch": 2.5427073697193956,
      "grad_norm": 0.12605048716068268,
      "learning_rate": 5.367522810913984e-06,
      "loss": 0.4487,
      "step": 12369
    },
    {
      "epoch": 2.542912940692774,
      "grad_norm": 0.12217556685209274,
      "learning_rate": 5.362793079043813e-06,
      "loss": 0.4541,
      "step": 12370
    },
    {
      "epoch": 2.5431185116661528,
      "grad_norm": 0.2289581149816513,
      "learning_rate": 5.358065299913551e-06,
      "loss": 0.3795,
      "step": 12371
    },
    {
      "epoch": 2.5433240826395314,
      "grad_norm": 0.24024073779582977,
      "learning_rate": 5.3533394737561425e-06,
      "loss": 0.3983,
      "step": 12372
    },
    {
      "epoch": 2.54352965361291,
      "grad_norm": 0.2249261736869812,
      "learning_rate": 5.348615600804381e-06,
      "loss": 0.397,
      "step": 12373
    },
    {
      "epoch": 2.5437352245862885,
      "grad_norm": 0.231714129447937,
      "learning_rate": 5.3438936812909965e-06,
      "loss": 0.3847,
      "step": 12374
    },
    {
      "epoch": 2.543940795559667,
      "grad_norm": 0.2364065796136856,
      "learning_rate": 5.339173715448626e-06,
      "loss": 0.3956,
      "step": 12375
    },
    {
      "epoch": 2.5441463665330453,
      "grad_norm": 0.23184433579444885,
      "learning_rate": 5.33445570350979e-06,
      "loss": 0.3686,
      "step": 12376
    },
    {
      "epoch": 2.5443519375064243,
      "grad_norm": 0.23327279090881348,
      "learning_rate": 5.3297396457069164e-06,
      "loss": 0.3834,
      "step": 12377
    },
    {
      "epoch": 2.5445575084798024,
      "grad_norm": 0.2422483265399933,
      "learning_rate": 5.3250255422723655e-06,
      "loss": 0.3607,
      "step": 12378
    },
    {
      "epoch": 2.5447630794531815,
      "grad_norm": 0.22602832317352295,
      "learning_rate": 5.320313393438361e-06,
      "loss": 0.3734,
      "step": 12379
    },
    {
      "epoch": 2.5449686504265596,
      "grad_norm": 0.22192765772342682,
      "learning_rate": 5.315603199437057e-06,
      "loss": 0.3825,
      "step": 12380
    },
    {
      "epoch": 2.545174221399938,
      "grad_norm": 0.2314867079257965,
      "learning_rate": 5.310894960500493e-06,
      "loss": 0.3918,
      "step": 12381
    },
    {
      "epoch": 2.545379792373317,
      "grad_norm": 0.22359047830104828,
      "learning_rate": 5.306188676860634e-06,
      "loss": 0.3916,
      "step": 12382
    },
    {
      "epoch": 2.5455853633466954,
      "grad_norm": 0.23620890080928802,
      "learning_rate": 5.301484348749329e-06,
      "loss": 0.4001,
      "step": 12383
    },
    {
      "epoch": 2.545790934320074,
      "grad_norm": 0.23437555134296417,
      "learning_rate": 5.296781976398327e-06,
      "loss": 0.3721,
      "step": 12384
    },
    {
      "epoch": 2.5459965052934526,
      "grad_norm": 0.22928333282470703,
      "learning_rate": 5.292081560039319e-06,
      "loss": 0.3894,
      "step": 12385
    },
    {
      "epoch": 2.546202076266831,
      "grad_norm": 0.23154671490192413,
      "learning_rate": 5.287383099903855e-06,
      "loss": 0.3979,
      "step": 12386
    },
    {
      "epoch": 2.5464076472402097,
      "grad_norm": 0.23585215210914612,
      "learning_rate": 5.282686596223412e-06,
      "loss": 0.3604,
      "step": 12387
    },
    {
      "epoch": 2.5466132182135883,
      "grad_norm": 0.23773467540740967,
      "learning_rate": 5.277992049229358e-06,
      "loss": 0.3868,
      "step": 12388
    },
    {
      "epoch": 2.546818789186967,
      "grad_norm": 0.22915463149547577,
      "learning_rate": 5.273299459152977e-06,
      "loss": 0.371,
      "step": 12389
    },
    {
      "epoch": 2.5470243601603455,
      "grad_norm": 0.2335277944803238,
      "learning_rate": 5.268608826225454e-06,
      "loss": 0.3819,
      "step": 12390
    },
    {
      "epoch": 2.5472299311337236,
      "grad_norm": 0.24060097336769104,
      "learning_rate": 5.263920150677854e-06,
      "loss": 0.4,
      "step": 12391
    },
    {
      "epoch": 2.5474355021071027,
      "grad_norm": 0.2347687929868698,
      "learning_rate": 5.259233432741198e-06,
      "loss": 0.4035,
      "step": 12392
    },
    {
      "epoch": 2.547641073080481,
      "grad_norm": 0.23900634050369263,
      "learning_rate": 5.25454867264636e-06,
      "loss": 0.3856,
      "step": 12393
    },
    {
      "epoch": 2.54784664405386,
      "grad_norm": 0.22846248745918274,
      "learning_rate": 5.249865870624136e-06,
      "loss": 0.391,
      "step": 12394
    },
    {
      "epoch": 2.548052215027238,
      "grad_norm": 0.2248847782611847,
      "learning_rate": 5.2451850269052214e-06,
      "loss": 0.3927,
      "step": 12395
    },
    {
      "epoch": 2.5482577860006166,
      "grad_norm": 0.1248060017824173,
      "learning_rate": 5.2405061417202366e-06,
      "loss": 0.4394,
      "step": 12396
    },
    {
      "epoch": 2.548463356973995,
      "grad_norm": 0.22413742542266846,
      "learning_rate": 5.235829215299683e-06,
      "loss": 0.3923,
      "step": 12397
    },
    {
      "epoch": 2.5486689279473738,
      "grad_norm": 0.12117671221494675,
      "learning_rate": 5.2311542478739505e-06,
      "loss": 0.4538,
      "step": 12398
    },
    {
      "epoch": 2.5488744989207524,
      "grad_norm": 0.22767889499664307,
      "learning_rate": 5.226481239673385e-06,
      "loss": 0.3679,
      "step": 12399
    },
    {
      "epoch": 2.549080069894131,
      "grad_norm": 0.23341724276542664,
      "learning_rate": 5.221810190928183e-06,
      "loss": 0.3912,
      "step": 12400
    },
    {
      "epoch": 2.5492856408675095,
      "grad_norm": 0.2280956357717514,
      "learning_rate": 5.21714110186847e-06,
      "loss": 0.3871,
      "step": 12401
    },
    {
      "epoch": 2.549491211840888,
      "grad_norm": 0.22712482511997223,
      "learning_rate": 5.212473972724271e-06,
      "loss": 0.3725,
      "step": 12402
    },
    {
      "epoch": 2.5496967828142667,
      "grad_norm": 0.22607560455799103,
      "learning_rate": 5.207808803725519e-06,
      "loss": 0.3927,
      "step": 12403
    },
    {
      "epoch": 2.5499023537876453,
      "grad_norm": 0.26311928033828735,
      "learning_rate": 5.203145595102033e-06,
      "loss": 0.4036,
      "step": 12404
    },
    {
      "epoch": 2.550107924761024,
      "grad_norm": 0.12306389212608337,
      "learning_rate": 5.198484347083541e-06,
      "loss": 0.4641,
      "step": 12405
    },
    {
      "epoch": 2.550313495734402,
      "grad_norm": 0.12494704872369766,
      "learning_rate": 5.193825059899709e-06,
      "loss": 0.4593,
      "step": 12406
    },
    {
      "epoch": 2.550519066707781,
      "grad_norm": 0.2371709644794464,
      "learning_rate": 5.189167733780062e-06,
      "loss": 0.4007,
      "step": 12407
    },
    {
      "epoch": 2.550724637681159,
      "grad_norm": 0.22765342891216278,
      "learning_rate": 5.184512368954043e-06,
      "loss": 0.3812,
      "step": 12408
    },
    {
      "epoch": 2.5509302086545382,
      "grad_norm": 0.12098430842161179,
      "learning_rate": 5.1798589656510035e-06,
      "loss": 0.4594,
      "step": 12409
    },
    {
      "epoch": 2.5511357796279164,
      "grad_norm": 0.2373332679271698,
      "learning_rate": 5.1752075241001945e-06,
      "loss": 0.3945,
      "step": 12410
    },
    {
      "epoch": 2.551341350601295,
      "grad_norm": 0.12555475533008575,
      "learning_rate": 5.170558044530767e-06,
      "loss": 0.4456,
      "step": 12411
    },
    {
      "epoch": 2.5515469215746736,
      "grad_norm": 0.22965727746486664,
      "learning_rate": 5.16591052717178e-06,
      "loss": 0.3749,
      "step": 12412
    },
    {
      "epoch": 2.551752492548052,
      "grad_norm": 0.23548352718353271,
      "learning_rate": 5.161264972252198e-06,
      "loss": 0.3978,
      "step": 12413
    },
    {
      "epoch": 2.5519580635214307,
      "grad_norm": 0.22701576352119446,
      "learning_rate": 5.156621380000889e-06,
      "loss": 0.3722,
      "step": 12414
    },
    {
      "epoch": 2.5521636344948093,
      "grad_norm": 0.24250133335590363,
      "learning_rate": 5.15197975064662e-06,
      "loss": 0.3762,
      "step": 12415
    },
    {
      "epoch": 2.552369205468188,
      "grad_norm": 0.235441654920578,
      "learning_rate": 5.147340084418053e-06,
      "loss": 0.3904,
      "step": 12416
    },
    {
      "epoch": 2.5525747764415665,
      "grad_norm": 0.2249901443719864,
      "learning_rate": 5.1427023815437655e-06,
      "loss": 0.3812,
      "step": 12417
    },
    {
      "epoch": 2.552780347414945,
      "grad_norm": 0.11771126836538315,
      "learning_rate": 5.138066642252249e-06,
      "loss": 0.4564,
      "step": 12418
    },
    {
      "epoch": 2.5529859183883237,
      "grad_norm": 0.22527842223644257,
      "learning_rate": 5.133432866771862e-06,
      "loss": 0.3784,
      "step": 12419
    },
    {
      "epoch": 2.5531914893617023,
      "grad_norm": 0.22456350922584534,
      "learning_rate": 5.1288010553309096e-06,
      "loss": 0.367,
      "step": 12420
    },
    {
      "epoch": 2.553397060335081,
      "grad_norm": 0.24883276224136353,
      "learning_rate": 5.124171208157577e-06,
      "loss": 0.4066,
      "step": 12421
    },
    {
      "epoch": 2.5536026313084594,
      "grad_norm": 0.22530537843704224,
      "learning_rate": 5.119543325479944e-06,
      "loss": 0.3663,
      "step": 12422
    },
    {
      "epoch": 2.5538082022818376,
      "grad_norm": 0.23030851781368256,
      "learning_rate": 5.114917407526017e-06,
      "loss": 0.3692,
      "step": 12423
    },
    {
      "epoch": 2.5540137732552166,
      "grad_norm": 0.22793278098106384,
      "learning_rate": 5.110293454523685e-06,
      "loss": 0.3891,
      "step": 12424
    },
    {
      "epoch": 2.5542193442285948,
      "grad_norm": 0.22494344413280487,
      "learning_rate": 5.1056714667007475e-06,
      "loss": 0.3759,
      "step": 12425
    },
    {
      "epoch": 2.5544249152019733,
      "grad_norm": 0.2244558185338974,
      "learning_rate": 5.101051444284902e-06,
      "loss": 0.3901,
      "step": 12426
    },
    {
      "epoch": 2.554630486175352,
      "grad_norm": 0.22287413477897644,
      "learning_rate": 5.096433387503776e-06,
      "loss": 0.3852,
      "step": 12427
    },
    {
      "epoch": 2.5548360571487305,
      "grad_norm": 0.22315384447574615,
      "learning_rate": 5.091817296584869e-06,
      "loss": 0.3859,
      "step": 12428
    },
    {
      "epoch": 2.555041628122109,
      "grad_norm": 0.2284417599439621,
      "learning_rate": 5.087203171755592e-06,
      "loss": 0.3805,
      "step": 12429
    },
    {
      "epoch": 2.5552471990954877,
      "grad_norm": 0.22787770628929138,
      "learning_rate": 5.08259101324326e-06,
      "loss": 0.3772,
      "step": 12430
    },
    {
      "epoch": 2.5554527700688663,
      "grad_norm": 0.11939222365617752,
      "learning_rate": 5.0779808212751e-06,
      "loss": 0.4522,
      "step": 12431
    },
    {
      "epoch": 2.555658341042245,
      "grad_norm": 0.22622445225715637,
      "learning_rate": 5.0733725960782266e-06,
      "loss": 0.3636,
      "step": 12432
    },
    {
      "epoch": 2.5558639120156235,
      "grad_norm": 0.23444552719593048,
      "learning_rate": 5.068766337879662e-06,
      "loss": 0.3865,
      "step": 12433
    },
    {
      "epoch": 2.556069482989002,
      "grad_norm": 0.1237218827009201,
      "learning_rate": 5.064162046906351e-06,
      "loss": 0.4495,
      "step": 12434
    },
    {
      "epoch": 2.5562750539623806,
      "grad_norm": 0.2306404858827591,
      "learning_rate": 5.059559723385115e-06,
      "loss": 0.3957,
      "step": 12435
    },
    {
      "epoch": 2.5564806249357592,
      "grad_norm": 0.2336534857749939,
      "learning_rate": 5.054959367542689e-06,
      "loss": 0.3902,
      "step": 12436
    },
    {
      "epoch": 2.556686195909138,
      "grad_norm": 0.23504294455051422,
      "learning_rate": 5.0503609796057175e-06,
      "loss": 0.3697,
      "step": 12437
    },
    {
      "epoch": 2.556891766882516,
      "grad_norm": 0.23617815971374512,
      "learning_rate": 5.045764559800722e-06,
      "loss": 0.3986,
      "step": 12438
    },
    {
      "epoch": 2.557097337855895,
      "grad_norm": 0.2346443086862564,
      "learning_rate": 5.041170108354174e-06,
      "loss": 0.3879,
      "step": 12439
    },
    {
      "epoch": 2.557302908829273,
      "grad_norm": 0.2332235723733902,
      "learning_rate": 5.0365776254924055e-06,
      "loss": 0.3873,
      "step": 12440
    },
    {
      "epoch": 2.5575084798026517,
      "grad_norm": 0.2314586490392685,
      "learning_rate": 5.031987111441657e-06,
      "loss": 0.3749,
      "step": 12441
    },
    {
      "epoch": 2.5577140507760303,
      "grad_norm": 0.22944357991218567,
      "learning_rate": 5.027398566428106e-06,
      "loss": 0.3968,
      "step": 12442
    },
    {
      "epoch": 2.557919621749409,
      "grad_norm": 0.23463036119937897,
      "learning_rate": 5.0228119906777975e-06,
      "loss": 0.3848,
      "step": 12443
    },
    {
      "epoch": 2.5581251927227875,
      "grad_norm": 0.12207529693841934,
      "learning_rate": 5.018227384416686e-06,
      "loss": 0.4292,
      "step": 12444
    },
    {
      "epoch": 2.558330763696166,
      "grad_norm": 0.12470246851444244,
      "learning_rate": 5.013644747870641e-06,
      "loss": 0.4441,
      "step": 12445
    },
    {
      "epoch": 2.5585363346695447,
      "grad_norm": 0.2320421189069748,
      "learning_rate": 5.009064081265421e-06,
      "loss": 0.3746,
      "step": 12446
    },
    {
      "epoch": 2.5587419056429233,
      "grad_norm": 0.22109293937683105,
      "learning_rate": 5.004485384826685e-06,
      "loss": 0.3845,
      "step": 12447
    },
    {
      "epoch": 2.558947476616302,
      "grad_norm": 0.2225075662136078,
      "learning_rate": 4.999908658780025e-06,
      "loss": 0.3755,
      "step": 12448
    },
    {
      "epoch": 2.5591530475896804,
      "grad_norm": 0.12030386924743652,
      "learning_rate": 4.995333903350908e-06,
      "loss": 0.4508,
      "step": 12449
    },
    {
      "epoch": 2.559358618563059,
      "grad_norm": 0.2298811674118042,
      "learning_rate": 4.990761118764711e-06,
      "loss": 0.384,
      "step": 12450
    },
    {
      "epoch": 2.5595641895364376,
      "grad_norm": 0.23811288177967072,
      "learning_rate": 4.9861903052467065e-06,
      "loss": 0.3781,
      "step": 12451
    },
    {
      "epoch": 2.559769760509816,
      "grad_norm": 0.22522372007369995,
      "learning_rate": 4.981621463022082e-06,
      "loss": 0.3895,
      "step": 12452
    },
    {
      "epoch": 2.5599753314831943,
      "grad_norm": 0.2294057160615921,
      "learning_rate": 4.9770545923159244e-06,
      "loss": 0.3782,
      "step": 12453
    },
    {
      "epoch": 2.5601809024565734,
      "grad_norm": 0.225949227809906,
      "learning_rate": 4.972489693353206e-06,
      "loss": 0.3833,
      "step": 12454
    },
    {
      "epoch": 2.5603864734299515,
      "grad_norm": 0.22200778126716614,
      "learning_rate": 4.967926766358847e-06,
      "loss": 0.3662,
      "step": 12455
    },
    {
      "epoch": 2.56059204440333,
      "grad_norm": 0.25845810770988464,
      "learning_rate": 4.963365811557625e-06,
      "loss": 0.3953,
      "step": 12456
    },
    {
      "epoch": 2.5607976153767087,
      "grad_norm": 0.24813143908977509,
      "learning_rate": 4.958806829174239e-06,
      "loss": 0.3734,
      "step": 12457
    },
    {
      "epoch": 2.5610031863500873,
      "grad_norm": 0.2332116812467575,
      "learning_rate": 4.954249819433291e-06,
      "loss": 0.4004,
      "step": 12458
    },
    {
      "epoch": 2.561208757323466,
      "grad_norm": 0.3196330666542053,
      "learning_rate": 4.949694782559268e-06,
      "loss": 0.3785,
      "step": 12459
    },
    {
      "epoch": 2.5614143282968445,
      "grad_norm": 0.1242533028125763,
      "learning_rate": 4.945141718776601e-06,
      "loss": 0.4463,
      "step": 12460
    },
    {
      "epoch": 2.561619899270223,
      "grad_norm": 0.12104514986276627,
      "learning_rate": 4.94059062830958e-06,
      "loss": 0.4533,
      "step": 12461
    },
    {
      "epoch": 2.5618254702436016,
      "grad_norm": 0.23640932142734528,
      "learning_rate": 4.9360415113824195e-06,
      "loss": 0.3747,
      "step": 12462
    },
    {
      "epoch": 2.56203104121698,
      "grad_norm": 0.13072489202022552,
      "learning_rate": 4.931494368219237e-06,
      "loss": 0.44,
      "step": 12463
    },
    {
      "epoch": 2.562236612190359,
      "grad_norm": 0.22700245678424835,
      "learning_rate": 4.926949199044052e-06,
      "loss": 0.3968,
      "step": 12464
    },
    {
      "epoch": 2.5624421831637374,
      "grad_norm": 0.23678947985172272,
      "learning_rate": 4.922406004080776e-06,
      "loss": 0.3869,
      "step": 12465
    },
    {
      "epoch": 2.562647754137116,
      "grad_norm": 0.22681771218776703,
      "learning_rate": 4.91786478355324e-06,
      "loss": 0.3858,
      "step": 12466
    },
    {
      "epoch": 2.5628533251104946,
      "grad_norm": 0.12208957225084305,
      "learning_rate": 4.91332553768515e-06,
      "loss": 0.4612,
      "step": 12467
    },
    {
      "epoch": 2.5630588960838727,
      "grad_norm": 0.22747553884983063,
      "learning_rate": 4.908788266700153e-06,
      "loss": 0.3569,
      "step": 12468
    },
    {
      "epoch": 2.5632644670572517,
      "grad_norm": 0.23218391835689545,
      "learning_rate": 4.904252970821774e-06,
      "loss": 0.3702,
      "step": 12469
    },
    {
      "epoch": 2.56347003803063,
      "grad_norm": 0.23117561638355255,
      "learning_rate": 4.899719650273443e-06,
      "loss": 0.3971,
      "step": 12470
    },
    {
      "epoch": 2.5636756090040085,
      "grad_norm": 0.22626996040344238,
      "learning_rate": 4.895188305278499e-06,
      "loss": 0.3674,
      "step": 12471
    },
    {
      "epoch": 2.563881179977387,
      "grad_norm": 0.23139001429080963,
      "learning_rate": 4.890658936060177e-06,
      "loss": 0.3867,
      "step": 12472
    },
    {
      "epoch": 2.5640867509507657,
      "grad_norm": 0.12001971155405045,
      "learning_rate": 4.8861315428416195e-06,
      "loss": 0.4376,
      "step": 12473
    },
    {
      "epoch": 2.5642923219241442,
      "grad_norm": 0.22510318458080292,
      "learning_rate": 4.8816061258458565e-06,
      "loss": 0.3841,
      "step": 12474
    },
    {
      "epoch": 2.564497892897523,
      "grad_norm": 0.22726254165172577,
      "learning_rate": 4.877082685295861e-06,
      "loss": 0.364,
      "step": 12475
    },
    {
      "epoch": 2.5647034638709014,
      "grad_norm": 0.2362823784351349,
      "learning_rate": 4.872561221414465e-06,
      "loss": 0.376,
      "step": 12476
    },
    {
      "epoch": 2.56490903484428,
      "grad_norm": 0.2281261384487152,
      "learning_rate": 4.868041734424418e-06,
      "loss": 0.3786,
      "step": 12477
    },
    {
      "epoch": 2.5651146058176586,
      "grad_norm": 0.22835490107536316,
      "learning_rate": 4.863524224548385e-06,
      "loss": 0.3793,
      "step": 12478
    },
    {
      "epoch": 2.565320176791037,
      "grad_norm": 0.22907859086990356,
      "learning_rate": 4.859008692008911e-06,
      "loss": 0.3848,
      "step": 12479
    },
    {
      "epoch": 2.5655257477644158,
      "grad_norm": 0.12358912080526352,
      "learning_rate": 4.854495137028458e-06,
      "loss": 0.4492,
      "step": 12480
    },
    {
      "epoch": 2.5657313187377944,
      "grad_norm": 0.23854859173297882,
      "learning_rate": 4.849983559829394e-06,
      "loss": 0.3861,
      "step": 12481
    },
    {
      "epoch": 2.565936889711173,
      "grad_norm": 0.23517528176307678,
      "learning_rate": 4.845473960633981e-06,
      "loss": 0.3655,
      "step": 12482
    },
    {
      "epoch": 2.566142460684551,
      "grad_norm": 0.11823319643735886,
      "learning_rate": 4.840966339664371e-06,
      "loss": 0.4302,
      "step": 12483
    },
    {
      "epoch": 2.56634803165793,
      "grad_norm": 0.22691769897937775,
      "learning_rate": 4.836460697142662e-06,
      "loss": 0.3748,
      "step": 12484
    },
    {
      "epoch": 2.5665536026313083,
      "grad_norm": 0.23544248938560486,
      "learning_rate": 4.831957033290806e-06,
      "loss": 0.3853,
      "step": 12485
    },
    {
      "epoch": 2.566759173604687,
      "grad_norm": 0.22319963574409485,
      "learning_rate": 4.827455348330684e-06,
      "loss": 0.389,
      "step": 12486
    },
    {
      "epoch": 2.5669647445780654,
      "grad_norm": 0.22622336447238922,
      "learning_rate": 4.822955642484072e-06,
      "loss": 0.376,
      "step": 12487
    },
    {
      "epoch": 2.567170315551444,
      "grad_norm": 0.22860629856586456,
      "learning_rate": 4.818457915972635e-06,
      "loss": 0.3648,
      "step": 12488
    },
    {
      "epoch": 2.5673758865248226,
      "grad_norm": 0.12275035679340363,
      "learning_rate": 4.813962169017981e-06,
      "loss": 0.441,
      "step": 12489
    },
    {
      "epoch": 2.567581457498201,
      "grad_norm": 0.2211294323205948,
      "learning_rate": 4.809468401841578e-06,
      "loss": 0.3881,
      "step": 12490
    },
    {
      "epoch": 2.56778702847158,
      "grad_norm": 0.2277289628982544,
      "learning_rate": 4.804976614664821e-06,
      "loss": 0.3607,
      "step": 12491
    },
    {
      "epoch": 2.5679925994449584,
      "grad_norm": 0.23206478357315063,
      "learning_rate": 4.800486807708995e-06,
      "loss": 0.3881,
      "step": 12492
    },
    {
      "epoch": 2.568198170418337,
      "grad_norm": 0.24027037620544434,
      "learning_rate": 4.795998981195294e-06,
      "loss": 0.3896,
      "step": 12493
    },
    {
      "epoch": 2.5684037413917156,
      "grad_norm": 0.24075712263584137,
      "learning_rate": 4.791513135344807e-06,
      "loss": 0.3876,
      "step": 12494
    },
    {
      "epoch": 2.568609312365094,
      "grad_norm": 0.23393917083740234,
      "learning_rate": 4.787029270378522e-06,
      "loss": 0.3844,
      "step": 12495
    },
    {
      "epoch": 2.5688148833384727,
      "grad_norm": 0.22370143234729767,
      "learning_rate": 4.782547386517362e-06,
      "loss": 0.3913,
      "step": 12496
    },
    {
      "epoch": 2.5690204543118513,
      "grad_norm": 0.23047272861003876,
      "learning_rate": 4.778067483982119e-06,
      "loss": 0.3883,
      "step": 12497
    },
    {
      "epoch": 2.5692260252852295,
      "grad_norm": 0.12189171463251114,
      "learning_rate": 4.773589562993489e-06,
      "loss": 0.4429,
      "step": 12498
    },
    {
      "epoch": 2.5694315962586085,
      "grad_norm": 0.23024466633796692,
      "learning_rate": 4.769113623772089e-06,
      "loss": 0.3858,
      "step": 12499
    },
    {
      "epoch": 2.5696371672319867,
      "grad_norm": 0.12103652209043503,
      "learning_rate": 4.764639666538418e-06,
      "loss": 0.4603,
      "step": 12500
    },
    {
      "epoch": 2.5698427382053652,
      "grad_norm": 0.23692312836647034,
      "learning_rate": 4.76016769151289e-06,
      "loss": 0.3932,
      "step": 12501
    },
    {
      "epoch": 2.570048309178744,
      "grad_norm": 0.22613804042339325,
      "learning_rate": 4.755697698915813e-06,
      "loss": 0.3724,
      "step": 12502
    },
    {
      "epoch": 2.5702538801521224,
      "grad_norm": 0.2332460582256317,
      "learning_rate": 4.7512296889674205e-06,
      "loss": 0.3811,
      "step": 12503
    },
    {
      "epoch": 2.570459451125501,
      "grad_norm": 0.2254786342382431,
      "learning_rate": 4.746763661887813e-06,
      "loss": 0.3876,
      "step": 12504
    },
    {
      "epoch": 2.5706650220988796,
      "grad_norm": 0.2281585931777954,
      "learning_rate": 4.742299617897014e-06,
      "loss": 0.3865,
      "step": 12505
    },
    {
      "epoch": 2.570870593072258,
      "grad_norm": 0.23506386578083038,
      "learning_rate": 4.737837557214951e-06,
      "loss": 0.3798,
      "step": 12506
    },
    {
      "epoch": 2.5710761640456368,
      "grad_norm": 0.23268993198871613,
      "learning_rate": 4.7333774800614505e-06,
      "loss": 0.3984,
      "step": 12507
    },
    {
      "epoch": 2.5712817350190154,
      "grad_norm": 0.23153680562973022,
      "learning_rate": 4.728919386656236e-06,
      "loss": 0.386,
      "step": 12508
    },
    {
      "epoch": 2.571487305992394,
      "grad_norm": 0.1192520409822464,
      "learning_rate": 4.72446327721893e-06,
      "loss": 0.4267,
      "step": 12509
    },
    {
      "epoch": 2.5716928769657725,
      "grad_norm": 0.23475117981433868,
      "learning_rate": 4.720009151969075e-06,
      "loss": 0.3883,
      "step": 12510
    },
    {
      "epoch": 2.571898447939151,
      "grad_norm": 0.24187681078910828,
      "learning_rate": 4.715557011126102e-06,
      "loss": 0.3814,
      "step": 12511
    },
    {
      "epoch": 2.5721040189125297,
      "grad_norm": 0.23029695451259613,
      "learning_rate": 4.7111068549093485e-06,
      "loss": 0.3786,
      "step": 12512
    },
    {
      "epoch": 2.572309589885908,
      "grad_norm": 0.1344357579946518,
      "learning_rate": 4.7066586835380475e-06,
      "loss": 0.4468,
      "step": 12513
    },
    {
      "epoch": 2.572515160859287,
      "grad_norm": 0.2278510481119156,
      "learning_rate": 4.7022124972313446e-06,
      "loss": 0.3777,
      "step": 12514
    },
    {
      "epoch": 2.572720731832665,
      "grad_norm": 0.23537226021289825,
      "learning_rate": 4.697768296208279e-06,
      "loss": 0.3934,
      "step": 12515
    },
    {
      "epoch": 2.572926302806044,
      "grad_norm": 0.2430686354637146,
      "learning_rate": 4.693326080687791e-06,
      "loss": 0.4047,
      "step": 12516
    },
    {
      "epoch": 2.573131873779422,
      "grad_norm": 0.2354026734828949,
      "learning_rate": 4.688885850888745e-06,
      "loss": 0.3855,
      "step": 12517
    },
    {
      "epoch": 2.573337444752801,
      "grad_norm": 0.22142033278942108,
      "learning_rate": 4.6844476070298715e-06,
      "loss": 0.4079,
      "step": 12518
    },
    {
      "epoch": 2.5735430157261794,
      "grad_norm": 0.23135825991630554,
      "learning_rate": 4.680011349329835e-06,
      "loss": 0.3854,
      "step": 12519
    },
    {
      "epoch": 2.573748586699558,
      "grad_norm": 0.23446208238601685,
      "learning_rate": 4.675577078007187e-06,
      "loss": 0.3963,
      "step": 12520
    },
    {
      "epoch": 2.5739541576729366,
      "grad_norm": 0.11839111894369125,
      "learning_rate": 4.671144793280376e-06,
      "loss": 0.4355,
      "step": 12521
    },
    {
      "epoch": 2.574159728646315,
      "grad_norm": 0.23686189949512482,
      "learning_rate": 4.666714495367763e-06,
      "loss": 0.3901,
      "step": 12522
    },
    {
      "epoch": 2.5743652996196937,
      "grad_norm": 0.11666683852672577,
      "learning_rate": 4.662286184487604e-06,
      "loss": 0.4504,
      "step": 12523
    },
    {
      "epoch": 2.5745708705930723,
      "grad_norm": 0.23141448199748993,
      "learning_rate": 4.6578598608580744e-06,
      "loss": 0.3776,
      "step": 12524
    },
    {
      "epoch": 2.574776441566451,
      "grad_norm": 0.23291108012199402,
      "learning_rate": 4.653435524697234e-06,
      "loss": 0.3911,
      "step": 12525
    },
    {
      "epoch": 2.5749820125398295,
      "grad_norm": 0.2317928522825241,
      "learning_rate": 4.649013176223034e-06,
      "loss": 0.3803,
      "step": 12526
    },
    {
      "epoch": 2.575187583513208,
      "grad_norm": 0.23690040409564972,
      "learning_rate": 4.644592815653365e-06,
      "loss": 0.3758,
      "step": 12527
    },
    {
      "epoch": 2.5753931544865862,
      "grad_norm": 0.22948139905929565,
      "learning_rate": 4.640174443205982e-06,
      "loss": 0.3874,
      "step": 12528
    },
    {
      "epoch": 2.5755987254599653,
      "grad_norm": 0.2342422902584076,
      "learning_rate": 4.635758059098568e-06,
      "loss": 0.3791,
      "step": 12529
    },
    {
      "epoch": 2.5758042964333434,
      "grad_norm": 0.2423672080039978,
      "learning_rate": 4.6313436635486865e-06,
      "loss": 0.3912,
      "step": 12530
    },
    {
      "epoch": 2.5760098674067224,
      "grad_norm": 0.23133836686611176,
      "learning_rate": 4.626931256773821e-06,
      "loss": 0.3838,
      "step": 12531
    },
    {
      "epoch": 2.5762154383801006,
      "grad_norm": 0.22452816367149353,
      "learning_rate": 4.622520838991355e-06,
      "loss": 0.393,
      "step": 12532
    },
    {
      "epoch": 2.576421009353479,
      "grad_norm": 0.23489362001419067,
      "learning_rate": 4.618112410418561e-06,
      "loss": 0.3839,
      "step": 12533
    },
    {
      "epoch": 2.5766265803268578,
      "grad_norm": 0.22381377220153809,
      "learning_rate": 4.613705971272626e-06,
      "loss": 0.3874,
      "step": 12534
    },
    {
      "epoch": 2.5768321513002364,
      "grad_norm": 0.22615040838718414,
      "learning_rate": 4.6093015217706305e-06,
      "loss": 0.3871,
      "step": 12535
    },
    {
      "epoch": 2.577037722273615,
      "grad_norm": 0.12121882289648056,
      "learning_rate": 4.604899062129556e-06,
      "loss": 0.4319,
      "step": 12536
    },
    {
      "epoch": 2.5772432932469935,
      "grad_norm": 0.21779850125312805,
      "learning_rate": 4.600498592566309e-06,
      "loss": 0.3738,
      "step": 12537
    },
    {
      "epoch": 2.577448864220372,
      "grad_norm": 0.12148154526948929,
      "learning_rate": 4.596100113297666e-06,
      "loss": 0.4412,
      "step": 12538
    },
    {
      "epoch": 2.5776544351937507,
      "grad_norm": 0.24054840207099915,
      "learning_rate": 4.591703624540323e-06,
      "loss": 0.3983,
      "step": 12539
    },
    {
      "epoch": 2.5778600061671293,
      "grad_norm": 0.23665878176689148,
      "learning_rate": 4.587309126510879e-06,
      "loss": 0.391,
      "step": 12540
    },
    {
      "epoch": 2.578065577140508,
      "grad_norm": 0.23454469442367554,
      "learning_rate": 4.582916619425823e-06,
      "loss": 0.38,
      "step": 12541
    },
    {
      "epoch": 2.5782711481138865,
      "grad_norm": 0.22932063043117523,
      "learning_rate": 4.578526103501554e-06,
      "loss": 0.386,
      "step": 12542
    },
    {
      "epoch": 2.5784767190872646,
      "grad_norm": 0.2300751805305481,
      "learning_rate": 4.574137578954369e-06,
      "loss": 0.3736,
      "step": 12543
    },
    {
      "epoch": 2.5786822900606436,
      "grad_norm": 0.23985406756401062,
      "learning_rate": 4.569751046000483e-06,
      "loss": 0.4049,
      "step": 12544
    },
    {
      "epoch": 2.578887861034022,
      "grad_norm": 0.2458028644323349,
      "learning_rate": 4.5653665048559895e-06,
      "loss": 0.3769,
      "step": 12545
    },
    {
      "epoch": 2.579093432007401,
      "grad_norm": 0.23812150955200195,
      "learning_rate": 4.560983955736901e-06,
      "loss": 0.3921,
      "step": 12546
    },
    {
      "epoch": 2.579299002980779,
      "grad_norm": 0.23339690268039703,
      "learning_rate": 4.5566033988591146e-06,
      "loss": 0.3839,
      "step": 12547
    },
    {
      "epoch": 2.5795045739541576,
      "grad_norm": 0.22212813794612885,
      "learning_rate": 4.5522248344384525e-06,
      "loss": 0.3801,
      "step": 12548
    },
    {
      "epoch": 2.579710144927536,
      "grad_norm": 0.23296941816806793,
      "learning_rate": 4.547848262690621e-06,
      "loss": 0.3995,
      "step": 12549
    },
    {
      "epoch": 2.5799157159009147,
      "grad_norm": 0.23013028502464294,
      "learning_rate": 4.543473683831221e-06,
      "loss": 0.3542,
      "step": 12550
    },
    {
      "epoch": 2.5801212868742933,
      "grad_norm": 0.23488110303878784,
      "learning_rate": 4.539101098075791e-06,
      "loss": 0.3884,
      "step": 12551
    },
    {
      "epoch": 2.580326857847672,
      "grad_norm": 0.2313051074743271,
      "learning_rate": 4.534730505639736e-06,
      "loss": 0.3894,
      "step": 12552
    },
    {
      "epoch": 2.5805324288210505,
      "grad_norm": 0.2325943112373352,
      "learning_rate": 4.5303619067383785e-06,
      "loss": 0.38,
      "step": 12553
    },
    {
      "epoch": 2.580737999794429,
      "grad_norm": 0.22922460734844208,
      "learning_rate": 4.525995301586931e-06,
      "loss": 0.4037,
      "step": 12554
    },
    {
      "epoch": 2.5809435707678077,
      "grad_norm": 0.12386429309844971,
      "learning_rate": 4.521630690400517e-06,
      "loss": 0.4532,
      "step": 12555
    },
    {
      "epoch": 2.5811491417411863,
      "grad_norm": 0.223622664809227,
      "learning_rate": 4.517268073394169e-06,
      "loss": 0.3716,
      "step": 12556
    },
    {
      "epoch": 2.581354712714565,
      "grad_norm": 0.23587054014205933,
      "learning_rate": 4.512907450782795e-06,
      "loss": 0.3963,
      "step": 12557
    },
    {
      "epoch": 2.581560283687943,
      "grad_norm": 0.23143510520458221,
      "learning_rate": 4.508548822781248e-06,
      "loss": 0.3691,
      "step": 12558
    },
    {
      "epoch": 2.581765854661322,
      "grad_norm": 0.227389857172966,
      "learning_rate": 4.504192189604236e-06,
      "loss": 0.3778,
      "step": 12559
    },
    {
      "epoch": 2.5819714256347,
      "grad_norm": 0.2255561500787735,
      "learning_rate": 4.499837551466404e-06,
      "loss": 0.3913,
      "step": 12560
    },
    {
      "epoch": 2.582176996608079,
      "grad_norm": 0.2301749736070633,
      "learning_rate": 4.4954849085822795e-06,
      "loss": 0.3736,
      "step": 12561
    },
    {
      "epoch": 2.5823825675814573,
      "grad_norm": 0.23604105412960052,
      "learning_rate": 4.491134261166295e-06,
      "loss": 0.3876,
      "step": 12562
    },
    {
      "epoch": 2.582588138554836,
      "grad_norm": 0.24186021089553833,
      "learning_rate": 4.4867856094327845e-06,
      "loss": 0.3961,
      "step": 12563
    },
    {
      "epoch": 2.5827937095282145,
      "grad_norm": 0.23529918491840363,
      "learning_rate": 4.482438953595982e-06,
      "loss": 0.3746,
      "step": 12564
    },
    {
      "epoch": 2.582999280501593,
      "grad_norm": 0.2367369532585144,
      "learning_rate": 4.4780942938700425e-06,
      "loss": 0.397,
      "step": 12565
    },
    {
      "epoch": 2.5832048514749717,
      "grad_norm": 0.23594695329666138,
      "learning_rate": 4.473751630468997e-06,
      "loss": 0.395,
      "step": 12566
    },
    {
      "epoch": 2.5834104224483503,
      "grad_norm": 0.221700519323349,
      "learning_rate": 4.469410963606791e-06,
      "loss": 0.3851,
      "step": 12567
    },
    {
      "epoch": 2.583615993421729,
      "grad_norm": 0.12304549664258957,
      "learning_rate": 4.465072293497258e-06,
      "loss": 0.4312,
      "step": 12568
    },
    {
      "epoch": 2.5838215643951075,
      "grad_norm": 0.22101683914661407,
      "learning_rate": 4.460735620354163e-06,
      "loss": 0.3857,
      "step": 12569
    },
    {
      "epoch": 2.584027135368486,
      "grad_norm": 0.11826925724744797,
      "learning_rate": 4.456400944391144e-06,
      "loss": 0.4562,
      "step": 12570
    },
    {
      "epoch": 2.5842327063418646,
      "grad_norm": 0.11855246126651764,
      "learning_rate": 4.45206826582174e-06,
      "loss": 0.4436,
      "step": 12571
    },
    {
      "epoch": 2.5844382773152432,
      "grad_norm": 0.22833800315856934,
      "learning_rate": 4.447737584859421e-06,
      "loss": 0.38,
      "step": 12572
    },
    {
      "epoch": 2.5846438482886214,
      "grad_norm": 0.12766826152801514,
      "learning_rate": 4.443408901717526e-06,
      "loss": 0.4538,
      "step": 12573
    },
    {
      "epoch": 2.5848494192620004,
      "grad_norm": 0.2362251728773117,
      "learning_rate": 4.43908221660932e-06,
      "loss": 0.3791,
      "step": 12574
    },
    {
      "epoch": 2.5850549902353785,
      "grad_norm": 0.2291366159915924,
      "learning_rate": 4.434757529747952e-06,
      "loss": 0.3797,
      "step": 12575
    },
    {
      "epoch": 2.5852605612087576,
      "grad_norm": 0.11885092407464981,
      "learning_rate": 4.430434841346476e-06,
      "loss": 0.4403,
      "step": 12576
    },
    {
      "epoch": 2.5854661321821357,
      "grad_norm": 0.2298079878091812,
      "learning_rate": 4.426114151617852e-06,
      "loss": 0.3733,
      "step": 12577
    },
    {
      "epoch": 2.5856717031555143,
      "grad_norm": 0.2344081848859787,
      "learning_rate": 4.421795460774936e-06,
      "loss": 0.3807,
      "step": 12578
    },
    {
      "epoch": 2.585877274128893,
      "grad_norm": 0.23046311736106873,
      "learning_rate": 4.417478769030506e-06,
      "loss": 0.3999,
      "step": 12579
    },
    {
      "epoch": 2.5860828451022715,
      "grad_norm": 0.22831617295742035,
      "learning_rate": 4.4131640765972125e-06,
      "loss": 0.367,
      "step": 12580
    },
    {
      "epoch": 2.58628841607565,
      "grad_norm": 0.2355516254901886,
      "learning_rate": 4.408851383687621e-06,
      "loss": 0.3845,
      "step": 12581
    },
    {
      "epoch": 2.5864939870490287,
      "grad_norm": 0.23236438632011414,
      "learning_rate": 4.4045406905142014e-06,
      "loss": 0.399,
      "step": 12582
    },
    {
      "epoch": 2.5866995580224073,
      "grad_norm": 0.2408120036125183,
      "learning_rate": 4.400231997289323e-06,
      "loss": 0.3817,
      "step": 12583
    },
    {
      "epoch": 2.586905128995786,
      "grad_norm": 0.11931653320789337,
      "learning_rate": 4.395925304225247e-06,
      "loss": 0.4378,
      "step": 12584
    },
    {
      "epoch": 2.5871106999691644,
      "grad_norm": 0.2242845743894577,
      "learning_rate": 4.391620611534138e-06,
      "loss": 0.3632,
      "step": 12585
    },
    {
      "epoch": 2.587316270942543,
      "grad_norm": 0.2249579280614853,
      "learning_rate": 4.387317919428092e-06,
      "loss": 0.3774,
      "step": 12586
    },
    {
      "epoch": 2.5875218419159216,
      "grad_norm": 0.245198056101799,
      "learning_rate": 4.383017228119064e-06,
      "loss": 0.3825,
      "step": 12587
    },
    {
      "epoch": 2.5877274128893,
      "grad_norm": 0.23710772395133972,
      "learning_rate": 4.378718537818934e-06,
      "loss": 0.3911,
      "step": 12588
    },
    {
      "epoch": 2.587932983862679,
      "grad_norm": 0.22874656319618225,
      "learning_rate": 4.374421848739483e-06,
      "loss": 0.3732,
      "step": 12589
    },
    {
      "epoch": 2.588138554836057,
      "grad_norm": 0.2265346497297287,
      "learning_rate": 4.370127161092373e-06,
      "loss": 0.367,
      "step": 12590
    },
    {
      "epoch": 2.588344125809436,
      "grad_norm": 0.22639183700084686,
      "learning_rate": 4.365834475089203e-06,
      "loss": 0.3966,
      "step": 12591
    },
    {
      "epoch": 2.588549696782814,
      "grad_norm": 0.2292342483997345,
      "learning_rate": 4.361543790941434e-06,
      "loss": 0.3785,
      "step": 12592
    },
    {
      "epoch": 2.5887552677561927,
      "grad_norm": 0.23128138482570648,
      "learning_rate": 4.357255108860468e-06,
      "loss": 0.3829,
      "step": 12593
    },
    {
      "epoch": 2.5889608387295713,
      "grad_norm": 0.2288789004087448,
      "learning_rate": 4.35296842905758e-06,
      "loss": 0.3774,
      "step": 12594
    },
    {
      "epoch": 2.58916640970295,
      "grad_norm": 0.22914868593215942,
      "learning_rate": 4.348683751743952e-06,
      "loss": 0.3669,
      "step": 12595
    },
    {
      "epoch": 2.5893719806763285,
      "grad_norm": 0.12475783377885818,
      "learning_rate": 4.344401077130674e-06,
      "loss": 0.466,
      "step": 12596
    },
    {
      "epoch": 2.589577551649707,
      "grad_norm": 0.22877533733844757,
      "learning_rate": 4.340120405428733e-06,
      "loss": 0.386,
      "step": 12597
    },
    {
      "epoch": 2.5897831226230856,
      "grad_norm": 0.2277032434940338,
      "learning_rate": 4.335841736849015e-06,
      "loss": 0.3892,
      "step": 12598
    },
    {
      "epoch": 2.589988693596464,
      "grad_norm": 0.22553406655788422,
      "learning_rate": 4.331565071602301e-06,
      "loss": 0.3725,
      "step": 12599
    },
    {
      "epoch": 2.590194264569843,
      "grad_norm": 0.23949706554412842,
      "learning_rate": 4.327290409899299e-06,
      "loss": 0.3877,
      "step": 12600
    },
    {
      "epoch": 2.5903998355432214,
      "grad_norm": 0.23723599314689636,
      "learning_rate": 4.323017751950593e-06,
      "loss": 0.3816,
      "step": 12601
    },
    {
      "epoch": 2.5906054065166,
      "grad_norm": 0.2249545156955719,
      "learning_rate": 4.318747097966682e-06,
      "loss": 0.3656,
      "step": 12602
    },
    {
      "epoch": 2.5908109774899786,
      "grad_norm": 0.22489015758037567,
      "learning_rate": 4.314478448157962e-06,
      "loss": 0.3837,
      "step": 12603
    },
    {
      "epoch": 2.591016548463357,
      "grad_norm": 0.22682681679725647,
      "learning_rate": 4.31021180273472e-06,
      "loss": 0.388,
      "step": 12604
    },
    {
      "epoch": 2.5912221194367353,
      "grad_norm": 0.22748106718063354,
      "learning_rate": 4.305947161907161e-06,
      "loss": 0.3789,
      "step": 12605
    },
    {
      "epoch": 2.5914276904101143,
      "grad_norm": 0.22520016133785248,
      "learning_rate": 4.301684525885369e-06,
      "loss": 0.3921,
      "step": 12606
    },
    {
      "epoch": 2.5916332613834925,
      "grad_norm": 0.22781188786029816,
      "learning_rate": 4.297423894879371e-06,
      "loss": 0.3787,
      "step": 12607
    },
    {
      "epoch": 2.591838832356871,
      "grad_norm": 0.1223108097910881,
      "learning_rate": 4.293165269099049e-06,
      "loss": 0.4503,
      "step": 12608
    },
    {
      "epoch": 2.5920444033302497,
      "grad_norm": 0.2299673855304718,
      "learning_rate": 4.288908648754213e-06,
      "loss": 0.3899,
      "step": 12609
    },
    {
      "epoch": 2.5922499743036282,
      "grad_norm": 0.28654077649116516,
      "learning_rate": 4.284654034054568e-06,
      "loss": 0.3756,
      "step": 12610
    },
    {
      "epoch": 2.592455545277007,
      "grad_norm": 0.24485927820205688,
      "learning_rate": 4.280401425209705e-06,
      "loss": 0.3807,
      "step": 12611
    },
    {
      "epoch": 2.5926611162503854,
      "grad_norm": 0.12086854130029678,
      "learning_rate": 4.276150822429146e-06,
      "loss": 0.4414,
      "step": 12612
    },
    {
      "epoch": 2.592866687223764,
      "grad_norm": 0.12190677970647812,
      "learning_rate": 4.2719022259223e-06,
      "loss": 0.4467,
      "step": 12613
    },
    {
      "epoch": 2.5930722581971426,
      "grad_norm": 0.23326410353183746,
      "learning_rate": 4.267655635898454e-06,
      "loss": 0.3714,
      "step": 12614
    },
    {
      "epoch": 2.593277829170521,
      "grad_norm": 0.12049704790115356,
      "learning_rate": 4.263411052566845e-06,
      "loss": 0.4688,
      "step": 12615
    },
    {
      "epoch": 2.5934834001438998,
      "grad_norm": 0.22923533618450165,
      "learning_rate": 4.259168476136571e-06,
      "loss": 0.3817,
      "step": 12616
    },
    {
      "epoch": 2.5936889711172784,
      "grad_norm": 0.23745588958263397,
      "learning_rate": 4.25492790681664e-06,
      "loss": 0.3685,
      "step": 12617
    },
    {
      "epoch": 2.593894542090657,
      "grad_norm": 0.1278616040945053,
      "learning_rate": 4.250689344815975e-06,
      "loss": 0.4721,
      "step": 12618
    },
    {
      "epoch": 2.5941001130640355,
      "grad_norm": 0.2362724095582962,
      "learning_rate": 4.2464527903433685e-06,
      "loss": 0.399,
      "step": 12619
    },
    {
      "epoch": 2.5943056840374137,
      "grad_norm": 0.22642555832862854,
      "learning_rate": 4.242218243607564e-06,
      "loss": 0.3937,
      "step": 12620
    },
    {
      "epoch": 2.5945112550107927,
      "grad_norm": 0.11976632475852966,
      "learning_rate": 4.237985704817164e-06,
      "loss": 0.4401,
      "step": 12621
    },
    {
      "epoch": 2.594716825984171,
      "grad_norm": 0.22681719064712524,
      "learning_rate": 4.233755174180688e-06,
      "loss": 0.3904,
      "step": 12622
    },
    {
      "epoch": 2.5949223969575494,
      "grad_norm": 0.23691929876804352,
      "learning_rate": 4.2295266519065575e-06,
      "loss": 0.3986,
      "step": 12623
    },
    {
      "epoch": 2.595127967930928,
      "grad_norm": 0.2224111258983612,
      "learning_rate": 4.225300138203082e-06,
      "loss": 0.3766,
      "step": 12624
    },
    {
      "epoch": 2.5953335389043066,
      "grad_norm": 0.22920066118240356,
      "learning_rate": 4.22107563327849e-06,
      "loss": 0.3883,
      "step": 12625
    },
    {
      "epoch": 2.595539109877685,
      "grad_norm": 0.22633981704711914,
      "learning_rate": 4.216853137340895e-06,
      "loss": 0.381,
      "step": 12626
    },
    {
      "epoch": 2.595744680851064,
      "grad_norm": 0.2328253835439682,
      "learning_rate": 4.21263265059833e-06,
      "loss": 0.3936,
      "step": 12627
    },
    {
      "epoch": 2.5959502518244424,
      "grad_norm": 0.2358826845884323,
      "learning_rate": 4.208414173258719e-06,
      "loss": 0.3572,
      "step": 12628
    },
    {
      "epoch": 2.596155822797821,
      "grad_norm": 0.23712804913520813,
      "learning_rate": 4.204197705529881e-06,
      "loss": 0.4026,
      "step": 12629
    },
    {
      "epoch": 2.5963613937711996,
      "grad_norm": 0.2283300757408142,
      "learning_rate": 4.199983247619545e-06,
      "loss": 0.3851,
      "step": 12630
    },
    {
      "epoch": 2.596566964744578,
      "grad_norm": 0.22347889840602875,
      "learning_rate": 4.195770799735333e-06,
      "loss": 0.3673,
      "step": 12631
    },
    {
      "epoch": 2.5967725357179567,
      "grad_norm": 0.23281633853912354,
      "learning_rate": 4.1915603620847675e-06,
      "loss": 0.4097,
      "step": 12632
    },
    {
      "epoch": 2.5969781066913353,
      "grad_norm": 0.2382623255252838,
      "learning_rate": 4.187351934875289e-06,
      "loss": 0.409,
      "step": 12633
    },
    {
      "epoch": 2.597183677664714,
      "grad_norm": 0.2310085892677307,
      "learning_rate": 4.18314551831423e-06,
      "loss": 0.3912,
      "step": 12634
    },
    {
      "epoch": 2.597389248638092,
      "grad_norm": 0.2320510298013687,
      "learning_rate": 4.1789411126088015e-06,
      "loss": 0.3582,
      "step": 12635
    },
    {
      "epoch": 2.597594819611471,
      "grad_norm": 0.23400908708572388,
      "learning_rate": 4.174738717966154e-06,
      "loss": 0.3725,
      "step": 12636
    },
    {
      "epoch": 2.5978003905848492,
      "grad_norm": 0.23173139989376068,
      "learning_rate": 4.170538334593318e-06,
      "loss": 0.3992,
      "step": 12637
    },
    {
      "epoch": 2.598005961558228,
      "grad_norm": 0.23350614309310913,
      "learning_rate": 4.1663399626972175e-06,
      "loss": 0.3795,
      "step": 12638
    },
    {
      "epoch": 2.5982115325316064,
      "grad_norm": 0.23148328065872192,
      "learning_rate": 4.162143602484692e-06,
      "loss": 0.3611,
      "step": 12639
    },
    {
      "epoch": 2.598417103504985,
      "grad_norm": 0.22413556277751923,
      "learning_rate": 4.15794925416247e-06,
      "loss": 0.3857,
      "step": 12640
    },
    {
      "epoch": 2.5986226744783636,
      "grad_norm": 0.22561226785182953,
      "learning_rate": 4.153756917937197e-06,
      "loss": 0.3802,
      "step": 12641
    },
    {
      "epoch": 2.598828245451742,
      "grad_norm": 0.22936685383319855,
      "learning_rate": 4.149566594015408e-06,
      "loss": 0.374,
      "step": 12642
    },
    {
      "epoch": 2.5990338164251208,
      "grad_norm": 0.12491568177938461,
      "learning_rate": 4.145378282603538e-06,
      "loss": 0.4393,
      "step": 12643
    },
    {
      "epoch": 2.5992393873984994,
      "grad_norm": 0.22605833411216736,
      "learning_rate": 4.141191983907927e-06,
      "loss": 0.3838,
      "step": 12644
    },
    {
      "epoch": 2.599444958371878,
      "grad_norm": 0.22735659778118134,
      "learning_rate": 4.137007698134814e-06,
      "loss": 0.3879,
      "step": 12645
    },
    {
      "epoch": 2.5996505293452565,
      "grad_norm": 0.23198509216308594,
      "learning_rate": 4.1328254254903345e-06,
      "loss": 0.3801,
      "step": 12646
    },
    {
      "epoch": 2.599856100318635,
      "grad_norm": 0.11979317665100098,
      "learning_rate": 4.12864516618053e-06,
      "loss": 0.4608,
      "step": 12647
    },
    {
      "epoch": 2.6000616712920137,
      "grad_norm": 0.23859287798404694,
      "learning_rate": 4.124466920411354e-06,
      "loss": 0.3896,
      "step": 12648
    },
    {
      "epoch": 2.6002672422653923,
      "grad_norm": 0.22083625197410583,
      "learning_rate": 4.120290688388638e-06,
      "loss": 0.3972,
      "step": 12649
    },
    {
      "epoch": 2.6004728132387704,
      "grad_norm": 0.23605285584926605,
      "learning_rate": 4.116116470318131e-06,
      "loss": 0.4005,
      "step": 12650
    },
    {
      "epoch": 2.6006783842121495,
      "grad_norm": 0.2248559296131134,
      "learning_rate": 4.111944266405476e-06,
      "loss": 0.3765,
      "step": 12651
    },
    {
      "epoch": 2.6008839551855276,
      "grad_norm": 0.23217467963695526,
      "learning_rate": 4.107774076856211e-06,
      "loss": 0.3721,
      "step": 12652
    },
    {
      "epoch": 2.601089526158906,
      "grad_norm": 0.23271602392196655,
      "learning_rate": 4.103605901875783e-06,
      "loss": 0.383,
      "step": 12653
    },
    {
      "epoch": 2.601295097132285,
      "grad_norm": 0.22513030469417572,
      "learning_rate": 4.099439741669553e-06,
      "loss": 0.375,
      "step": 12654
    },
    {
      "epoch": 2.6015006681056634,
      "grad_norm": 0.2263166457414627,
      "learning_rate": 4.0952755964427555e-06,
      "loss": 0.3759,
      "step": 12655
    },
    {
      "epoch": 2.601706239079042,
      "grad_norm": 0.22585409879684448,
      "learning_rate": 4.091113466400533e-06,
      "loss": 0.3809,
      "step": 12656
    },
    {
      "epoch": 2.6019118100524206,
      "grad_norm": 0.22669021785259247,
      "learning_rate": 4.08695335174795e-06,
      "loss": 0.3782,
      "step": 12657
    },
    {
      "epoch": 2.602117381025799,
      "grad_norm": 0.2331555336713791,
      "learning_rate": 4.082795252689949e-06,
      "loss": 0.3776,
      "step": 12658
    },
    {
      "epoch": 2.6023229519991777,
      "grad_norm": 0.2259588986635208,
      "learning_rate": 4.07863916943138e-06,
      "loss": 0.4052,
      "step": 12659
    },
    {
      "epoch": 2.6025285229725563,
      "grad_norm": 0.2281801849603653,
      "learning_rate": 4.074485102176994e-06,
      "loss": 0.391,
      "step": 12660
    },
    {
      "epoch": 2.602734093945935,
      "grad_norm": 0.22164572775363922,
      "learning_rate": 4.070333051131434e-06,
      "loss": 0.3553,
      "step": 12661
    },
    {
      "epoch": 2.6029396649193135,
      "grad_norm": 0.2315491884946823,
      "learning_rate": 4.0661830164992644e-06,
      "loss": 0.3879,
      "step": 12662
    },
    {
      "epoch": 2.603145235892692,
      "grad_norm": 0.22418269515037537,
      "learning_rate": 4.062034998484938e-06,
      "loss": 0.3866,
      "step": 12663
    },
    {
      "epoch": 2.6033508068660707,
      "grad_norm": 0.22450844943523407,
      "learning_rate": 4.0578889972928e-06,
      "loss": 0.3846,
      "step": 12664
    },
    {
      "epoch": 2.603556377839449,
      "grad_norm": 0.22189322113990784,
      "learning_rate": 4.053745013127109e-06,
      "loss": 0.3828,
      "step": 12665
    },
    {
      "epoch": 2.603761948812828,
      "grad_norm": 0.22486789524555206,
      "learning_rate": 4.04960304619202e-06,
      "loss": 0.3742,
      "step": 12666
    },
    {
      "epoch": 2.603967519786206,
      "grad_norm": 0.23218779265880585,
      "learning_rate": 4.045463096691585e-06,
      "loss": 0.3841,
      "step": 12667
    },
    {
      "epoch": 2.6041730907595846,
      "grad_norm": 0.24457047879695892,
      "learning_rate": 4.041325164829752e-06,
      "loss": 0.3924,
      "step": 12668
    },
    {
      "epoch": 2.604378661732963,
      "grad_norm": 0.1159941703081131,
      "learning_rate": 4.037189250810401e-06,
      "loss": 0.4393,
      "step": 12669
    },
    {
      "epoch": 2.6045842327063418,
      "grad_norm": 0.2236376702785492,
      "learning_rate": 4.033055354837276e-06,
      "loss": 0.376,
      "step": 12670
    },
    {
      "epoch": 2.6047898036797204,
      "grad_norm": 0.23160508275032043,
      "learning_rate": 4.0289234771140335e-06,
      "loss": 0.3798,
      "step": 12671
    },
    {
      "epoch": 2.604995374653099,
      "grad_norm": 0.11661987006664276,
      "learning_rate": 4.02479361784423e-06,
      "loss": 0.4366,
      "step": 12672
    },
    {
      "epoch": 2.6052009456264775,
      "grad_norm": 0.22946786880493164,
      "learning_rate": 4.020665777231327e-06,
      "loss": 0.3895,
      "step": 12673
    },
    {
      "epoch": 2.605406516599856,
      "grad_norm": 0.2358265221118927,
      "learning_rate": 4.0165399554786894e-06,
      "loss": 0.37,
      "step": 12674
    },
    {
      "epoch": 2.6056120875732347,
      "grad_norm": 0.23617896437644958,
      "learning_rate": 4.0124161527895635e-06,
      "loss": 0.3892,
      "step": 12675
    },
    {
      "epoch": 2.6058176585466133,
      "grad_norm": 0.23179112374782562,
      "learning_rate": 4.008294369367121e-06,
      "loss": 0.3648,
      "step": 12676
    },
    {
      "epoch": 2.606023229519992,
      "grad_norm": 0.12185750156641006,
      "learning_rate": 4.004174605414424e-06,
      "loss": 0.4366,
      "step": 12677
    },
    {
      "epoch": 2.6062288004933705,
      "grad_norm": 0.12000511586666107,
      "learning_rate": 4.000056861134422e-06,
      "loss": 0.4486,
      "step": 12678
    },
    {
      "epoch": 2.606434371466749,
      "grad_norm": 0.25182247161865234,
      "learning_rate": 3.995941136729992e-06,
      "loss": 0.4121,
      "step": 12679
    },
    {
      "epoch": 2.606639942440127,
      "grad_norm": 0.2277366816997528,
      "learning_rate": 3.991827432403891e-06,
      "loss": 0.3816,
      "step": 12680
    },
    {
      "epoch": 2.6068455134135062,
      "grad_norm": 0.24362795054912567,
      "learning_rate": 3.987715748358783e-06,
      "loss": 0.3746,
      "step": 12681
    },
    {
      "epoch": 2.6070510843868844,
      "grad_norm": 0.23480716347694397,
      "learning_rate": 3.983606084797215e-06,
      "loss": 0.3913,
      "step": 12682
    },
    {
      "epoch": 2.607256655360263,
      "grad_norm": 0.232587993144989,
      "learning_rate": 3.9794984419216755e-06,
      "loss": 0.3749,
      "step": 12683
    },
    {
      "epoch": 2.6074622263336416,
      "grad_norm": 0.22933539748191833,
      "learning_rate": 3.9753928199345225e-06,
      "loss": 0.3873,
      "step": 12684
    },
    {
      "epoch": 2.60766779730702,
      "grad_norm": 0.22452722489833832,
      "learning_rate": 3.971289219038014e-06,
      "loss": 0.3707,
      "step": 12685
    },
    {
      "epoch": 2.6078733682803987,
      "grad_norm": 0.23321999609470367,
      "learning_rate": 3.967187639434315e-06,
      "loss": 0.3864,
      "step": 12686
    },
    {
      "epoch": 2.6080789392537773,
      "grad_norm": 0.1229674443602562,
      "learning_rate": 3.963088081325497e-06,
      "loss": 0.4495,
      "step": 12687
    },
    {
      "epoch": 2.608284510227156,
      "grad_norm": 0.24325041472911835,
      "learning_rate": 3.958990544913513e-06,
      "loss": 0.3694,
      "step": 12688
    },
    {
      "epoch": 2.6084900812005345,
      "grad_norm": 0.22455255687236786,
      "learning_rate": 3.9548950304002536e-06,
      "loss": 0.38,
      "step": 12689
    },
    {
      "epoch": 2.608695652173913,
      "grad_norm": 0.23633895814418793,
      "learning_rate": 3.950801537987466e-06,
      "loss": 0.3842,
      "step": 12690
    },
    {
      "epoch": 2.6089012231472917,
      "grad_norm": 0.2320830076932907,
      "learning_rate": 3.946710067876824e-06,
      "loss": 0.3621,
      "step": 12691
    },
    {
      "epoch": 2.6091067941206703,
      "grad_norm": 0.22698310017585754,
      "learning_rate": 3.942620620269896e-06,
      "loss": 0.3754,
      "step": 12692
    },
    {
      "epoch": 2.609312365094049,
      "grad_norm": 0.23345276713371277,
      "learning_rate": 3.938533195368147e-06,
      "loss": 0.3781,
      "step": 12693
    },
    {
      "epoch": 2.6095179360674274,
      "grad_norm": 0.23347729444503784,
      "learning_rate": 3.93444779337295e-06,
      "loss": 0.3761,
      "step": 12694
    },
    {
      "epoch": 2.6097235070408056,
      "grad_norm": 0.2320747673511505,
      "learning_rate": 3.9303644144855595e-06,
      "loss": 0.3811,
      "step": 12695
    },
    {
      "epoch": 2.6099290780141846,
      "grad_norm": 0.23767243325710297,
      "learning_rate": 3.926283058907159e-06,
      "loss": 0.3885,
      "step": 12696
    },
    {
      "epoch": 2.6101346489875628,
      "grad_norm": 0.2290267050266266,
      "learning_rate": 3.922203726838818e-06,
      "loss": 0.3869,
      "step": 12697
    },
    {
      "epoch": 2.610340219960942,
      "grad_norm": 0.2302408069372177,
      "learning_rate": 3.918126418481507e-06,
      "loss": 0.3751,
      "step": 12698
    },
    {
      "epoch": 2.61054579093432,
      "grad_norm": 0.22879059612751007,
      "learning_rate": 3.914051134036077e-06,
      "loss": 0.3831,
      "step": 12699
    },
    {
      "epoch": 2.6107513619076985,
      "grad_norm": 0.23263303935527802,
      "learning_rate": 3.9099778737033215e-06,
      "loss": 0.3813,
      "step": 12700
    },
    {
      "epoch": 2.610956932881077,
      "grad_norm": 0.23693881928920746,
      "learning_rate": 3.905906637683902e-06,
      "loss": 0.3898,
      "step": 12701
    },
    {
      "epoch": 2.6111625038544557,
      "grad_norm": 0.22104987502098083,
      "learning_rate": 3.901837426178384e-06,
      "loss": 0.3801,
      "step": 12702
    },
    {
      "epoch": 2.6113680748278343,
      "grad_norm": 0.23495499789714813,
      "learning_rate": 3.897770239387247e-06,
      "loss": 0.3659,
      "step": 12703
    },
    {
      "epoch": 2.611573645801213,
      "grad_norm": 0.23057711124420166,
      "learning_rate": 3.893705077510861e-06,
      "loss": 0.3858,
      "step": 12704
    },
    {
      "epoch": 2.6117792167745915,
      "grad_norm": 0.2297714799642563,
      "learning_rate": 3.8896419407494955e-06,
      "loss": 0.3919,
      "step": 12705
    },
    {
      "epoch": 2.61198478774797,
      "grad_norm": 0.12049584090709686,
      "learning_rate": 3.885580829303326e-06,
      "loss": 0.4581,
      "step": 12706
    },
    {
      "epoch": 2.6121903587213486,
      "grad_norm": 0.23567521572113037,
      "learning_rate": 3.8815217433724165e-06,
      "loss": 0.3795,
      "step": 12707
    },
    {
      "epoch": 2.6123959296947272,
      "grad_norm": 0.22988666594028473,
      "learning_rate": 3.877464683156743e-06,
      "loss": 0.3793,
      "step": 12708
    },
    {
      "epoch": 2.612601500668106,
      "grad_norm": 0.2348259687423706,
      "learning_rate": 3.873409648856175e-06,
      "loss": 0.3949,
      "step": 12709
    },
    {
      "epoch": 2.612807071641484,
      "grad_norm": 0.2306186556816101,
      "learning_rate": 3.869356640670493e-06,
      "loss": 0.3803,
      "step": 12710
    },
    {
      "epoch": 2.613012642614863,
      "grad_norm": 0.23866905272006989,
      "learning_rate": 3.865305658799362e-06,
      "loss": 0.3926,
      "step": 12711
    },
    {
      "epoch": 2.613218213588241,
      "grad_norm": 0.23288170993328094,
      "learning_rate": 3.861256703442363e-06,
      "loss": 0.3704,
      "step": 12712
    },
    {
      "epoch": 2.61342378456162,
      "grad_norm": 0.12461275607347488,
      "learning_rate": 3.857209774798965e-06,
      "loss": 0.4396,
      "step": 12713
    },
    {
      "epoch": 2.6136293555349983,
      "grad_norm": 0.2187352180480957,
      "learning_rate": 3.853164873068535e-06,
      "loss": 0.3681,
      "step": 12714
    },
    {
      "epoch": 2.613834926508377,
      "grad_norm": 0.1244712844491005,
      "learning_rate": 3.849121998450358e-06,
      "loss": 0.4467,
      "step": 12715
    },
    {
      "epoch": 2.6140404974817555,
      "grad_norm": 0.12192442268133163,
      "learning_rate": 3.84508115114359e-06,
      "loss": 0.4495,
      "step": 12716
    },
    {
      "epoch": 2.614246068455134,
      "grad_norm": 0.22983375191688538,
      "learning_rate": 3.841042331347321e-06,
      "loss": 0.3779,
      "step": 12717
    },
    {
      "epoch": 2.6144516394285127,
      "grad_norm": 0.23377950489521027,
      "learning_rate": 3.8370055392605225e-06,
      "loss": 0.3805,
      "step": 12718
    },
    {
      "epoch": 2.6146572104018913,
      "grad_norm": 0.24588587880134583,
      "learning_rate": 3.832970775082071e-06,
      "loss": 0.393,
      "step": 12719
    },
    {
      "epoch": 2.61486278137527,
      "grad_norm": 0.23526208102703094,
      "learning_rate": 3.82893803901072e-06,
      "loss": 0.3813,
      "step": 12720
    },
    {
      "epoch": 2.6150683523486484,
      "grad_norm": 0.24966707825660706,
      "learning_rate": 3.824907331245169e-06,
      "loss": 0.3916,
      "step": 12721
    },
    {
      "epoch": 2.615273923322027,
      "grad_norm": 0.2245279848575592,
      "learning_rate": 3.820878651983982e-06,
      "loss": 0.3745,
      "step": 12722
    },
    {
      "epoch": 2.6154794942954056,
      "grad_norm": 0.2309785783290863,
      "learning_rate": 3.816852001425625e-06,
      "loss": 0.3854,
      "step": 12723
    },
    {
      "epoch": 2.615685065268784,
      "grad_norm": 0.22770026326179504,
      "learning_rate": 3.812827379768491e-06,
      "loss": 0.3834,
      "step": 12724
    },
    {
      "epoch": 2.6158906362421623,
      "grad_norm": 0.12256407737731934,
      "learning_rate": 3.80880478721084e-06,
      "loss": 0.4432,
      "step": 12725
    },
    {
      "epoch": 2.6160962072155414,
      "grad_norm": 0.12518732249736786,
      "learning_rate": 3.8047842239508542e-06,
      "loss": 0.446,
      "step": 12726
    },
    {
      "epoch": 2.6163017781889195,
      "grad_norm": 0.2416573315858841,
      "learning_rate": 3.8007656901865996e-06,
      "loss": 0.3825,
      "step": 12727
    },
    {
      "epoch": 2.6165073491622985,
      "grad_norm": 0.23824763298034668,
      "learning_rate": 3.7967491861160583e-06,
      "loss": 0.3737,
      "step": 12728
    },
    {
      "epoch": 2.6167129201356767,
      "grad_norm": 0.2198529988527298,
      "learning_rate": 3.7927347119370966e-06,
      "loss": 0.3874,
      "step": 12729
    },
    {
      "epoch": 2.6169184911090553,
      "grad_norm": 0.2278008759021759,
      "learning_rate": 3.7887222678474868e-06,
      "loss": 0.356,
      "step": 12730
    },
    {
      "epoch": 2.617124062082434,
      "grad_norm": 0.2338324338197708,
      "learning_rate": 3.7847118540449202e-06,
      "loss": 0.3923,
      "step": 12731
    },
    {
      "epoch": 2.6173296330558125,
      "grad_norm": 0.21889689564704895,
      "learning_rate": 3.780703470726959e-06,
      "loss": 0.3612,
      "step": 12732
    },
    {
      "epoch": 2.617535204029191,
      "grad_norm": 0.238824263215065,
      "learning_rate": 3.7766971180910803e-06,
      "loss": 0.3983,
      "step": 12733
    },
    {
      "epoch": 2.6177407750025696,
      "grad_norm": 0.12167999893426895,
      "learning_rate": 3.7726927963346564e-06,
      "loss": 0.4387,
      "step": 12734
    },
    {
      "epoch": 2.617946345975948,
      "grad_norm": 0.23274268209934235,
      "learning_rate": 3.768690505654964e-06,
      "loss": 0.3855,
      "step": 12735
    },
    {
      "epoch": 2.618151916949327,
      "grad_norm": 0.24479152262210846,
      "learning_rate": 3.7646902462491765e-06,
      "loss": 0.3923,
      "step": 12736
    },
    {
      "epoch": 2.6183574879227054,
      "grad_norm": 0.2456178367137909,
      "learning_rate": 3.7606920183143546e-06,
      "loss": 0.4016,
      "step": 12737
    },
    {
      "epoch": 2.618563058896084,
      "grad_norm": 0.23310574889183044,
      "learning_rate": 3.756695822047497e-06,
      "loss": 0.3859,
      "step": 12738
    },
    {
      "epoch": 2.6187686298694626,
      "grad_norm": 0.12393064796924591,
      "learning_rate": 3.7527016576454603e-06,
      "loss": 0.4474,
      "step": 12739
    },
    {
      "epoch": 2.6189742008428407,
      "grad_norm": 0.23465074598789215,
      "learning_rate": 3.748709525305028e-06,
      "loss": 0.3948,
      "step": 12740
    },
    {
      "epoch": 2.6191797718162197,
      "grad_norm": 0.2182430624961853,
      "learning_rate": 3.7447194252228624e-06,
      "loss": 0.3807,
      "step": 12741
    },
    {
      "epoch": 2.619385342789598,
      "grad_norm": 0.23260043561458588,
      "learning_rate": 3.740731357595551e-06,
      "loss": 0.3909,
      "step": 12742
    },
    {
      "epoch": 2.619590913762977,
      "grad_norm": 0.11928752809762955,
      "learning_rate": 3.736745322619557e-06,
      "loss": 0.45,
      "step": 12743
    },
    {
      "epoch": 2.619796484736355,
      "grad_norm": 0.23598788678646088,
      "learning_rate": 3.7327613204912532e-06,
      "loss": 0.3824,
      "step": 12744
    },
    {
      "epoch": 2.6200020557097337,
      "grad_norm": 0.24089393019676208,
      "learning_rate": 3.7287793514069226e-06,
      "loss": 0.3849,
      "step": 12745
    },
    {
      "epoch": 2.6202076266831122,
      "grad_norm": 0.23498208820819855,
      "learning_rate": 3.724799415562733e-06,
      "loss": 0.3896,
      "step": 12746
    },
    {
      "epoch": 2.620413197656491,
      "grad_norm": 0.23025161027908325,
      "learning_rate": 3.720821513154758e-06,
      "loss": 0.3816,
      "step": 12747
    },
    {
      "epoch": 2.6206187686298694,
      "grad_norm": 0.22431518137454987,
      "learning_rate": 3.7168456443789656e-06,
      "loss": 0.3795,
      "step": 12748
    },
    {
      "epoch": 2.620824339603248,
      "grad_norm": 0.12069787830114365,
      "learning_rate": 3.7128718094312293e-06,
      "loss": 0.4559,
      "step": 12749
    },
    {
      "epoch": 2.6210299105766266,
      "grad_norm": 0.2284901887178421,
      "learning_rate": 3.708900008507327e-06,
      "loss": 0.3781,
      "step": 12750
    },
    {
      "epoch": 2.621235481550005,
      "grad_norm": 0.12058551609516144,
      "learning_rate": 3.704930241802918e-06,
      "loss": 0.446,
      "step": 12751
    },
    {
      "epoch": 2.6214410525233838,
      "grad_norm": 0.22641105949878693,
      "learning_rate": 3.700962509513595e-06,
      "loss": 0.3953,
      "step": 12752
    },
    {
      "epoch": 2.6216466234967624,
      "grad_norm": 0.11827776581048965,
      "learning_rate": 3.6969968118348127e-06,
      "loss": 0.4369,
      "step": 12753
    },
    {
      "epoch": 2.621852194470141,
      "grad_norm": 0.12709125876426697,
      "learning_rate": 3.6930331489619537e-06,
      "loss": 0.4419,
      "step": 12754
    },
    {
      "epoch": 2.6220577654435195,
      "grad_norm": 0.24272185564041138,
      "learning_rate": 3.689071521090277e-06,
      "loss": 0.396,
      "step": 12755
    },
    {
      "epoch": 2.622263336416898,
      "grad_norm": 0.23644161224365234,
      "learning_rate": 3.685111928414962e-06,
      "loss": 0.3728,
      "step": 12756
    },
    {
      "epoch": 2.6224689073902763,
      "grad_norm": 0.23552681505680084,
      "learning_rate": 3.6811543711310777e-06,
      "loss": 0.4081,
      "step": 12757
    },
    {
      "epoch": 2.6226744783636553,
      "grad_norm": 0.23269321024417877,
      "learning_rate": 3.6771988494335823e-06,
      "loss": 0.3695,
      "step": 12758
    },
    {
      "epoch": 2.6228800493370334,
      "grad_norm": 0.22955691814422607,
      "learning_rate": 3.673245363517371e-06,
      "loss": 0.3751,
      "step": 12759
    },
    {
      "epoch": 2.623085620310412,
      "grad_norm": 0.22635483741760254,
      "learning_rate": 3.669293913577197e-06,
      "loss": 0.4007,
      "step": 12760
    },
    {
      "epoch": 2.6232911912837906,
      "grad_norm": 0.2312253713607788,
      "learning_rate": 3.6653444998077302e-06,
      "loss": 0.3691,
      "step": 12761
    },
    {
      "epoch": 2.623496762257169,
      "grad_norm": 0.24891069531440735,
      "learning_rate": 3.661397122403545e-06,
      "loss": 0.3871,
      "step": 12762
    },
    {
      "epoch": 2.623702333230548,
      "grad_norm": 0.22625122964382172,
      "learning_rate": 3.6574517815591002e-06,
      "loss": 0.3823,
      "step": 12763
    },
    {
      "epoch": 2.6239079042039264,
      "grad_norm": 0.2243538647890091,
      "learning_rate": 3.653508477468781e-06,
      "loss": 0.3765,
      "step": 12764
    },
    {
      "epoch": 2.624113475177305,
      "grad_norm": 0.23248639702796936,
      "learning_rate": 3.649567210326832e-06,
      "loss": 0.3974,
      "step": 12765
    },
    {
      "epoch": 2.6243190461506836,
      "grad_norm": 0.22589054703712463,
      "learning_rate": 3.6456279803274474e-06,
      "loss": 0.3502,
      "step": 12766
    },
    {
      "epoch": 2.624524617124062,
      "grad_norm": 0.23221521079540253,
      "learning_rate": 3.6416907876646824e-06,
      "loss": 0.3724,
      "step": 12767
    },
    {
      "epoch": 2.6247301880974407,
      "grad_norm": 0.22528620064258575,
      "learning_rate": 3.6377556325325014e-06,
      "loss": 0.3963,
      "step": 12768
    },
    {
      "epoch": 2.6249357590708193,
      "grad_norm": 0.2201879322528839,
      "learning_rate": 3.6338225151247797e-06,
      "loss": 0.3879,
      "step": 12769
    },
    {
      "epoch": 2.625141330044198,
      "grad_norm": 0.2363358587026596,
      "learning_rate": 3.629891435635272e-06,
      "loss": 0.3906,
      "step": 12770
    },
    {
      "epoch": 2.6253469010175765,
      "grad_norm": 0.12273525446653366,
      "learning_rate": 3.625962394257644e-06,
      "loss": 0.4524,
      "step": 12771
    },
    {
      "epoch": 2.6255524719909547,
      "grad_norm": 0.23554597795009613,
      "learning_rate": 3.6220353911854748e-06,
      "loss": 0.3759,
      "step": 12772
    },
    {
      "epoch": 2.6257580429643337,
      "grad_norm": 0.12295962870121002,
      "learning_rate": 3.6181104266122206e-06,
      "loss": 0.4549,
      "step": 12773
    },
    {
      "epoch": 2.625963613937712,
      "grad_norm": 0.22669798135757446,
      "learning_rate": 3.6141875007312465e-06,
      "loss": 0.3875,
      "step": 12774
    },
    {
      "epoch": 2.6261691849110904,
      "grad_norm": 0.12479596585035324,
      "learning_rate": 3.610266613735818e-06,
      "loss": 0.4492,
      "step": 12775
    },
    {
      "epoch": 2.626374755884469,
      "grad_norm": 0.1302644908428192,
      "learning_rate": 3.6063477658191e-06,
      "loss": 0.4415,
      "step": 12776
    },
    {
      "epoch": 2.6265803268578476,
      "grad_norm": 0.12051938474178314,
      "learning_rate": 3.6024309571741533e-06,
      "loss": 0.4435,
      "step": 12777
    },
    {
      "epoch": 2.626785897831226,
      "grad_norm": 0.2350439727306366,
      "learning_rate": 3.5985161879939338e-06,
      "loss": 0.3832,
      "step": 12778
    },
    {
      "epoch": 2.6269914688046048,
      "grad_norm": 0.12057623267173767,
      "learning_rate": 3.5946034584713225e-06,
      "loss": 0.454,
      "step": 12779
    },
    {
      "epoch": 2.6271970397779834,
      "grad_norm": 0.22473283112049103,
      "learning_rate": 3.5906927687990644e-06,
      "loss": 0.38,
      "step": 12780
    },
    {
      "epoch": 2.627402610751362,
      "grad_norm": 0.2340707778930664,
      "learning_rate": 3.586784119169831e-06,
      "loss": 0.3914,
      "step": 12781
    },
    {
      "epoch": 2.6276081817247405,
      "grad_norm": 0.11768918484449387,
      "learning_rate": 3.582877509776178e-06,
      "loss": 0.4492,
      "step": 12782
    },
    {
      "epoch": 2.627813752698119,
      "grad_norm": 0.2253478765487671,
      "learning_rate": 3.5789729408105665e-06,
      "loss": 0.3697,
      "step": 12783
    },
    {
      "epoch": 2.6280193236714977,
      "grad_norm": 0.22701376676559448,
      "learning_rate": 3.575070412465353e-06,
      "loss": 0.3721,
      "step": 12784
    },
    {
      "epoch": 2.6282248946448763,
      "grad_norm": 0.1314348578453064,
      "learning_rate": 3.571169924932803e-06,
      "loss": 0.4403,
      "step": 12785
    },
    {
      "epoch": 2.628430465618255,
      "grad_norm": 0.22781673073768616,
      "learning_rate": 3.567271478405078e-06,
      "loss": 0.3857,
      "step": 12786
    },
    {
      "epoch": 2.628636036591633,
      "grad_norm": 0.2242654412984848,
      "learning_rate": 3.56337507307422e-06,
      "loss": 0.383,
      "step": 12787
    },
    {
      "epoch": 2.628841607565012,
      "grad_norm": 0.23189879953861237,
      "learning_rate": 3.5594807091322047e-06,
      "loss": 0.3873,
      "step": 12788
    },
    {
      "epoch": 2.62904717853839,
      "grad_norm": 0.23214222490787506,
      "learning_rate": 3.555588386770884e-06,
      "loss": 0.4035,
      "step": 12789
    },
    {
      "epoch": 2.629252749511769,
      "grad_norm": 0.23310469090938568,
      "learning_rate": 3.551698106182014e-06,
      "loss": 0.3994,
      "step": 12790
    },
    {
      "epoch": 2.6294583204851474,
      "grad_norm": 0.2216804176568985,
      "learning_rate": 3.5478098675572474e-06,
      "loss": 0.3785,
      "step": 12791
    },
    {
      "epoch": 2.629663891458526,
      "grad_norm": 0.23494820296764374,
      "learning_rate": 3.543923671088135e-06,
      "loss": 0.3643,
      "step": 12792
    },
    {
      "epoch": 2.6298694624319046,
      "grad_norm": 0.23349052667617798,
      "learning_rate": 3.540039516966144e-06,
      "loss": 0.4023,
      "step": 12793
    },
    {
      "epoch": 2.630075033405283,
      "grad_norm": 0.12207508087158203,
      "learning_rate": 3.536157405382627e-06,
      "loss": 0.4413,
      "step": 12794
    },
    {
      "epoch": 2.6302806043786617,
      "grad_norm": 0.22845908999443054,
      "learning_rate": 3.5322773365288298e-06,
      "loss": 0.3583,
      "step": 12795
    },
    {
      "epoch": 2.6304861753520403,
      "grad_norm": 0.24039143323898315,
      "learning_rate": 3.5283993105959103e-06,
      "loss": 0.3928,
      "step": 12796
    },
    {
      "epoch": 2.630691746325419,
      "grad_norm": 0.23867449164390564,
      "learning_rate": 3.524523327774915e-06,
      "loss": 0.3646,
      "step": 12797
    },
    {
      "epoch": 2.6308973172987975,
      "grad_norm": 0.12439849972724915,
      "learning_rate": 3.520649388256802e-06,
      "loss": 0.4475,
      "step": 12798
    },
    {
      "epoch": 2.631102888272176,
      "grad_norm": 0.23779630661010742,
      "learning_rate": 3.516777492232413e-06,
      "loss": 0.371,
      "step": 12799
    },
    {
      "epoch": 2.6313084592455547,
      "grad_norm": 0.2336895763874054,
      "learning_rate": 3.512907639892511e-06,
      "loss": 0.3766,
      "step": 12800
    },
    {
      "epoch": 2.6315140302189333,
      "grad_norm": 0.23059743642807007,
      "learning_rate": 3.5090398314277427e-06,
      "loss": 0.4003,
      "step": 12801
    },
    {
      "epoch": 2.6317196011923114,
      "grad_norm": 0.2365521341562271,
      "learning_rate": 3.5051740670286466e-06,
      "loss": 0.396,
      "step": 12802
    },
    {
      "epoch": 2.6319251721656904,
      "grad_norm": 0.2317541390657425,
      "learning_rate": 3.5013103468856846e-06,
      "loss": 0.3752,
      "step": 12803
    },
    {
      "epoch": 2.6321307431390686,
      "grad_norm": 0.23596826195716858,
      "learning_rate": 3.4974486711891948e-06,
      "loss": 0.3894,
      "step": 12804
    },
    {
      "epoch": 2.632336314112447,
      "grad_norm": 0.22818133234977722,
      "learning_rate": 3.49358904012942e-06,
      "loss": 0.3867,
      "step": 12805
    },
    {
      "epoch": 2.6325418850858258,
      "grad_norm": 0.23365046083927155,
      "learning_rate": 3.4897314538965178e-06,
      "loss": 0.3997,
      "step": 12806
    },
    {
      "epoch": 2.6327474560592043,
      "grad_norm": 0.21868395805358887,
      "learning_rate": 3.4858759126805315e-06,
      "loss": 0.3677,
      "step": 12807
    },
    {
      "epoch": 2.632953027032583,
      "grad_norm": 0.23094679415225983,
      "learning_rate": 3.4820224166713938e-06,
      "loss": 0.407,
      "step": 12808
    },
    {
      "epoch": 2.6331585980059615,
      "grad_norm": 0.2458486258983612,
      "learning_rate": 3.4781709660589636e-06,
      "loss": 0.3881,
      "step": 12809
    },
    {
      "epoch": 2.63336416897934,
      "grad_norm": 0.23680594563484192,
      "learning_rate": 3.4743215610329785e-06,
      "loss": 0.3726,
      "step": 12810
    },
    {
      "epoch": 2.6335697399527187,
      "grad_norm": 0.22720733284950256,
      "learning_rate": 3.4704742017830815e-06,
      "loss": 0.3768,
      "step": 12811
    },
    {
      "epoch": 2.6337753109260973,
      "grad_norm": 0.1218261644244194,
      "learning_rate": 3.466628888498807e-06,
      "loss": 0.4454,
      "step": 12812
    },
    {
      "epoch": 2.633980881899476,
      "grad_norm": 0.2283984124660492,
      "learning_rate": 3.4627856213695977e-06,
      "loss": 0.3878,
      "step": 12813
    },
    {
      "epoch": 2.6341864528728545,
      "grad_norm": 0.22897951304912567,
      "learning_rate": 3.4589444005848023e-06,
      "loss": 0.3816,
      "step": 12814
    },
    {
      "epoch": 2.634392023846233,
      "grad_norm": 0.12013474106788635,
      "learning_rate": 3.455105226333654e-06,
      "loss": 0.4596,
      "step": 12815
    },
    {
      "epoch": 2.6345975948196116,
      "grad_norm": 0.22502024471759796,
      "learning_rate": 3.4512680988052878e-06,
      "loss": 0.371,
      "step": 12816
    },
    {
      "epoch": 2.63480316579299,
      "grad_norm": 0.23252278566360474,
      "learning_rate": 3.447433018188751e-06,
      "loss": 0.3728,
      "step": 12817
    },
    {
      "epoch": 2.635008736766369,
      "grad_norm": 0.23052488267421722,
      "learning_rate": 3.4435999846729684e-06,
      "loss": 0.395,
      "step": 12818
    },
    {
      "epoch": 2.635214307739747,
      "grad_norm": 0.22712182998657227,
      "learning_rate": 3.4397689984467786e-06,
      "loss": 0.3595,
      "step": 12819
    },
    {
      "epoch": 2.6354198787131256,
      "grad_norm": 0.11764495819807053,
      "learning_rate": 3.4359400596989154e-06,
      "loss": 0.4578,
      "step": 12820
    },
    {
      "epoch": 2.635625449686504,
      "grad_norm": 0.23514217138290405,
      "learning_rate": 3.4321131686180186e-06,
      "loss": 0.3898,
      "step": 12821
    },
    {
      "epoch": 2.6358310206598827,
      "grad_norm": 0.11776993423700333,
      "learning_rate": 3.428288325392622e-06,
      "loss": 0.444,
      "step": 12822
    },
    {
      "epoch": 2.6360365916332613,
      "grad_norm": 0.21985335648059845,
      "learning_rate": 3.4244655302111493e-06,
      "loss": 0.3853,
      "step": 12823
    },
    {
      "epoch": 2.63624216260664,
      "grad_norm": 0.23297996819019318,
      "learning_rate": 3.420644783261941e-06,
      "loss": 0.3617,
      "step": 12824
    },
    {
      "epoch": 2.6364477335800185,
      "grad_norm": 0.23519185185432434,
      "learning_rate": 3.4168260847332207e-06,
      "loss": 0.3854,
      "step": 12825
    },
    {
      "epoch": 2.636653304553397,
      "grad_norm": 0.23188424110412598,
      "learning_rate": 3.413009434813113e-06,
      "loss": 0.378,
      "step": 12826
    },
    {
      "epoch": 2.6368588755267757,
      "grad_norm": 0.12326161563396454,
      "learning_rate": 3.409194833689663e-06,
      "loss": 0.4533,
      "step": 12827
    },
    {
      "epoch": 2.6370644465001543,
      "grad_norm": 0.230136439204216,
      "learning_rate": 3.405382281550785e-06,
      "loss": 0.3748,
      "step": 12828
    },
    {
      "epoch": 2.637270017473533,
      "grad_norm": 0.23117460310459137,
      "learning_rate": 3.4015717785843033e-06,
      "loss": 0.4093,
      "step": 12829
    },
    {
      "epoch": 2.6374755884469114,
      "grad_norm": 0.12335003167390823,
      "learning_rate": 3.3977633249779582e-06,
      "loss": 0.4496,
      "step": 12830
    },
    {
      "epoch": 2.63768115942029,
      "grad_norm": 0.22897659242153168,
      "learning_rate": 3.393956920919365e-06,
      "loss": 0.3755,
      "step": 12831
    },
    {
      "epoch": 2.637886730393668,
      "grad_norm": 0.24940082430839539,
      "learning_rate": 3.390152566596048e-06,
      "loss": 0.3918,
      "step": 12832
    },
    {
      "epoch": 2.638092301367047,
      "grad_norm": 0.22477301955223083,
      "learning_rate": 3.386350262195428e-06,
      "loss": 0.3838,
      "step": 12833
    },
    {
      "epoch": 2.6382978723404253,
      "grad_norm": 0.2210949808359146,
      "learning_rate": 3.3825500079048244e-06,
      "loss": 0.3877,
      "step": 12834
    },
    {
      "epoch": 2.638503443313804,
      "grad_norm": 0.23475997149944305,
      "learning_rate": 3.378751803911468e-06,
      "loss": 0.379,
      "step": 12835
    },
    {
      "epoch": 2.6387090142871825,
      "grad_norm": 0.23455579578876495,
      "learning_rate": 3.3749556504024738e-06,
      "loss": 0.3899,
      "step": 12836
    },
    {
      "epoch": 2.638914585260561,
      "grad_norm": 0.2290680706501007,
      "learning_rate": 3.3711615475648574e-06,
      "loss": 0.3715,
      "step": 12837
    },
    {
      "epoch": 2.6391201562339397,
      "grad_norm": 0.23036018013954163,
      "learning_rate": 3.367369495585544e-06,
      "loss": 0.3757,
      "step": 12838
    },
    {
      "epoch": 2.6393257272073183,
      "grad_norm": 0.2289496660232544,
      "learning_rate": 3.3635794946513393e-06,
      "loss": 0.3768,
      "step": 12839
    },
    {
      "epoch": 2.639531298180697,
      "grad_norm": 0.23706848919391632,
      "learning_rate": 3.3597915449489694e-06,
      "loss": 0.3983,
      "step": 12840
    },
    {
      "epoch": 2.6397368691540755,
      "grad_norm": 0.22767172753810883,
      "learning_rate": 3.356005646665034e-06,
      "loss": 0.4012,
      "step": 12841
    },
    {
      "epoch": 2.639942440127454,
      "grad_norm": 0.2272636741399765,
      "learning_rate": 3.352221799986065e-06,
      "loss": 0.3776,
      "step": 12842
    },
    {
      "epoch": 2.6401480111008326,
      "grad_norm": 0.2348739355802536,
      "learning_rate": 3.3484400050984677e-06,
      "loss": 0.3836,
      "step": 12843
    },
    {
      "epoch": 2.6403535820742112,
      "grad_norm": 0.23279324173927307,
      "learning_rate": 3.3446602621885533e-06,
      "loss": 0.38,
      "step": 12844
    },
    {
      "epoch": 2.64055915304759,
      "grad_norm": 0.23363368213176727,
      "learning_rate": 3.3408825714425273e-06,
      "loss": 0.3875,
      "step": 12845
    },
    {
      "epoch": 2.6407647240209684,
      "grad_norm": 0.2381156086921692,
      "learning_rate": 3.3371069330465066e-06,
      "loss": 0.3899,
      "step": 12846
    },
    {
      "epoch": 2.6409702949943465,
      "grad_norm": 0.22464582324028015,
      "learning_rate": 3.333333347186487e-06,
      "loss": 0.3757,
      "step": 12847
    },
    {
      "epoch": 2.6411758659677256,
      "grad_norm": 0.22474953532218933,
      "learning_rate": 3.3295618140483898e-06,
      "loss": 0.3826,
      "step": 12848
    },
    {
      "epoch": 2.6413814369411037,
      "grad_norm": 0.23333978652954102,
      "learning_rate": 3.3257923338180166e-06,
      "loss": 0.3958,
      "step": 12849
    },
    {
      "epoch": 2.6415870079144823,
      "grad_norm": 0.22718356549739838,
      "learning_rate": 3.3220249066810683e-06,
      "loss": 0.3781,
      "step": 12850
    },
    {
      "epoch": 2.641792578887861,
      "grad_norm": 0.2350386381149292,
      "learning_rate": 3.318259532823147e-06,
      "loss": 0.382,
      "step": 12851
    },
    {
      "epoch": 2.6419981498612395,
      "grad_norm": 0.23376323282718658,
      "learning_rate": 3.314496212429764e-06,
      "loss": 0.3888,
      "step": 12852
    },
    {
      "epoch": 2.642203720834618,
      "grad_norm": 0.12379579991102219,
      "learning_rate": 3.3107349456863164e-06,
      "loss": 0.4619,
      "step": 12853
    },
    {
      "epoch": 2.6424092918079967,
      "grad_norm": 0.2283748835325241,
      "learning_rate": 3.3069757327780903e-06,
      "loss": 0.3903,
      "step": 12854
    },
    {
      "epoch": 2.6426148627813753,
      "grad_norm": 0.12083147466182709,
      "learning_rate": 3.303218573890308e-06,
      "loss": 0.4596,
      "step": 12855
    },
    {
      "epoch": 2.642820433754754,
      "grad_norm": 0.12731818854808807,
      "learning_rate": 3.2994634692080566e-06,
      "loss": 0.4275,
      "step": 12856
    },
    {
      "epoch": 2.6430260047281324,
      "grad_norm": 0.22971482574939728,
      "learning_rate": 3.295710418916333e-06,
      "loss": 0.3684,
      "step": 12857
    },
    {
      "epoch": 2.643231575701511,
      "grad_norm": 0.23392242193222046,
      "learning_rate": 3.291959423200029e-06,
      "loss": 0.3943,
      "step": 12858
    },
    {
      "epoch": 2.6434371466748896,
      "grad_norm": 0.23680520057678223,
      "learning_rate": 3.288210482243942e-06,
      "loss": 0.3717,
      "step": 12859
    },
    {
      "epoch": 2.643642717648268,
      "grad_norm": 0.2301972508430481,
      "learning_rate": 3.284463596232769e-06,
      "loss": 0.3778,
      "step": 12860
    },
    {
      "epoch": 2.643848288621647,
      "grad_norm": 0.2333422154188156,
      "learning_rate": 3.280718765351083e-06,
      "loss": 0.3792,
      "step": 12861
    },
    {
      "epoch": 2.644053859595025,
      "grad_norm": 0.12037578970193863,
      "learning_rate": 3.2769759897834006e-06,
      "loss": 0.447,
      "step": 12862
    },
    {
      "epoch": 2.644259430568404,
      "grad_norm": 0.23077453672885895,
      "learning_rate": 3.273235269714095e-06,
      "loss": 0.3731,
      "step": 12863
    },
    {
      "epoch": 2.644465001541782,
      "grad_norm": 0.23764555156230927,
      "learning_rate": 3.2694966053274583e-06,
      "loss": 0.3879,
      "step": 12864
    },
    {
      "epoch": 2.644670572515161,
      "grad_norm": 0.23211322724819183,
      "learning_rate": 3.2657599968076737e-06,
      "loss": 0.3737,
      "step": 12865
    },
    {
      "epoch": 2.6448761434885393,
      "grad_norm": 0.24329562485218048,
      "learning_rate": 3.2620254443388283e-06,
      "loss": 0.3848,
      "step": 12866
    },
    {
      "epoch": 2.645081714461918,
      "grad_norm": 0.12165253609418869,
      "learning_rate": 3.25829294810491e-06,
      "loss": 0.4326,
      "step": 12867
    },
    {
      "epoch": 2.6452872854352965,
      "grad_norm": 0.1259094923734665,
      "learning_rate": 3.2545625082897874e-06,
      "loss": 0.4411,
      "step": 12868
    },
    {
      "epoch": 2.645492856408675,
      "grad_norm": 0.9432693123817444,
      "learning_rate": 3.250834125077263e-06,
      "loss": 0.4054,
      "step": 12869
    },
    {
      "epoch": 2.6456984273820536,
      "grad_norm": 0.23042386770248413,
      "learning_rate": 3.2471077986510045e-06,
      "loss": 0.3848,
      "step": 12870
    },
    {
      "epoch": 2.645903998355432,
      "grad_norm": 0.24361389875411987,
      "learning_rate": 3.243383529194591e-06,
      "loss": 0.3802,
      "step": 12871
    },
    {
      "epoch": 2.646109569328811,
      "grad_norm": 0.2344541847705841,
      "learning_rate": 3.2396613168914945e-06,
      "loss": 0.382,
      "step": 12872
    },
    {
      "epoch": 2.6463151403021894,
      "grad_norm": 0.2395932823419571,
      "learning_rate": 3.2359411619251094e-06,
      "loss": 0.3851,
      "step": 12873
    },
    {
      "epoch": 2.646520711275568,
      "grad_norm": 0.23469178378582,
      "learning_rate": 3.232223064478694e-06,
      "loss": 0.3817,
      "step": 12874
    },
    {
      "epoch": 2.6467262822489466,
      "grad_norm": 0.12158174812793732,
      "learning_rate": 3.228507024735416e-06,
      "loss": 0.4588,
      "step": 12875
    },
    {
      "epoch": 2.646931853222325,
      "grad_norm": 0.2328415811061859,
      "learning_rate": 3.2247930428783698e-06,
      "loss": 0.3656,
      "step": 12876
    },
    {
      "epoch": 2.6471374241957033,
      "grad_norm": 0.2348622977733612,
      "learning_rate": 3.2210811190905133e-06,
      "loss": 0.3897,
      "step": 12877
    },
    {
      "epoch": 2.6473429951690823,
      "grad_norm": 0.2322535365819931,
      "learning_rate": 3.2173712535547156e-06,
      "loss": 0.3972,
      "step": 12878
    },
    {
      "epoch": 2.6475485661424605,
      "grad_norm": 0.2271070033311844,
      "learning_rate": 3.2136634464537407e-06,
      "loss": 0.3874,
      "step": 12879
    },
    {
      "epoch": 2.6477541371158395,
      "grad_norm": 0.22299452126026154,
      "learning_rate": 3.209957697970262e-06,
      "loss": 0.3853,
      "step": 12880
    },
    {
      "epoch": 2.6479597080892177,
      "grad_norm": 0.22766993939876556,
      "learning_rate": 3.206254008286844e-06,
      "loss": 0.3715,
      "step": 12881
    },
    {
      "epoch": 2.6481652790625962,
      "grad_norm": 0.23258061707019806,
      "learning_rate": 3.202552377585936e-06,
      "loss": 0.3779,
      "step": 12882
    },
    {
      "epoch": 2.648370850035975,
      "grad_norm": 0.2237747460603714,
      "learning_rate": 3.198852806049921e-06,
      "loss": 0.3782,
      "step": 12883
    },
    {
      "epoch": 2.6485764210093534,
      "grad_norm": 0.2268817126750946,
      "learning_rate": 3.1951552938610486e-06,
      "loss": 0.3831,
      "step": 12884
    },
    {
      "epoch": 2.648781991982732,
      "grad_norm": 0.22952783107757568,
      "learning_rate": 3.1914598412014784e-06,
      "loss": 0.387,
      "step": 12885
    },
    {
      "epoch": 2.6489875629561106,
      "grad_norm": 0.11640308797359467,
      "learning_rate": 3.1877664482532748e-06,
      "loss": 0.4273,
      "step": 12886
    },
    {
      "epoch": 2.649193133929489,
      "grad_norm": 0.23823915421962738,
      "learning_rate": 3.184075115198382e-06,
      "loss": 0.36,
      "step": 12887
    },
    {
      "epoch": 2.6493987049028678,
      "grad_norm": 0.2281506359577179,
      "learning_rate": 3.180385842218665e-06,
      "loss": 0.3906,
      "step": 12888
    },
    {
      "epoch": 2.6496042758762464,
      "grad_norm": 0.22782327234745026,
      "learning_rate": 3.176698629495868e-06,
      "loss": 0.3764,
      "step": 12889
    },
    {
      "epoch": 2.649809846849625,
      "grad_norm": 0.23879340291023254,
      "learning_rate": 3.1730134772116507e-06,
      "loss": 0.3965,
      "step": 12890
    },
    {
      "epoch": 2.6500154178230035,
      "grad_norm": 0.22930049896240234,
      "learning_rate": 3.1693303855475626e-06,
      "loss": 0.3817,
      "step": 12891
    },
    {
      "epoch": 2.6502209887963817,
      "grad_norm": 0.12056277692317963,
      "learning_rate": 3.1656493546850492e-06,
      "loss": 0.4472,
      "step": 12892
    },
    {
      "epoch": 2.6504265597697607,
      "grad_norm": 0.22143647074699402,
      "learning_rate": 3.16197038480545e-06,
      "loss": 0.3693,
      "step": 12893
    },
    {
      "epoch": 2.650632130743139,
      "grad_norm": 0.2295861691236496,
      "learning_rate": 3.1582934760900302e-06,
      "loss": 0.3659,
      "step": 12894
    },
    {
      "epoch": 2.650837701716518,
      "grad_norm": 0.2308284193277359,
      "learning_rate": 3.1546186287199196e-06,
      "loss": 0.3898,
      "step": 12895
    },
    {
      "epoch": 2.651043272689896,
      "grad_norm": 0.12249313294887543,
      "learning_rate": 3.1509458428761593e-06,
      "loss": 0.4343,
      "step": 12896
    },
    {
      "epoch": 2.6512488436632746,
      "grad_norm": 0.2282872498035431,
      "learning_rate": 3.1472751187397034e-06,
      "loss": 0.3752,
      "step": 12897
    },
    {
      "epoch": 2.651454414636653,
      "grad_norm": 0.23152220249176025,
      "learning_rate": 3.1436064564913824e-06,
      "loss": 0.3756,
      "step": 12898
    },
    {
      "epoch": 2.651659985610032,
      "grad_norm": 0.2310069352388382,
      "learning_rate": 3.1399398563119376e-06,
      "loss": 0.3788,
      "step": 12899
    },
    {
      "epoch": 2.6518655565834104,
      "grad_norm": 0.24678552150726318,
      "learning_rate": 3.1362753183819987e-06,
      "loss": 0.4028,
      "step": 12900
    },
    {
      "epoch": 2.652071127556789,
      "grad_norm": 0.2333284318447113,
      "learning_rate": 3.1326128428821065e-06,
      "loss": 0.3579,
      "step": 12901
    },
    {
      "epoch": 2.6522766985301676,
      "grad_norm": 0.22593623399734497,
      "learning_rate": 3.128952429992692e-06,
      "loss": 0.3587,
      "step": 12902
    },
    {
      "epoch": 2.652482269503546,
      "grad_norm": 0.22910958528518677,
      "learning_rate": 3.1252940798940757e-06,
      "loss": 0.3806,
      "step": 12903
    },
    {
      "epoch": 2.6526878404769247,
      "grad_norm": 0.2289619743824005,
      "learning_rate": 3.1216377927665083e-06,
      "loss": 0.3707,
      "step": 12904
    },
    {
      "epoch": 2.6528934114503033,
      "grad_norm": 0.2235552966594696,
      "learning_rate": 3.1179835687901104e-06,
      "loss": 0.3581,
      "step": 12905
    },
    {
      "epoch": 2.653098982423682,
      "grad_norm": 0.23109345138072968,
      "learning_rate": 3.1143314081449036e-06,
      "loss": 0.3845,
      "step": 12906
    },
    {
      "epoch": 2.65330455339706,
      "grad_norm": 0.22421690821647644,
      "learning_rate": 3.1106813110108143e-06,
      "loss": 0.349,
      "step": 12907
    },
    {
      "epoch": 2.653510124370439,
      "grad_norm": 0.23160065710544586,
      "learning_rate": 3.1070332775676675e-06,
      "loss": 0.3736,
      "step": 12908
    },
    {
      "epoch": 2.6537156953438172,
      "grad_norm": 0.24303646385669708,
      "learning_rate": 3.1033873079951803e-06,
      "loss": 0.3866,
      "step": 12909
    },
    {
      "epoch": 2.6539212663171963,
      "grad_norm": 0.23496584594249725,
      "learning_rate": 3.0997434024729737e-06,
      "loss": 0.3996,
      "step": 12910
    },
    {
      "epoch": 2.6541268372905744,
      "grad_norm": 0.23909246921539307,
      "learning_rate": 3.0961015611805742e-06,
      "loss": 0.3741,
      "step": 12911
    },
    {
      "epoch": 2.654332408263953,
      "grad_norm": 0.24438230693340302,
      "learning_rate": 3.0924617842973936e-06,
      "loss": 0.3972,
      "step": 12912
    },
    {
      "epoch": 2.6545379792373316,
      "grad_norm": 0.23331284523010254,
      "learning_rate": 3.0888240720027427e-06,
      "loss": 0.3722,
      "step": 12913
    },
    {
      "epoch": 2.65474355021071,
      "grad_norm": 0.22317220270633698,
      "learning_rate": 3.085188424475834e-06,
      "loss": 0.3871,
      "step": 12914
    },
    {
      "epoch": 2.6549491211840888,
      "grad_norm": 0.2298220545053482,
      "learning_rate": 3.0815548418957884e-06,
      "loss": 0.3819,
      "step": 12915
    },
    {
      "epoch": 2.6551546921574674,
      "grad_norm": 0.22008730471134186,
      "learning_rate": 3.0779233244416084e-06,
      "loss": 0.374,
      "step": 12916
    },
    {
      "epoch": 2.655360263130846,
      "grad_norm": 0.22563999891281128,
      "learning_rate": 3.0742938722921956e-06,
      "loss": 0.3705,
      "step": 12917
    },
    {
      "epoch": 2.6555658341042245,
      "grad_norm": 0.231903076171875,
      "learning_rate": 3.070666485626367e-06,
      "loss": 0.389,
      "step": 12918
    },
    {
      "epoch": 2.655771405077603,
      "grad_norm": 0.2327851802110672,
      "learning_rate": 3.067041164622829e-06,
      "loss": 0.3835,
      "step": 12919
    },
    {
      "epoch": 2.6559769760509817,
      "grad_norm": 0.225325807929039,
      "learning_rate": 3.063417909460175e-06,
      "loss": 0.3936,
      "step": 12920
    },
    {
      "epoch": 2.6561825470243603,
      "grad_norm": 0.12957926094532013,
      "learning_rate": 3.0597967203169113e-06,
      "loss": 0.4463,
      "step": 12921
    },
    {
      "epoch": 2.656388117997739,
      "grad_norm": 0.22962552309036255,
      "learning_rate": 3.056177597371436e-06,
      "loss": 0.3842,
      "step": 12922
    },
    {
      "epoch": 2.6565936889711175,
      "grad_norm": 0.22653664648532867,
      "learning_rate": 3.0525605408020405e-06,
      "loss": 0.3896,
      "step": 12923
    },
    {
      "epoch": 2.6567992599444956,
      "grad_norm": 0.23864829540252686,
      "learning_rate": 3.0489455507869275e-06,
      "loss": 0.3847,
      "step": 12924
    },
    {
      "epoch": 2.6570048309178746,
      "grad_norm": 0.22004222869873047,
      "learning_rate": 3.0453326275041898e-06,
      "loss": 0.3739,
      "step": 12925
    },
    {
      "epoch": 2.657210401891253,
      "grad_norm": 0.22327813506126404,
      "learning_rate": 3.0417217711318203e-06,
      "loss": 0.3769,
      "step": 12926
    },
    {
      "epoch": 2.6574159728646314,
      "grad_norm": 0.2343619167804718,
      "learning_rate": 3.038112981847706e-06,
      "loss": 0.395,
      "step": 12927
    },
    {
      "epoch": 2.65762154383801,
      "grad_norm": 0.2278946340084076,
      "learning_rate": 3.034506259829635e-06,
      "loss": 0.3972,
      "step": 12928
    },
    {
      "epoch": 2.6578271148113886,
      "grad_norm": 0.23201905190944672,
      "learning_rate": 3.030901605255296e-06,
      "loss": 0.3898,
      "step": 12929
    },
    {
      "epoch": 2.658032685784767,
      "grad_norm": 0.2293037325143814,
      "learning_rate": 3.0272990183022606e-06,
      "loss": 0.3821,
      "step": 12930
    },
    {
      "epoch": 2.6582382567581457,
      "grad_norm": 0.12265797704458237,
      "learning_rate": 3.0236984991480323e-06,
      "loss": 0.447,
      "step": 12931
    },
    {
      "epoch": 2.6584438277315243,
      "grad_norm": 0.2273169308900833,
      "learning_rate": 3.0201000479699793e-06,
      "loss": 0.373,
      "step": 12932
    },
    {
      "epoch": 2.658649398704903,
      "grad_norm": 0.12377490103244781,
      "learning_rate": 3.01650366494539e-06,
      "loss": 0.458,
      "step": 12933
    },
    {
      "epoch": 2.6588549696782815,
      "grad_norm": 0.2221972793340683,
      "learning_rate": 3.012909350251427e-06,
      "loss": 0.3638,
      "step": 12934
    },
    {
      "epoch": 2.65906054065166,
      "grad_norm": 0.22611477971076965,
      "learning_rate": 3.0093171040651795e-06,
      "loss": 0.3919,
      "step": 12935
    },
    {
      "epoch": 2.6592661116250387,
      "grad_norm": 0.22873830795288086,
      "learning_rate": 3.005726926563606e-06,
      "loss": 0.3743,
      "step": 12936
    },
    {
      "epoch": 2.6594716825984173,
      "grad_norm": 0.24191910028457642,
      "learning_rate": 3.0021388179235887e-06,
      "loss": 0.3736,
      "step": 12937
    },
    {
      "epoch": 2.659677253571796,
      "grad_norm": 0.2301923930644989,
      "learning_rate": 2.9985527783218924e-06,
      "loss": 0.3863,
      "step": 12938
    },
    {
      "epoch": 2.659882824545174,
      "grad_norm": 0.24405382573604584,
      "learning_rate": 2.9949688079351906e-06,
      "loss": 0.3997,
      "step": 12939
    },
    {
      "epoch": 2.660088395518553,
      "grad_norm": 0.23321811854839325,
      "learning_rate": 2.991386906940047e-06,
      "loss": 0.3724,
      "step": 12940
    },
    {
      "epoch": 2.660293966491931,
      "grad_norm": 0.1216077208518982,
      "learning_rate": 2.98780707551292e-06,
      "loss": 0.438,
      "step": 12941
    },
    {
      "epoch": 2.6604995374653098,
      "grad_norm": 0.23895247280597687,
      "learning_rate": 2.984229313830179e-06,
      "loss": 0.3645,
      "step": 12942
    },
    {
      "epoch": 2.6607051084386883,
      "grad_norm": 0.2277345210313797,
      "learning_rate": 2.9806536220680733e-06,
      "loss": 0.3865,
      "step": 12943
    },
    {
      "epoch": 2.660910679412067,
      "grad_norm": 0.22947533428668976,
      "learning_rate": 2.977080000402761e-06,
      "loss": 0.3807,
      "step": 12944
    },
    {
      "epoch": 2.6611162503854455,
      "grad_norm": 0.22254477441310883,
      "learning_rate": 2.973508449010307e-06,
      "loss": 0.3799,
      "step": 12945
    },
    {
      "epoch": 2.661321821358824,
      "grad_norm": 0.23514899611473083,
      "learning_rate": 2.9699389680666607e-06,
      "loss": 0.3769,
      "step": 12946
    },
    {
      "epoch": 2.6615273923322027,
      "grad_norm": 0.2325250208377838,
      "learning_rate": 2.9663715577476757e-06,
      "loss": 0.3932,
      "step": 12947
    },
    {
      "epoch": 2.6617329633055813,
      "grad_norm": 0.23977595567703247,
      "learning_rate": 2.962806218229097e-06,
      "loss": 0.3916,
      "step": 12948
    },
    {
      "epoch": 2.66193853427896,
      "grad_norm": 0.23064671456813812,
      "learning_rate": 2.9592429496865793e-06,
      "loss": 0.3747,
      "step": 12949
    },
    {
      "epoch": 2.6621441052523385,
      "grad_norm": 0.22543418407440186,
      "learning_rate": 2.9556817522956613e-06,
      "loss": 0.3767,
      "step": 12950
    },
    {
      "epoch": 2.662349676225717,
      "grad_norm": 0.23423805832862854,
      "learning_rate": 2.9521226262317785e-06,
      "loss": 0.3838,
      "step": 12951
    },
    {
      "epoch": 2.6625552471990956,
      "grad_norm": 0.22551970183849335,
      "learning_rate": 2.9485655716702904e-06,
      "loss": 0.3817,
      "step": 12952
    },
    {
      "epoch": 2.6627608181724742,
      "grad_norm": 0.2365717738866806,
      "learning_rate": 2.9450105887864316e-06,
      "loss": 0.3874,
      "step": 12953
    },
    {
      "epoch": 2.6629663891458524,
      "grad_norm": 0.24153275787830353,
      "learning_rate": 2.941457677755337e-06,
      "loss": 0.3949,
      "step": 12954
    },
    {
      "epoch": 2.6631719601192314,
      "grad_norm": 0.23784461617469788,
      "learning_rate": 2.937906838752037e-06,
      "loss": 0.3925,
      "step": 12955
    },
    {
      "epoch": 2.6633775310926096,
      "grad_norm": 0.23372387886047363,
      "learning_rate": 2.934358071951471e-06,
      "loss": 0.4013,
      "step": 12956
    },
    {
      "epoch": 2.663583102065988,
      "grad_norm": 0.24772094190120697,
      "learning_rate": 2.930811377528465e-06,
      "loss": 0.3938,
      "step": 12957
    },
    {
      "epoch": 2.6637886730393667,
      "grad_norm": 0.24129636585712433,
      "learning_rate": 2.927266755657754e-06,
      "loss": 0.3854,
      "step": 12958
    },
    {
      "epoch": 2.6639942440127453,
      "grad_norm": 0.11964880675077438,
      "learning_rate": 2.9237242065139626e-06,
      "loss": 0.4409,
      "step": 12959
    },
    {
      "epoch": 2.664199814986124,
      "grad_norm": 0.23363502323627472,
      "learning_rate": 2.9201837302716118e-06,
      "loss": 0.3931,
      "step": 12960
    },
    {
      "epoch": 2.6644053859595025,
      "grad_norm": 0.23559674620628357,
      "learning_rate": 2.916645327105132e-06,
      "loss": 0.3897,
      "step": 12961
    },
    {
      "epoch": 2.664610956932881,
      "grad_norm": 0.2335934042930603,
      "learning_rate": 2.913108997188844e-06,
      "loss": 0.3799,
      "step": 12962
    },
    {
      "epoch": 2.6648165279062597,
      "grad_norm": 0.23663899302482605,
      "learning_rate": 2.9095747406969577e-06,
      "loss": 0.3606,
      "step": 12963
    },
    {
      "epoch": 2.6650220988796383,
      "grad_norm": 0.22651928663253784,
      "learning_rate": 2.9060425578035995e-06,
      "loss": 0.3795,
      "step": 12964
    },
    {
      "epoch": 2.665227669853017,
      "grad_norm": 0.22793136537075043,
      "learning_rate": 2.902512448682765e-06,
      "loss": 0.3749,
      "step": 12965
    },
    {
      "epoch": 2.6654332408263954,
      "grad_norm": 0.2406536191701889,
      "learning_rate": 2.898984413508385e-06,
      "loss": 0.3877,
      "step": 12966
    },
    {
      "epoch": 2.665638811799774,
      "grad_norm": 0.24164964258670807,
      "learning_rate": 2.8954584524542707e-06,
      "loss": 0.3982,
      "step": 12967
    },
    {
      "epoch": 2.6658443827731526,
      "grad_norm": 0.2386479675769806,
      "learning_rate": 2.891934565694118e-06,
      "loss": 0.3901,
      "step": 12968
    },
    {
      "epoch": 2.6660499537465308,
      "grad_norm": 0.231131449341774,
      "learning_rate": 2.8884127534015327e-06,
      "loss": 0.3654,
      "step": 12969
    },
    {
      "epoch": 2.66625552471991,
      "grad_norm": 0.12683962285518646,
      "learning_rate": 2.8848930157500264e-06,
      "loss": 0.4251,
      "step": 12970
    },
    {
      "epoch": 2.666461095693288,
      "grad_norm": 0.23223094642162323,
      "learning_rate": 2.8813753529129956e-06,
      "loss": 0.3818,
      "step": 12971
    },
    {
      "epoch": 2.6666666666666665,
      "grad_norm": 0.22929398715496063,
      "learning_rate": 2.8778597650637312e-06,
      "loss": 0.3858,
      "step": 12972
    },
    {
      "epoch": 2.666872237640045,
      "grad_norm": 0.22222407162189484,
      "learning_rate": 2.874346252375445e-06,
      "loss": 0.3972,
      "step": 12973
    },
    {
      "epoch": 2.6670778086134237,
      "grad_norm": 0.22839607298374176,
      "learning_rate": 2.8708348150212236e-06,
      "loss": 0.3687,
      "step": 12974
    },
    {
      "epoch": 2.6672833795868023,
      "grad_norm": 0.22204485535621643,
      "learning_rate": 2.867325453174063e-06,
      "loss": 0.3772,
      "step": 12975
    },
    {
      "epoch": 2.667488950560181,
      "grad_norm": 0.12451615929603577,
      "learning_rate": 2.8638181670068452e-06,
      "loss": 0.4541,
      "step": 12976
    },
    {
      "epoch": 2.6676945215335595,
      "grad_norm": 0.24160999059677124,
      "learning_rate": 2.8603129566923676e-06,
      "loss": 0.3808,
      "step": 12977
    },
    {
      "epoch": 2.667900092506938,
      "grad_norm": 0.23395465314388275,
      "learning_rate": 2.8568098224032963e-06,
      "loss": 0.4002,
      "step": 12978
    },
    {
      "epoch": 2.6681056634803166,
      "grad_norm": 0.22567373514175415,
      "learning_rate": 2.8533087643122387e-06,
      "loss": 0.3679,
      "step": 12979
    },
    {
      "epoch": 2.6683112344536952,
      "grad_norm": 0.2304529845714569,
      "learning_rate": 2.8498097825916664e-06,
      "loss": 0.3783,
      "step": 12980
    },
    {
      "epoch": 2.668516805427074,
      "grad_norm": 0.22995389997959137,
      "learning_rate": 2.846312877413947e-06,
      "loss": 0.3743,
      "step": 12981
    },
    {
      "epoch": 2.6687223764004524,
      "grad_norm": 0.2304750382900238,
      "learning_rate": 2.842818048951377e-06,
      "loss": 0.3882,
      "step": 12982
    },
    {
      "epoch": 2.668927947373831,
      "grad_norm": 0.23300042748451233,
      "learning_rate": 2.8393252973761146e-06,
      "loss": 0.3901,
      "step": 12983
    },
    {
      "epoch": 2.669133518347209,
      "grad_norm": 0.231519877910614,
      "learning_rate": 2.8358346228602416e-06,
      "loss": 0.3797,
      "step": 12984
    },
    {
      "epoch": 2.669339089320588,
      "grad_norm": 0.22919991612434387,
      "learning_rate": 2.8323460255757206e-06,
      "loss": 0.3678,
      "step": 12985
    },
    {
      "epoch": 2.6695446602939663,
      "grad_norm": 0.2351444512605667,
      "learning_rate": 2.828859505694409e-06,
      "loss": 0.3931,
      "step": 12986
    },
    {
      "epoch": 2.669750231267345,
      "grad_norm": 0.12017477303743362,
      "learning_rate": 2.8253750633880943e-06,
      "loss": 0.4364,
      "step": 12987
    },
    {
      "epoch": 2.6699558022407235,
      "grad_norm": 0.22112242877483368,
      "learning_rate": 2.8218926988284245e-06,
      "loss": 0.3572,
      "step": 12988
    },
    {
      "epoch": 2.670161373214102,
      "grad_norm": 0.23203538358211517,
      "learning_rate": 2.8184124121869572e-06,
      "loss": 0.3769,
      "step": 12989
    },
    {
      "epoch": 2.6703669441874807,
      "grad_norm": 0.24650564789772034,
      "learning_rate": 2.81493420363516e-06,
      "loss": 0.3939,
      "step": 12990
    },
    {
      "epoch": 2.6705725151608593,
      "grad_norm": 0.22376540303230286,
      "learning_rate": 2.8114580733443815e-06,
      "loss": 0.3736,
      "step": 12991
    },
    {
      "epoch": 2.670778086134238,
      "grad_norm": 0.23529557883739471,
      "learning_rate": 2.8079840214858738e-06,
      "loss": 0.4071,
      "step": 12992
    },
    {
      "epoch": 2.6709836571076164,
      "grad_norm": 0.22694729268550873,
      "learning_rate": 2.804512048230781e-06,
      "loss": 0.366,
      "step": 12993
    },
    {
      "epoch": 2.671189228080995,
      "grad_norm": 0.11928309500217438,
      "learning_rate": 2.8010421537501653e-06,
      "loss": 0.4387,
      "step": 12994
    },
    {
      "epoch": 2.6713947990543736,
      "grad_norm": 0.22787900269031525,
      "learning_rate": 2.7975743382149655e-06,
      "loss": 0.3641,
      "step": 12995
    },
    {
      "epoch": 2.671600370027752,
      "grad_norm": 0.23789376020431519,
      "learning_rate": 2.79410860179602e-06,
      "loss": 0.3965,
      "step": 12996
    },
    {
      "epoch": 2.671805941001131,
      "grad_norm": 0.23560819029808044,
      "learning_rate": 2.790644944664082e-06,
      "loss": 0.3903,
      "step": 12997
    },
    {
      "epoch": 2.6720115119745094,
      "grad_norm": 0.1282123327255249,
      "learning_rate": 2.787183366989775e-06,
      "loss": 0.4493,
      "step": 12998
    },
    {
      "epoch": 2.6722170829478875,
      "grad_norm": 0.2356158196926117,
      "learning_rate": 2.783723868943638e-06,
      "loss": 0.3806,
      "step": 12999
    },
    {
      "epoch": 2.6724226539212665,
      "grad_norm": 0.22649535536766052,
      "learning_rate": 2.780266450696114e-06,
      "loss": 0.3694,
      "step": 13000
    },
    {
      "epoch": 2.6726282248946447,
      "grad_norm": 0.22387070953845978,
      "learning_rate": 2.7768111124175274e-06,
      "loss": 0.3648,
      "step": 13001
    },
    {
      "epoch": 2.6728337958680233,
      "grad_norm": 0.12043121457099915,
      "learning_rate": 2.7733578542780964e-06,
      "loss": 0.4574,
      "step": 13002
    },
    {
      "epoch": 2.673039366841402,
      "grad_norm": 0.22910076379776,
      "learning_rate": 2.7699066764479703e-06,
      "loss": 0.3764,
      "step": 13003
    },
    {
      "epoch": 2.6732449378147805,
      "grad_norm": 0.2299896627664566,
      "learning_rate": 2.766457579097153e-06,
      "loss": 0.3939,
      "step": 13004
    },
    {
      "epoch": 2.673450508788159,
      "grad_norm": 0.24078021943569183,
      "learning_rate": 2.763010562395579e-06,
      "loss": 0.385,
      "step": 13005
    },
    {
      "epoch": 2.6736560797615376,
      "grad_norm": 0.11954071372747421,
      "learning_rate": 2.7595656265130464e-06,
      "loss": 0.4594,
      "step": 13006
    },
    {
      "epoch": 2.673861650734916,
      "grad_norm": 0.23649781942367554,
      "learning_rate": 2.7561227716192906e-06,
      "loss": 0.3816,
      "step": 13007
    },
    {
      "epoch": 2.674067221708295,
      "grad_norm": 0.21963661909103394,
      "learning_rate": 2.75268199788392e-06,
      "loss": 0.3746,
      "step": 13008
    },
    {
      "epoch": 2.6742727926816734,
      "grad_norm": 0.2391149252653122,
      "learning_rate": 2.749243305476445e-06,
      "loss": 0.3899,
      "step": 13009
    },
    {
      "epoch": 2.674478363655052,
      "grad_norm": 0.22582948207855225,
      "learning_rate": 2.745806694566274e-06,
      "loss": 0.3797,
      "step": 13010
    },
    {
      "epoch": 2.6746839346284306,
      "grad_norm": 0.23774947226047516,
      "learning_rate": 2.7423721653227076e-06,
      "loss": 0.3978,
      "step": 13011
    },
    {
      "epoch": 2.674889505601809,
      "grad_norm": 0.2316160500049591,
      "learning_rate": 2.7389397179149596e-06,
      "loss": 0.3722,
      "step": 13012
    },
    {
      "epoch": 2.6750950765751877,
      "grad_norm": 0.22677737474441528,
      "learning_rate": 2.73550935251211e-06,
      "loss": 0.3865,
      "step": 13013
    },
    {
      "epoch": 2.675300647548566,
      "grad_norm": 0.2274550050497055,
      "learning_rate": 2.732081069283179e-06,
      "loss": 0.3732,
      "step": 13014
    },
    {
      "epoch": 2.675506218521945,
      "grad_norm": 0.23151232302188873,
      "learning_rate": 2.728654868397056e-06,
      "loss": 0.3861,
      "step": 13015
    },
    {
      "epoch": 2.675711789495323,
      "grad_norm": 0.12545832991600037,
      "learning_rate": 2.725230750022531e-06,
      "loss": 0.4512,
      "step": 13016
    },
    {
      "epoch": 2.6759173604687017,
      "grad_norm": 0.24073415994644165,
      "learning_rate": 2.7218087143282994e-06,
      "loss": 0.3836,
      "step": 13017
    },
    {
      "epoch": 2.6761229314420802,
      "grad_norm": 0.23176778852939606,
      "learning_rate": 2.7183887614829412e-06,
      "loss": 0.4068,
      "step": 13018
    },
    {
      "epoch": 2.676328502415459,
      "grad_norm": 0.23318178951740265,
      "learning_rate": 2.7149708916549418e-06,
      "loss": 0.3968,
      "step": 13019
    },
    {
      "epoch": 2.6765340733888374,
      "grad_norm": 0.24132607877254486,
      "learning_rate": 2.711555105012681e-06,
      "loss": 0.3644,
      "step": 13020
    },
    {
      "epoch": 2.676739644362216,
      "grad_norm": 0.22596125304698944,
      "learning_rate": 2.7081414017244543e-06,
      "loss": 0.3598,
      "step": 13021
    },
    {
      "epoch": 2.6769452153355946,
      "grad_norm": 0.2259039431810379,
      "learning_rate": 2.7047297819584276e-06,
      "loss": 0.3662,
      "step": 13022
    },
    {
      "epoch": 2.677150786308973,
      "grad_norm": 0.23511864244937897,
      "learning_rate": 2.7013202458826765e-06,
      "loss": 0.4058,
      "step": 13023
    },
    {
      "epoch": 2.6773563572823518,
      "grad_norm": 0.24032087624073029,
      "learning_rate": 2.697912793665171e-06,
      "loss": 0.3719,
      "step": 13024
    },
    {
      "epoch": 2.6775619282557304,
      "grad_norm": 0.23492936789989471,
      "learning_rate": 2.6945074254737823e-06,
      "loss": 0.3734,
      "step": 13025
    },
    {
      "epoch": 2.677767499229109,
      "grad_norm": 0.23162946105003357,
      "learning_rate": 2.691104141476281e-06,
      "loss": 0.3805,
      "step": 13026
    },
    {
      "epoch": 2.6779730702024875,
      "grad_norm": 0.24035188555717468,
      "learning_rate": 2.6877029418403233e-06,
      "loss": 0.3693,
      "step": 13027
    },
    {
      "epoch": 2.678178641175866,
      "grad_norm": 0.23720598220825195,
      "learning_rate": 2.6843038267334797e-06,
      "loss": 0.4006,
      "step": 13028
    },
    {
      "epoch": 2.6783842121492443,
      "grad_norm": 0.23743665218353271,
      "learning_rate": 2.6809067963232016e-06,
      "loss": 0.4038,
      "step": 13029
    },
    {
      "epoch": 2.6785897831226233,
      "grad_norm": 0.240424245595932,
      "learning_rate": 2.677511850776845e-06,
      "loss": 0.3842,
      "step": 13030
    },
    {
      "epoch": 2.6787953540960014,
      "grad_norm": 0.1235266923904419,
      "learning_rate": 2.674118990261666e-06,
      "loss": 0.4391,
      "step": 13031
    },
    {
      "epoch": 2.6790009250693805,
      "grad_norm": 0.23002861440181732,
      "learning_rate": 2.670728214944816e-06,
      "loss": 0.384,
      "step": 13032
    },
    {
      "epoch": 2.6792064960427586,
      "grad_norm": 0.22837281227111816,
      "learning_rate": 2.6673395249933415e-06,
      "loss": 0.38,
      "step": 13033
    },
    {
      "epoch": 2.679412067016137,
      "grad_norm": 0.24020573496818542,
      "learning_rate": 2.6639529205741737e-06,
      "loss": 0.3887,
      "step": 13034
    },
    {
      "epoch": 2.679617637989516,
      "grad_norm": 0.24188318848609924,
      "learning_rate": 2.6605684018541794e-06,
      "loss": 0.3972,
      "step": 13035
    },
    {
      "epoch": 2.6798232089628944,
      "grad_norm": 0.12417499721050262,
      "learning_rate": 2.657185969000085e-06,
      "loss": 0.4522,
      "step": 13036
    },
    {
      "epoch": 2.680028779936273,
      "grad_norm": 0.21679937839508057,
      "learning_rate": 2.653805622178527e-06,
      "loss": 0.3873,
      "step": 13037
    },
    {
      "epoch": 2.6802343509096516,
      "grad_norm": 0.22777822613716125,
      "learning_rate": 2.6504273615560383e-06,
      "loss": 0.3618,
      "step": 13038
    },
    {
      "epoch": 2.68043992188303,
      "grad_norm": 0.12555932998657227,
      "learning_rate": 2.6470511872990544e-06,
      "loss": 0.4445,
      "step": 13039
    },
    {
      "epoch": 2.6806454928564087,
      "grad_norm": 0.23415377736091614,
      "learning_rate": 2.643677099573903e-06,
      "loss": 0.3909,
      "step": 13040
    },
    {
      "epoch": 2.6808510638297873,
      "grad_norm": 0.22409114241600037,
      "learning_rate": 2.640305098546801e-06,
      "loss": 0.362,
      "step": 13041
    },
    {
      "epoch": 2.681056634803166,
      "grad_norm": 0.23534564673900604,
      "learning_rate": 2.6369351843838803e-06,
      "loss": 0.3977,
      "step": 13042
    },
    {
      "epoch": 2.6812622057765445,
      "grad_norm": 0.23140472173690796,
      "learning_rate": 2.633567357251163e-06,
      "loss": 0.3775,
      "step": 13043
    },
    {
      "epoch": 2.6814677767499226,
      "grad_norm": 0.23929573595523834,
      "learning_rate": 2.630201617314557e-06,
      "loss": 0.3746,
      "step": 13044
    },
    {
      "epoch": 2.6816733477233017,
      "grad_norm": 0.23926587402820587,
      "learning_rate": 2.6268379647398795e-06,
      "loss": 0.3772,
      "step": 13045
    },
    {
      "epoch": 2.68187891869668,
      "grad_norm": 0.23361510038375854,
      "learning_rate": 2.6234763996928526e-06,
      "loss": 0.3855,
      "step": 13046
    },
    {
      "epoch": 2.682084489670059,
      "grad_norm": 0.23641300201416016,
      "learning_rate": 2.620116922339069e-06,
      "loss": 0.3859,
      "step": 13047
    },
    {
      "epoch": 2.682290060643437,
      "grad_norm": 0.22642360627651215,
      "learning_rate": 2.616759532844041e-06,
      "loss": 0.375,
      "step": 13048
    },
    {
      "epoch": 2.6824956316168156,
      "grad_norm": 0.22510544955730438,
      "learning_rate": 2.6134042313731765e-06,
      "loss": 0.3614,
      "step": 13049
    },
    {
      "epoch": 2.682701202590194,
      "grad_norm": 0.23352572321891785,
      "learning_rate": 2.6100510180917686e-06,
      "loss": 0.3866,
      "step": 13050
    },
    {
      "epoch": 2.6829067735635728,
      "grad_norm": 0.2314728945493698,
      "learning_rate": 2.60669989316502e-06,
      "loss": 0.3931,
      "step": 13051
    },
    {
      "epoch": 2.6831123445369514,
      "grad_norm": 0.23167473077774048,
      "learning_rate": 2.603350856758018e-06,
      "loss": 0.3845,
      "step": 13052
    },
    {
      "epoch": 2.68331791551033,
      "grad_norm": 0.23171542584896088,
      "learning_rate": 2.600003909035762e-06,
      "loss": 0.3828,
      "step": 13053
    },
    {
      "epoch": 2.6835234864837085,
      "grad_norm": 0.12145873159170151,
      "learning_rate": 2.596659050163139e-06,
      "loss": 0.4463,
      "step": 13054
    },
    {
      "epoch": 2.683729057457087,
      "grad_norm": 0.22926872968673706,
      "learning_rate": 2.593316280304917e-06,
      "loss": 0.3856,
      "step": 13055
    },
    {
      "epoch": 2.6839346284304657,
      "grad_norm": 0.2303893268108368,
      "learning_rate": 2.589975599625805e-06,
      "loss": 0.3838,
      "step": 13056
    },
    {
      "epoch": 2.6841401994038443,
      "grad_norm": 0.2381599098443985,
      "learning_rate": 2.5866370082903713e-06,
      "loss": 0.3799,
      "step": 13057
    },
    {
      "epoch": 2.684345770377223,
      "grad_norm": 0.23543013632297516,
      "learning_rate": 2.583300506463094e-06,
      "loss": 0.385,
      "step": 13058
    },
    {
      "epoch": 2.684551341350601,
      "grad_norm": 0.2375613898038864,
      "learning_rate": 2.5799660943083415e-06,
      "loss": 0.388,
      "step": 13059
    },
    {
      "epoch": 2.68475691232398,
      "grad_norm": 0.22905340790748596,
      "learning_rate": 2.5766337719903927e-06,
      "loss": 0.3664,
      "step": 13060
    },
    {
      "epoch": 2.684962483297358,
      "grad_norm": 0.23459582030773163,
      "learning_rate": 2.5733035396734113e-06,
      "loss": 0.3786,
      "step": 13061
    },
    {
      "epoch": 2.6851680542707372,
      "grad_norm": 0.23848964273929596,
      "learning_rate": 2.569975397521451e-06,
      "loss": 0.374,
      "step": 13062
    },
    {
      "epoch": 2.6853736252441154,
      "grad_norm": 0.22707267105579376,
      "learning_rate": 2.5666493456985e-06,
      "loss": 0.3724,
      "step": 13063
    },
    {
      "epoch": 2.685579196217494,
      "grad_norm": 0.22259126603603363,
      "learning_rate": 2.5633253843683986e-06,
      "loss": 0.3879,
      "step": 13064
    },
    {
      "epoch": 2.6857847671908726,
      "grad_norm": 0.23496946692466736,
      "learning_rate": 2.5600035136949045e-06,
      "loss": 0.3857,
      "step": 13065
    },
    {
      "epoch": 2.685990338164251,
      "grad_norm": 0.23178550601005554,
      "learning_rate": 2.5566837338416676e-06,
      "loss": 0.3643,
      "step": 13066
    },
    {
      "epoch": 2.6861959091376297,
      "grad_norm": 0.22792139649391174,
      "learning_rate": 2.553366044972252e-06,
      "loss": 0.3983,
      "step": 13067
    },
    {
      "epoch": 2.6864014801110083,
      "grad_norm": 0.12821319699287415,
      "learning_rate": 2.5500504472500965e-06,
      "loss": 0.4591,
      "step": 13068
    },
    {
      "epoch": 2.686607051084387,
      "grad_norm": 0.22924353182315826,
      "learning_rate": 2.5467369408385405e-06,
      "loss": 0.3922,
      "step": 13069
    },
    {
      "epoch": 2.6868126220577655,
      "grad_norm": 0.2222532331943512,
      "learning_rate": 2.5434255259008338e-06,
      "loss": 0.3853,
      "step": 13070
    },
    {
      "epoch": 2.687018193031144,
      "grad_norm": 0.2258753478527069,
      "learning_rate": 2.5401162026001056e-06,
      "loss": 0.3812,
      "step": 13071
    },
    {
      "epoch": 2.6872237640045227,
      "grad_norm": 0.12898650765419006,
      "learning_rate": 2.536808971099401e-06,
      "loss": 0.459,
      "step": 13072
    },
    {
      "epoch": 2.6874293349779013,
      "grad_norm": 0.23579534888267517,
      "learning_rate": 2.533503831561644e-06,
      "loss": 0.3781,
      "step": 13073
    },
    {
      "epoch": 2.6876349059512794,
      "grad_norm": 0.23496629297733307,
      "learning_rate": 2.5302007841496646e-06,
      "loss": 0.3986,
      "step": 13074
    },
    {
      "epoch": 2.6878404769246584,
      "grad_norm": 0.23853163421154022,
      "learning_rate": 2.5268998290261877e-06,
      "loss": 0.3919,
      "step": 13075
    },
    {
      "epoch": 2.6880460478980366,
      "grad_norm": 0.23851320147514343,
      "learning_rate": 2.523600966353833e-06,
      "loss": 0.3835,
      "step": 13076
    },
    {
      "epoch": 2.6882516188714156,
      "grad_norm": 0.12485864758491516,
      "learning_rate": 2.5203041962951306e-06,
      "loss": 0.447,
      "step": 13077
    },
    {
      "epoch": 2.6884571898447938,
      "grad_norm": 0.22811704874038696,
      "learning_rate": 2.517009519012496e-06,
      "loss": 0.3961,
      "step": 13078
    },
    {
      "epoch": 2.6886627608181723,
      "grad_norm": 0.2245602309703827,
      "learning_rate": 2.513716934668229e-06,
      "loss": 0.3827,
      "step": 13079
    },
    {
      "epoch": 2.688868331791551,
      "grad_norm": 0.23157405853271484,
      "learning_rate": 2.5104264434245545e-06,
      "loss": 0.3791,
      "step": 13080
    },
    {
      "epoch": 2.6890739027649295,
      "grad_norm": 0.2352142632007599,
      "learning_rate": 2.5071380454435682e-06,
      "loss": 0.3827,
      "step": 13081
    },
    {
      "epoch": 2.689279473738308,
      "grad_norm": 0.24821443855762482,
      "learning_rate": 2.503851740887276e-06,
      "loss": 0.3834,
      "step": 13082
    },
    {
      "epoch": 2.6894850447116867,
      "grad_norm": 0.2236967235803604,
      "learning_rate": 2.5005675299175875e-06,
      "loss": 0.3846,
      "step": 13083
    },
    {
      "epoch": 2.6896906156850653,
      "grad_norm": 0.2317054569721222,
      "learning_rate": 2.4972854126962986e-06,
      "loss": 0.3587,
      "step": 13084
    },
    {
      "epoch": 2.689896186658444,
      "grad_norm": 0.2305641770362854,
      "learning_rate": 2.494005389385095e-06,
      "loss": 0.3853,
      "step": 13085
    },
    {
      "epoch": 2.6901017576318225,
      "grad_norm": 0.22506798803806305,
      "learning_rate": 2.4907274601455726e-06,
      "loss": 0.371,
      "step": 13086
    },
    {
      "epoch": 2.690307328605201,
      "grad_norm": 0.23190316557884216,
      "learning_rate": 2.487451625139217e-06,
      "loss": 0.3807,
      "step": 13087
    },
    {
      "epoch": 2.6905128995785796,
      "grad_norm": 0.23732031881809235,
      "learning_rate": 2.4841778845274242e-06,
      "loss": 0.3917,
      "step": 13088
    },
    {
      "epoch": 2.690718470551958,
      "grad_norm": 0.23446981608867645,
      "learning_rate": 2.4809062384714706e-06,
      "loss": 0.3926,
      "step": 13089
    },
    {
      "epoch": 2.690924041525337,
      "grad_norm": 0.2362259030342102,
      "learning_rate": 2.4776366871325213e-06,
      "loss": 0.3592,
      "step": 13090
    },
    {
      "epoch": 2.691129612498715,
      "grad_norm": 0.12213429063558578,
      "learning_rate": 2.4743692306716734e-06,
      "loss": 0.4355,
      "step": 13091
    },
    {
      "epoch": 2.691335183472094,
      "grad_norm": 0.23348113894462585,
      "learning_rate": 2.4711038692498873e-06,
      "loss": 0.3789,
      "step": 13092
    },
    {
      "epoch": 2.691540754445472,
      "grad_norm": 0.23292915523052216,
      "learning_rate": 2.46784060302803e-06,
      "loss": 0.3832,
      "step": 13093
    },
    {
      "epoch": 2.6917463254188507,
      "grad_norm": 0.12205676734447479,
      "learning_rate": 2.4645794321668774e-06,
      "loss": 0.4589,
      "step": 13094
    },
    {
      "epoch": 2.6919518963922293,
      "grad_norm": 0.24196146428585052,
      "learning_rate": 2.4613203568270864e-06,
      "loss": 0.3818,
      "step": 13095
    },
    {
      "epoch": 2.692157467365608,
      "grad_norm": 0.23547834157943726,
      "learning_rate": 2.4580633771692036e-06,
      "loss": 0.3813,
      "step": 13096
    },
    {
      "epoch": 2.6923630383389865,
      "grad_norm": 0.23056018352508545,
      "learning_rate": 2.4548084933537104e-06,
      "loss": 0.3645,
      "step": 13097
    },
    {
      "epoch": 2.692568609312365,
      "grad_norm": 0.23042161762714386,
      "learning_rate": 2.4515557055409433e-06,
      "loss": 0.3756,
      "step": 13098
    },
    {
      "epoch": 2.6927741802857437,
      "grad_norm": 0.22752924263477325,
      "learning_rate": 2.4483050138911598e-06,
      "loss": 0.3805,
      "step": 13099
    },
    {
      "epoch": 2.6929797512591223,
      "grad_norm": 0.12008198350667953,
      "learning_rate": 2.445056418564496e-06,
      "loss": 0.442,
      "step": 13100
    },
    {
      "epoch": 2.693185322232501,
      "grad_norm": 0.23437613248825073,
      "learning_rate": 2.4418099197210043e-06,
      "loss": 0.3716,
      "step": 13101
    },
    {
      "epoch": 2.6933908932058794,
      "grad_norm": 0.2585579752922058,
      "learning_rate": 2.438565517520622e-06,
      "loss": 0.3716,
      "step": 13102
    },
    {
      "epoch": 2.693596464179258,
      "grad_norm": 0.23899348080158234,
      "learning_rate": 2.4353232121231807e-06,
      "loss": 0.3918,
      "step": 13103
    },
    {
      "epoch": 2.6938020351526366,
      "grad_norm": 0.23468652367591858,
      "learning_rate": 2.432083003688423e-06,
      "loss": 0.4057,
      "step": 13104
    },
    {
      "epoch": 2.694007606126015,
      "grad_norm": 0.2242051213979721,
      "learning_rate": 2.428844892375971e-06,
      "loss": 0.3746,
      "step": 13105
    },
    {
      "epoch": 2.6942131770993933,
      "grad_norm": 0.22958362102508545,
      "learning_rate": 2.4256088783453573e-06,
      "loss": 0.3729,
      "step": 13106
    },
    {
      "epoch": 2.6944187480727724,
      "grad_norm": 0.2308982014656067,
      "learning_rate": 2.4223749617559994e-06,
      "loss": 0.3924,
      "step": 13107
    },
    {
      "epoch": 2.6946243190461505,
      "grad_norm": 0.22810958325862885,
      "learning_rate": 2.4191431427672194e-06,
      "loss": 0.378,
      "step": 13108
    },
    {
      "epoch": 2.694829890019529,
      "grad_norm": 0.2260715216398239,
      "learning_rate": 2.4159134215382305e-06,
      "loss": 0.3683,
      "step": 13109
    },
    {
      "epoch": 2.6950354609929077,
      "grad_norm": 0.22648762166500092,
      "learning_rate": 2.4126857982281553e-06,
      "loss": 0.3933,
      "step": 13110
    },
    {
      "epoch": 2.6952410319662863,
      "grad_norm": 0.2309451550245285,
      "learning_rate": 2.4094602729959916e-06,
      "loss": 0.3877,
      "step": 13111
    },
    {
      "epoch": 2.695446602939665,
      "grad_norm": 0.23046442866325378,
      "learning_rate": 2.406236846000657e-06,
      "loss": 0.3708,
      "step": 13112
    },
    {
      "epoch": 2.6956521739130435,
      "grad_norm": 0.23607337474822998,
      "learning_rate": 2.4030155174009545e-06,
      "loss": 0.3971,
      "step": 13113
    },
    {
      "epoch": 2.695857744886422,
      "grad_norm": 0.12376264482736588,
      "learning_rate": 2.3997962873555773e-06,
      "loss": 0.4165,
      "step": 13114
    },
    {
      "epoch": 2.6960633158598006,
      "grad_norm": 0.22271588444709778,
      "learning_rate": 2.396579156023124e-06,
      "loss": 0.3727,
      "step": 13115
    },
    {
      "epoch": 2.6962688868331792,
      "grad_norm": 0.22929471731185913,
      "learning_rate": 2.393364123562087e-06,
      "loss": 0.3777,
      "step": 13116
    },
    {
      "epoch": 2.696474457806558,
      "grad_norm": 0.12126558274030685,
      "learning_rate": 2.39015119013085e-06,
      "loss": 0.4323,
      "step": 13117
    },
    {
      "epoch": 2.6966800287799364,
      "grad_norm": 0.22162644565105438,
      "learning_rate": 2.3869403558877163e-06,
      "loss": 0.3741,
      "step": 13118
    },
    {
      "epoch": 2.696885599753315,
      "grad_norm": 0.12397125363349915,
      "learning_rate": 2.3837316209908546e-06,
      "loss": 0.4487,
      "step": 13119
    },
    {
      "epoch": 2.6970911707266936,
      "grad_norm": 0.11935931444168091,
      "learning_rate": 2.380524985598348e-06,
      "loss": 0.4464,
      "step": 13120
    },
    {
      "epoch": 2.6972967417000717,
      "grad_norm": 0.23302531242370605,
      "learning_rate": 2.3773204498681758e-06,
      "loss": 0.3829,
      "step": 13121
    },
    {
      "epoch": 2.6975023126734508,
      "grad_norm": 0.12195220589637756,
      "learning_rate": 2.374118013958206e-06,
      "loss": 0.4471,
      "step": 13122
    },
    {
      "epoch": 2.697707883646829,
      "grad_norm": 0.23271100223064423,
      "learning_rate": 2.3709176780262076e-06,
      "loss": 0.3808,
      "step": 13123
    },
    {
      "epoch": 2.6979134546202075,
      "grad_norm": 0.22822687029838562,
      "learning_rate": 2.36771944222984e-06,
      "loss": 0.3695,
      "step": 13124
    },
    {
      "epoch": 2.698119025593586,
      "grad_norm": 0.23481127619743347,
      "learning_rate": 2.3645233067266815e-06,
      "loss": 0.3999,
      "step": 13125
    },
    {
      "epoch": 2.6983245965669647,
      "grad_norm": 0.225934699177742,
      "learning_rate": 2.3613292716741816e-06,
      "loss": 0.3737,
      "step": 13126
    },
    {
      "epoch": 2.6985301675403432,
      "grad_norm": 0.23984263837337494,
      "learning_rate": 2.358137337229694e-06,
      "loss": 0.3819,
      "step": 13127
    },
    {
      "epoch": 2.698735738513722,
      "grad_norm": 0.2241523116827011,
      "learning_rate": 2.3549475035504733e-06,
      "loss": 0.3683,
      "step": 13128
    },
    {
      "epoch": 2.6989413094871004,
      "grad_norm": 0.2185996025800705,
      "learning_rate": 2.3517597707936636e-06,
      "loss": 0.3635,
      "step": 13129
    },
    {
      "epoch": 2.699146880460479,
      "grad_norm": 0.23561379313468933,
      "learning_rate": 2.3485741391163092e-06,
      "loss": 0.3819,
      "step": 13130
    },
    {
      "epoch": 2.6993524514338576,
      "grad_norm": 0.12253455072641373,
      "learning_rate": 2.3453906086753646e-06,
      "loss": 0.4446,
      "step": 13131
    },
    {
      "epoch": 2.699558022407236,
      "grad_norm": 0.22365736961364746,
      "learning_rate": 2.34220917962765e-06,
      "loss": 0.382,
      "step": 13132
    },
    {
      "epoch": 2.699763593380615,
      "grad_norm": 0.12028893828392029,
      "learning_rate": 2.339029852129909e-06,
      "loss": 0.4583,
      "step": 13133
    },
    {
      "epoch": 2.6999691643539934,
      "grad_norm": 0.12149006873369217,
      "learning_rate": 2.3358526263387715e-06,
      "loss": 0.4422,
      "step": 13134
    },
    {
      "epoch": 2.700174735327372,
      "grad_norm": 0.23423054814338684,
      "learning_rate": 2.3326775024107627e-06,
      "loss": 0.3989,
      "step": 13135
    },
    {
      "epoch": 2.70038030630075,
      "grad_norm": 0.2282380312681198,
      "learning_rate": 2.3295044805023075e-06,
      "loss": 0.3798,
      "step": 13136
    },
    {
      "epoch": 2.700585877274129,
      "grad_norm": 0.22905749082565308,
      "learning_rate": 2.3263335607697258e-06,
      "loss": 0.3987,
      "step": 13137
    },
    {
      "epoch": 2.7007914482475073,
      "grad_norm": 0.223682701587677,
      "learning_rate": 2.3231647433692273e-06,
      "loss": 0.3574,
      "step": 13138
    },
    {
      "epoch": 2.700997019220886,
      "grad_norm": 0.12208550423383713,
      "learning_rate": 2.3199980284569373e-06,
      "loss": 0.4494,
      "step": 13139
    },
    {
      "epoch": 2.7012025901942645,
      "grad_norm": 0.2424362748861313,
      "learning_rate": 2.316833416188861e-06,
      "loss": 0.3936,
      "step": 13140
    },
    {
      "epoch": 2.701408161167643,
      "grad_norm": 0.12195998430252075,
      "learning_rate": 2.313670906720899e-06,
      "loss": 0.4549,
      "step": 13141
    },
    {
      "epoch": 2.7016137321410216,
      "grad_norm": 0.23335258662700653,
      "learning_rate": 2.310510500208856e-06,
      "loss": 0.398,
      "step": 13142
    },
    {
      "epoch": 2.7018193031144,
      "grad_norm": 0.239247128367424,
      "learning_rate": 2.3073521968084285e-06,
      "loss": 0.3936,
      "step": 13143
    },
    {
      "epoch": 2.702024874087779,
      "grad_norm": 0.23259401321411133,
      "learning_rate": 2.304195996675216e-06,
      "loss": 0.395,
      "step": 13144
    },
    {
      "epoch": 2.7022304450611574,
      "grad_norm": 0.2279106080532074,
      "learning_rate": 2.3010418999646995e-06,
      "loss": 0.3653,
      "step": 13145
    },
    {
      "epoch": 2.702436016034536,
      "grad_norm": 0.23209701478481293,
      "learning_rate": 2.2978899068322845e-06,
      "loss": 0.3991,
      "step": 13146
    },
    {
      "epoch": 2.7026415870079146,
      "grad_norm": 0.23221181333065033,
      "learning_rate": 2.294740017433242e-06,
      "loss": 0.3696,
      "step": 13147
    },
    {
      "epoch": 2.702847157981293,
      "grad_norm": 0.23377148807048798,
      "learning_rate": 2.2915922319227536e-06,
      "loss": 0.3691,
      "step": 13148
    },
    {
      "epoch": 2.7030527289546717,
      "grad_norm": 0.23326507210731506,
      "learning_rate": 2.288446550455899e-06,
      "loss": 0.371,
      "step": 13149
    },
    {
      "epoch": 2.7032582999280503,
      "grad_norm": 0.11786891520023346,
      "learning_rate": 2.2853029731876445e-06,
      "loss": 0.4332,
      "step": 13150
    },
    {
      "epoch": 2.7034638709014285,
      "grad_norm": 0.23896630108356476,
      "learning_rate": 2.282161500272867e-06,
      "loss": 0.3665,
      "step": 13151
    },
    {
      "epoch": 2.7036694418748075,
      "grad_norm": 0.12194350361824036,
      "learning_rate": 2.2790221318663267e-06,
      "loss": 0.4324,
      "step": 13152
    },
    {
      "epoch": 2.7038750128481857,
      "grad_norm": 0.23250941932201385,
      "learning_rate": 2.275884868122696e-06,
      "loss": 0.3881,
      "step": 13153
    },
    {
      "epoch": 2.7040805838215642,
      "grad_norm": 0.232101172208786,
      "learning_rate": 2.272749709196515e-06,
      "loss": 0.3908,
      "step": 13154
    },
    {
      "epoch": 2.704286154794943,
      "grad_norm": 0.22767187654972076,
      "learning_rate": 2.269616655242261e-06,
      "loss": 0.3864,
      "step": 13155
    },
    {
      "epoch": 2.7044917257683214,
      "grad_norm": 0.232827827334404,
      "learning_rate": 2.2664857064142654e-06,
      "loss": 0.3791,
      "step": 13156
    },
    {
      "epoch": 2.7046972967417,
      "grad_norm": 0.12185569107532501,
      "learning_rate": 2.2633568628667894e-06,
      "loss": 0.4662,
      "step": 13157
    },
    {
      "epoch": 2.7049028677150786,
      "grad_norm": 0.23200562596321106,
      "learning_rate": 2.2602301247539605e-06,
      "loss": 0.3772,
      "step": 13158
    },
    {
      "epoch": 2.705108438688457,
      "grad_norm": 0.24151834845542908,
      "learning_rate": 2.2571054922298347e-06,
      "loss": 0.372,
      "step": 13159
    },
    {
      "epoch": 2.7053140096618358,
      "grad_norm": 0.23124399781227112,
      "learning_rate": 2.253982965448344e-06,
      "loss": 0.4018,
      "step": 13160
    },
    {
      "epoch": 2.7055195806352144,
      "grad_norm": 0.11762725561857224,
      "learning_rate": 2.250862544563316e-06,
      "loss": 0.4491,
      "step": 13161
    },
    {
      "epoch": 2.705725151608593,
      "grad_norm": 0.11691106110811234,
      "learning_rate": 2.2477442297284817e-06,
      "loss": 0.4284,
      "step": 13162
    },
    {
      "epoch": 2.7059307225819715,
      "grad_norm": 0.22589966654777527,
      "learning_rate": 2.244628021097469e-06,
      "loss": 0.3723,
      "step": 13163
    },
    {
      "epoch": 2.70613629355535,
      "grad_norm": 0.23272038996219635,
      "learning_rate": 2.24151391882379e-06,
      "loss": 0.3985,
      "step": 13164
    },
    {
      "epoch": 2.7063418645287287,
      "grad_norm": 0.22128084301948547,
      "learning_rate": 2.2384019230608664e-06,
      "loss": 0.3743,
      "step": 13165
    },
    {
      "epoch": 2.706547435502107,
      "grad_norm": 0.11852707713842392,
      "learning_rate": 2.2352920339620166e-06,
      "loss": 0.4401,
      "step": 13166
    },
    {
      "epoch": 2.706753006475486,
      "grad_norm": 0.23066598176956177,
      "learning_rate": 2.232184251680447e-06,
      "loss": 0.3604,
      "step": 13167
    },
    {
      "epoch": 2.706958577448864,
      "grad_norm": 0.11477980017662048,
      "learning_rate": 2.229078576369261e-06,
      "loss": 0.4282,
      "step": 13168
    },
    {
      "epoch": 2.7071641484222426,
      "grad_norm": 0.2299182116985321,
      "learning_rate": 2.2259750081814653e-06,
      "loss": 0.3933,
      "step": 13169
    },
    {
      "epoch": 2.707369719395621,
      "grad_norm": 0.22713468968868256,
      "learning_rate": 2.222873547269953e-06,
      "loss": 0.4137,
      "step": 13170
    },
    {
      "epoch": 2.707575290369,
      "grad_norm": 0.23036979138851166,
      "learning_rate": 2.2197741937875274e-06,
      "loss": 0.3846,
      "step": 13171
    },
    {
      "epoch": 2.7077808613423784,
      "grad_norm": 0.2358933985233307,
      "learning_rate": 2.2166769478868607e-06,
      "loss": 0.3745,
      "step": 13172
    },
    {
      "epoch": 2.707986432315757,
      "grad_norm": 0.25210806727409363,
      "learning_rate": 2.2135818097205606e-06,
      "loss": 0.4011,
      "step": 13173
    },
    {
      "epoch": 2.7081920032891356,
      "grad_norm": 0.23163393139839172,
      "learning_rate": 2.210488779441101e-06,
      "loss": 0.3744,
      "step": 13174
    },
    {
      "epoch": 2.708397574262514,
      "grad_norm": 0.24861502647399902,
      "learning_rate": 2.207397857200855e-06,
      "loss": 0.3905,
      "step": 13175
    },
    {
      "epoch": 2.7086031452358927,
      "grad_norm": 0.23797625303268433,
      "learning_rate": 2.20430904315211e-06,
      "loss": 0.3745,
      "step": 13176
    },
    {
      "epoch": 2.7088087162092713,
      "grad_norm": 0.23504245281219482,
      "learning_rate": 2.201222337447034e-06,
      "loss": 0.3763,
      "step": 13177
    },
    {
      "epoch": 2.70901428718265,
      "grad_norm": 0.23205745220184326,
      "learning_rate": 2.1981377402376917e-06,
      "loss": 0.3683,
      "step": 13178
    },
    {
      "epoch": 2.7092198581560285,
      "grad_norm": 0.12594787776470184,
      "learning_rate": 2.195055251676041e-06,
      "loss": 0.4583,
      "step": 13179
    },
    {
      "epoch": 2.709425429129407,
      "grad_norm": 0.22511595487594604,
      "learning_rate": 2.191974871913955e-06,
      "loss": 0.3569,
      "step": 13180
    },
    {
      "epoch": 2.7096310001027852,
      "grad_norm": 0.2308862954378128,
      "learning_rate": 2.1888966011031823e-06,
      "loss": 0.3656,
      "step": 13181
    },
    {
      "epoch": 2.7098365710761643,
      "grad_norm": 0.2143402099609375,
      "learning_rate": 2.1858204393953726e-06,
      "loss": 0.3644,
      "step": 13182
    },
    {
      "epoch": 2.7100421420495424,
      "grad_norm": 0.24129731953144073,
      "learning_rate": 2.1827463869420834e-06,
      "loss": 0.3925,
      "step": 13183
    },
    {
      "epoch": 2.710247713022921,
      "grad_norm": 0.23186422884464264,
      "learning_rate": 2.179674443894749e-06,
      "loss": 0.3777,
      "step": 13184
    },
    {
      "epoch": 2.7104532839962996,
      "grad_norm": 0.22773997485637665,
      "learning_rate": 2.176604610404709e-06,
      "loss": 0.3691,
      "step": 13185
    },
    {
      "epoch": 2.710658854969678,
      "grad_norm": 0.22896374762058258,
      "learning_rate": 2.1735368866232013e-06,
      "loss": 0.3764,
      "step": 13186
    },
    {
      "epoch": 2.7108644259430568,
      "grad_norm": 0.23243440687656403,
      "learning_rate": 2.170471272701371e-06,
      "loss": 0.367,
      "step": 13187
    },
    {
      "epoch": 2.7110699969164354,
      "grad_norm": 0.2411787211894989,
      "learning_rate": 2.1674077687902318e-06,
      "loss": 0.3867,
      "step": 13188
    },
    {
      "epoch": 2.711275567889814,
      "grad_norm": 0.21772977709770203,
      "learning_rate": 2.164346375040713e-06,
      "loss": 0.372,
      "step": 13189
    },
    {
      "epoch": 2.7114811388631925,
      "grad_norm": 0.2415088415145874,
      "learning_rate": 2.1612870916036336e-06,
      "loss": 0.3886,
      "step": 13190
    },
    {
      "epoch": 2.711686709836571,
      "grad_norm": 0.22934192419052124,
      "learning_rate": 2.1582299186297138e-06,
      "loss": 0.384,
      "step": 13191
    },
    {
      "epoch": 2.7118922808099497,
      "grad_norm": 0.23373478651046753,
      "learning_rate": 2.1551748562695627e-06,
      "loss": 0.3916,
      "step": 13192
    },
    {
      "epoch": 2.7120978517833283,
      "grad_norm": 0.23007836937904358,
      "learning_rate": 2.152121904673685e-06,
      "loss": 0.3613,
      "step": 13193
    },
    {
      "epoch": 2.712303422756707,
      "grad_norm": 0.12053580582141876,
      "learning_rate": 2.1490710639925003e-06,
      "loss": 0.4356,
      "step": 13194
    },
    {
      "epoch": 2.7125089937300855,
      "grad_norm": 0.23001576960086823,
      "learning_rate": 2.1460223343762937e-06,
      "loss": 0.3559,
      "step": 13195
    },
    {
      "epoch": 2.7127145647034636,
      "grad_norm": 0.2180272787809372,
      "learning_rate": 2.1429757159752697e-06,
      "loss": 0.3824,
      "step": 13196
    },
    {
      "epoch": 2.7129201356768426,
      "grad_norm": 0.23263666033744812,
      "learning_rate": 2.139931208939513e-06,
      "loss": 0.3618,
      "step": 13197
    },
    {
      "epoch": 2.713125706650221,
      "grad_norm": 0.2428431212902069,
      "learning_rate": 2.136888813419024e-06,
      "loss": 0.3795,
      "step": 13198
    },
    {
      "epoch": 2.7133312776236,
      "grad_norm": 0.11948748677968979,
      "learning_rate": 2.133848529563683e-06,
      "loss": 0.4315,
      "step": 13199
    },
    {
      "epoch": 2.713536848596978,
      "grad_norm": 0.2290315479040146,
      "learning_rate": 2.1308103575232645e-06,
      "loss": 0.3947,
      "step": 13200
    },
    {
      "epoch": 2.7137424195703566,
      "grad_norm": 0.11666145920753479,
      "learning_rate": 2.12777429744745e-06,
      "loss": 0.4485,
      "step": 13201
    },
    {
      "epoch": 2.713947990543735,
      "grad_norm": 0.2218608856201172,
      "learning_rate": 2.124740349485818e-06,
      "loss": 0.3646,
      "step": 13202
    },
    {
      "epoch": 2.7141535615171137,
      "grad_norm": 0.23134684562683105,
      "learning_rate": 2.1217085137878256e-06,
      "loss": 0.3925,
      "step": 13203
    },
    {
      "epoch": 2.7143591324904923,
      "grad_norm": 0.22924216091632843,
      "learning_rate": 2.118678790502843e-06,
      "loss": 0.3695,
      "step": 13204
    },
    {
      "epoch": 2.714564703463871,
      "grad_norm": 0.23405931890010834,
      "learning_rate": 2.11565117978013e-06,
      "loss": 0.3772,
      "step": 13205
    },
    {
      "epoch": 2.7147702744372495,
      "grad_norm": 0.22839610278606415,
      "learning_rate": 2.1126256817688427e-06,
      "loss": 0.3642,
      "step": 13206
    },
    {
      "epoch": 2.714975845410628,
      "grad_norm": 0.22964198887348175,
      "learning_rate": 2.1096022966180274e-06,
      "loss": 0.3658,
      "step": 13207
    },
    {
      "epoch": 2.7151814163840067,
      "grad_norm": 0.2313418686389923,
      "learning_rate": 2.106581024476644e-06,
      "loss": 0.3822,
      "step": 13208
    },
    {
      "epoch": 2.7153869873573853,
      "grad_norm": 0.23704691231250763,
      "learning_rate": 2.10356186549353e-06,
      "loss": 0.3822,
      "step": 13209
    },
    {
      "epoch": 2.715592558330764,
      "grad_norm": 0.2327091097831726,
      "learning_rate": 2.100544819817424e-06,
      "loss": 0.3948,
      "step": 13210
    },
    {
      "epoch": 2.715798129304142,
      "grad_norm": 0.23315146565437317,
      "learning_rate": 2.0975298875969646e-06,
      "loss": 0.384,
      "step": 13211
    },
    {
      "epoch": 2.716003700277521,
      "grad_norm": 0.22651349008083344,
      "learning_rate": 2.0945170689806813e-06,
      "loss": 0.3692,
      "step": 13212
    },
    {
      "epoch": 2.716209271250899,
      "grad_norm": 0.22469674050807953,
      "learning_rate": 2.0915063641170015e-06,
      "loss": 0.3868,
      "step": 13213
    },
    {
      "epoch": 2.716414842224278,
      "grad_norm": 0.12531313300132751,
      "learning_rate": 2.0884977731542454e-06,
      "loss": 0.4479,
      "step": 13214
    },
    {
      "epoch": 2.7166204131976563,
      "grad_norm": 0.11977065354585648,
      "learning_rate": 2.0854912962406403e-06,
      "loss": 0.4418,
      "step": 13215
    },
    {
      "epoch": 2.716825984171035,
      "grad_norm": 0.23392470180988312,
      "learning_rate": 2.0824869335242976e-06,
      "loss": 0.3844,
      "step": 13216
    },
    {
      "epoch": 2.7170315551444135,
      "grad_norm": 0.23545394837856293,
      "learning_rate": 2.0794846851532287e-06,
      "loss": 0.3921,
      "step": 13217
    },
    {
      "epoch": 2.717237126117792,
      "grad_norm": 0.11989542841911316,
      "learning_rate": 2.076484551275335e-06,
      "loss": 0.4487,
      "step": 13218
    },
    {
      "epoch": 2.7174426970911707,
      "grad_norm": 0.2391575127840042,
      "learning_rate": 2.073486532038424e-06,
      "loss": 0.3802,
      "step": 13219
    },
    {
      "epoch": 2.7176482680645493,
      "grad_norm": 0.12006057053804398,
      "learning_rate": 2.0704906275901968e-06,
      "loss": 0.4567,
      "step": 13220
    },
    {
      "epoch": 2.717853839037928,
      "grad_norm": 0.23207461833953857,
      "learning_rate": 2.067496838078241e-06,
      "loss": 0.3808,
      "step": 13221
    },
    {
      "epoch": 2.7180594100113065,
      "grad_norm": 0.2288663387298584,
      "learning_rate": 2.0645051636500534e-06,
      "loss": 0.3663,
      "step": 13222
    },
    {
      "epoch": 2.718264980984685,
      "grad_norm": 0.23807507753372192,
      "learning_rate": 2.061515604453016e-06,
      "loss": 0.3887,
      "step": 13223
    },
    {
      "epoch": 2.7184705519580636,
      "grad_norm": 0.23132917284965515,
      "learning_rate": 2.058528160634411e-06,
      "loss": 0.4107,
      "step": 13224
    },
    {
      "epoch": 2.7186761229314422,
      "grad_norm": 0.2228475958108902,
      "learning_rate": 2.0555428323414157e-06,
      "loss": 0.3912,
      "step": 13225
    },
    {
      "epoch": 2.7188816939048204,
      "grad_norm": 0.2397620677947998,
      "learning_rate": 2.0525596197211022e-06,
      "loss": 0.3716,
      "step": 13226
    },
    {
      "epoch": 2.7190872648781994,
      "grad_norm": 0.23399171233177185,
      "learning_rate": 2.0495785229204432e-06,
      "loss": 0.3902,
      "step": 13227
    },
    {
      "epoch": 2.7192928358515775,
      "grad_norm": 0.23543019592761993,
      "learning_rate": 2.0465995420862917e-06,
      "loss": 0.3836,
      "step": 13228
    },
    {
      "epoch": 2.7194984068249566,
      "grad_norm": 0.12511909008026123,
      "learning_rate": 2.043622677365424e-06,
      "loss": 0.4333,
      "step": 13229
    },
    {
      "epoch": 2.7197039777983347,
      "grad_norm": 0.22897110879421234,
      "learning_rate": 2.0406479289044895e-06,
      "loss": 0.3777,
      "step": 13230
    },
    {
      "epoch": 2.7199095487717133,
      "grad_norm": 0.23776206374168396,
      "learning_rate": 2.0376752968500397e-06,
      "loss": 0.385,
      "step": 13231
    },
    {
      "epoch": 2.720115119745092,
      "grad_norm": 0.22551818192005157,
      "learning_rate": 2.0347047813485274e-06,
      "loss": 0.3732,
      "step": 13232
    },
    {
      "epoch": 2.7203206907184705,
      "grad_norm": 0.23058441281318665,
      "learning_rate": 2.0317363825462867e-06,
      "loss": 0.3617,
      "step": 13233
    },
    {
      "epoch": 2.720526261691849,
      "grad_norm": 0.22936634719371796,
      "learning_rate": 2.0287701005895543e-06,
      "loss": 0.3821,
      "step": 13234
    },
    {
      "epoch": 2.7207318326652277,
      "grad_norm": 0.2348644882440567,
      "learning_rate": 2.025805935624479e-06,
      "loss": 0.3783,
      "step": 13235
    },
    {
      "epoch": 2.7209374036386063,
      "grad_norm": 0.22684963047504425,
      "learning_rate": 2.022843887797084e-06,
      "loss": 0.3885,
      "step": 13236
    },
    {
      "epoch": 2.721142974611985,
      "grad_norm": 0.2363407462835312,
      "learning_rate": 2.0198839572532972e-06,
      "loss": 0.379,
      "step": 13237
    },
    {
      "epoch": 2.7213485455853634,
      "grad_norm": 0.11835481971502304,
      "learning_rate": 2.0169261441389376e-06,
      "loss": 0.4572,
      "step": 13238
    },
    {
      "epoch": 2.721554116558742,
      "grad_norm": 0.11983449012041092,
      "learning_rate": 2.013970448599723e-06,
      "loss": 0.4419,
      "step": 13239
    },
    {
      "epoch": 2.7217596875321206,
      "grad_norm": 0.23388756811618805,
      "learning_rate": 2.011016870781267e-06,
      "loss": 0.3931,
      "step": 13240
    },
    {
      "epoch": 2.7219652585054988,
      "grad_norm": 0.23092390596866608,
      "learning_rate": 2.0080654108290835e-06,
      "loss": 0.3978,
      "step": 13241
    },
    {
      "epoch": 2.722170829478878,
      "grad_norm": 0.23938718438148499,
      "learning_rate": 2.0051160688885714e-06,
      "loss": 0.3733,
      "step": 13242
    },
    {
      "epoch": 2.722376400452256,
      "grad_norm": 0.11977725476026535,
      "learning_rate": 2.0021688451050334e-06,
      "loss": 0.4444,
      "step": 13243
    },
    {
      "epoch": 2.722581971425635,
      "grad_norm": 0.22424502670764923,
      "learning_rate": 1.9992237396236645e-06,
      "loss": 0.383,
      "step": 13244
    },
    {
      "epoch": 2.722787542399013,
      "grad_norm": 0.23597703874111176,
      "learning_rate": 1.996280752589563e-06,
      "loss": 0.369,
      "step": 13245
    },
    {
      "epoch": 2.7229931133723917,
      "grad_norm": 0.23005138337612152,
      "learning_rate": 1.993339884147704e-06,
      "loss": 0.3738,
      "step": 13246
    },
    {
      "epoch": 2.7231986843457703,
      "grad_norm": 0.22499051690101624,
      "learning_rate": 1.9904011344429797e-06,
      "loss": 0.3796,
      "step": 13247
    },
    {
      "epoch": 2.723404255319149,
      "grad_norm": 0.22470605373382568,
      "learning_rate": 1.9874645036201557e-06,
      "loss": 0.3958,
      "step": 13248
    },
    {
      "epoch": 2.7236098262925275,
      "grad_norm": 0.22984722256660461,
      "learning_rate": 1.9845299918239257e-06,
      "loss": 0.4004,
      "step": 13249
    },
    {
      "epoch": 2.723815397265906,
      "grad_norm": 0.2346932291984558,
      "learning_rate": 1.9815975991988445e-06,
      "loss": 0.3883,
      "step": 13250
    },
    {
      "epoch": 2.7240209682392846,
      "grad_norm": 0.24680159986019135,
      "learning_rate": 1.978667325889386e-06,
      "loss": 0.3792,
      "step": 13251
    },
    {
      "epoch": 2.7242265392126632,
      "grad_norm": 0.2350476086139679,
      "learning_rate": 1.9757391720399056e-06,
      "loss": 0.3892,
      "step": 13252
    },
    {
      "epoch": 2.724432110186042,
      "grad_norm": 0.22723430395126343,
      "learning_rate": 1.972813137794662e-06,
      "loss": 0.3875,
      "step": 13253
    },
    {
      "epoch": 2.7246376811594204,
      "grad_norm": 0.238324373960495,
      "learning_rate": 1.969889223297805e-06,
      "loss": 0.3872,
      "step": 13254
    },
    {
      "epoch": 2.724843252132799,
      "grad_norm": 0.12185484915971756,
      "learning_rate": 1.96696742869338e-06,
      "loss": 0.4491,
      "step": 13255
    },
    {
      "epoch": 2.725048823106177,
      "grad_norm": 0.23587733507156372,
      "learning_rate": 1.964047754125341e-06,
      "loss": 0.3813,
      "step": 13256
    },
    {
      "epoch": 2.725254394079556,
      "grad_norm": 0.22594283521175385,
      "learning_rate": 1.961130199737514e-06,
      "loss": 0.3808,
      "step": 13257
    },
    {
      "epoch": 2.7254599650529343,
      "grad_norm": 0.117709681391716,
      "learning_rate": 1.9582147656736426e-06,
      "loss": 0.4448,
      "step": 13258
    },
    {
      "epoch": 2.7256655360263133,
      "grad_norm": 0.1209564283490181,
      "learning_rate": 1.9553014520773535e-06,
      "loss": 0.4412,
      "step": 13259
    },
    {
      "epoch": 2.7258711069996915,
      "grad_norm": 0.23455938696861267,
      "learning_rate": 1.9523902590921657e-06,
      "loss": 0.3511,
      "step": 13260
    },
    {
      "epoch": 2.72607667797307,
      "grad_norm": 0.2380744367837906,
      "learning_rate": 1.94948118686151e-06,
      "loss": 0.376,
      "step": 13261
    },
    {
      "epoch": 2.7262822489464487,
      "grad_norm": 0.11584602296352386,
      "learning_rate": 1.9465742355287014e-06,
      "loss": 0.4523,
      "step": 13262
    },
    {
      "epoch": 2.7264878199198272,
      "grad_norm": 0.12415748089551926,
      "learning_rate": 1.943669405236941e-06,
      "loss": 0.435,
      "step": 13263
    },
    {
      "epoch": 2.726693390893206,
      "grad_norm": 0.227211594581604,
      "learning_rate": 1.9407666961293487e-06,
      "loss": 0.383,
      "step": 13264
    },
    {
      "epoch": 2.7268989618665844,
      "grad_norm": 0.12154388427734375,
      "learning_rate": 1.9378661083489255e-06,
      "loss": 0.4509,
      "step": 13265
    },
    {
      "epoch": 2.727104532839963,
      "grad_norm": 0.23259896039962769,
      "learning_rate": 1.9349676420385665e-06,
      "loss": 0.385,
      "step": 13266
    },
    {
      "epoch": 2.7273101038133416,
      "grad_norm": 0.2539547383785248,
      "learning_rate": 1.9320712973410634e-06,
      "loss": 0.397,
      "step": 13267
    },
    {
      "epoch": 2.72751567478672,
      "grad_norm": 0.22695066034793854,
      "learning_rate": 1.929177074399111e-06,
      "loss": 0.3688,
      "step": 13268
    },
    {
      "epoch": 2.727721245760099,
      "grad_norm": 0.23069900274276733,
      "learning_rate": 1.9262849733552864e-06,
      "loss": 0.3655,
      "step": 13269
    },
    {
      "epoch": 2.7279268167334774,
      "grad_norm": 0.22440584003925323,
      "learning_rate": 1.9233949943520798e-06,
      "loss": 0.3756,
      "step": 13270
    },
    {
      "epoch": 2.728132387706856,
      "grad_norm": 0.23021718859672546,
      "learning_rate": 1.920507137531862e-06,
      "loss": 0.3766,
      "step": 13271
    },
    {
      "epoch": 2.7283379586802345,
      "grad_norm": 0.22625602781772614,
      "learning_rate": 1.9176214030369055e-06,
      "loss": 0.3834,
      "step": 13272
    },
    {
      "epoch": 2.7285435296536127,
      "grad_norm": 0.22805316746234894,
      "learning_rate": 1.9147377910093754e-06,
      "loss": 0.3796,
      "step": 13273
    },
    {
      "epoch": 2.7287491006269917,
      "grad_norm": 0.12454908341169357,
      "learning_rate": 1.9118563015913337e-06,
      "loss": 0.4406,
      "step": 13274
    },
    {
      "epoch": 2.72895467160037,
      "grad_norm": 0.23324701189994812,
      "learning_rate": 1.9089769349247417e-06,
      "loss": 0.3713,
      "step": 13275
    },
    {
      "epoch": 2.7291602425737485,
      "grad_norm": 0.23551899194717407,
      "learning_rate": 1.9060996911514407e-06,
      "loss": 0.3907,
      "step": 13276
    },
    {
      "epoch": 2.729365813547127,
      "grad_norm": 0.24228408932685852,
      "learning_rate": 1.9032245704131973e-06,
      "loss": 0.386,
      "step": 13277
    },
    {
      "epoch": 2.7295713845205056,
      "grad_norm": 0.22902436554431915,
      "learning_rate": 1.9003515728516386e-06,
      "loss": 0.3902,
      "step": 13278
    },
    {
      "epoch": 2.729776955493884,
      "grad_norm": 0.2340046763420105,
      "learning_rate": 1.897480698608316e-06,
      "loss": 0.3771,
      "step": 13279
    },
    {
      "epoch": 2.729982526467263,
      "grad_norm": 0.12157081812620163,
      "learning_rate": 1.8946119478246565e-06,
      "loss": 0.4443,
      "step": 13280
    },
    {
      "epoch": 2.7301880974406414,
      "grad_norm": 0.22382938861846924,
      "learning_rate": 1.8917453206419922e-06,
      "loss": 0.3782,
      "step": 13281
    },
    {
      "epoch": 2.73039366841402,
      "grad_norm": 0.2339484691619873,
      "learning_rate": 1.888880817201545e-06,
      "loss": 0.3879,
      "step": 13282
    },
    {
      "epoch": 2.7305992393873986,
      "grad_norm": 0.23548907041549683,
      "learning_rate": 1.8860184376444418e-06,
      "loss": 0.3741,
      "step": 13283
    },
    {
      "epoch": 2.730804810360777,
      "grad_norm": 0.22213146090507507,
      "learning_rate": 1.8831581821116901e-06,
      "loss": 0.3853,
      "step": 13284
    },
    {
      "epoch": 2.7310103813341557,
      "grad_norm": 0.22502891719341278,
      "learning_rate": 1.8803000507442171e-06,
      "loss": 0.3773,
      "step": 13285
    },
    {
      "epoch": 2.7312159523075343,
      "grad_norm": 0.22634708881378174,
      "learning_rate": 1.877444043682815e-06,
      "loss": 0.3758,
      "step": 13286
    },
    {
      "epoch": 2.731421523280913,
      "grad_norm": 0.23306407034397125,
      "learning_rate": 1.8745901610681915e-06,
      "loss": 0.373,
      "step": 13287
    },
    {
      "epoch": 2.731627094254291,
      "grad_norm": 0.2324984073638916,
      "learning_rate": 1.8717384030409442e-06,
      "loss": 0.3757,
      "step": 13288
    },
    {
      "epoch": 2.73183266522767,
      "grad_norm": 0.2317853718996048,
      "learning_rate": 1.8688887697415653e-06,
      "loss": 0.3755,
      "step": 13289
    },
    {
      "epoch": 2.7320382362010482,
      "grad_norm": 0.2325008064508438,
      "learning_rate": 1.8660412613104379e-06,
      "loss": 0.3827,
      "step": 13290
    },
    {
      "epoch": 2.732243807174427,
      "grad_norm": 0.23261573910713196,
      "learning_rate": 1.8631958778878495e-06,
      "loss": 0.3833,
      "step": 13291
    },
    {
      "epoch": 2.7324493781478054,
      "grad_norm": 0.22458091378211975,
      "learning_rate": 1.860352619613983e-06,
      "loss": 0.3718,
      "step": 13292
    },
    {
      "epoch": 2.732654949121184,
      "grad_norm": 0.23159871995449066,
      "learning_rate": 1.8575114866289118e-06,
      "loss": 0.3698,
      "step": 13293
    },
    {
      "epoch": 2.7328605200945626,
      "grad_norm": 0.22185635566711426,
      "learning_rate": 1.8546724790725984e-06,
      "loss": 0.3753,
      "step": 13294
    },
    {
      "epoch": 2.733066091067941,
      "grad_norm": 0.23244544863700867,
      "learning_rate": 1.851835597084911e-06,
      "loss": 0.3959,
      "step": 13295
    },
    {
      "epoch": 2.7332716620413198,
      "grad_norm": 0.2387784868478775,
      "learning_rate": 1.8490008408056131e-06,
      "loss": 0.3712,
      "step": 13296
    },
    {
      "epoch": 2.7334772330146984,
      "grad_norm": 0.2200855165719986,
      "learning_rate": 1.8461682103743478e-06,
      "loss": 0.3655,
      "step": 13297
    },
    {
      "epoch": 2.733682803988077,
      "grad_norm": 0.22560839354991913,
      "learning_rate": 1.8433377059306835e-06,
      "loss": 0.3768,
      "step": 13298
    },
    {
      "epoch": 2.7338883749614555,
      "grad_norm": 0.23543007671833038,
      "learning_rate": 1.8405093276140534e-06,
      "loss": 0.4065,
      "step": 13299
    },
    {
      "epoch": 2.734093945934834,
      "grad_norm": 0.2383396327495575,
      "learning_rate": 1.8376830755638013e-06,
      "loss": 0.3916,
      "step": 13300
    },
    {
      "epoch": 2.7342995169082127,
      "grad_norm": 0.22618170082569122,
      "learning_rate": 1.834858949919166e-06,
      "loss": 0.3665,
      "step": 13301
    },
    {
      "epoch": 2.7345050878815913,
      "grad_norm": 0.12377354502677917,
      "learning_rate": 1.8320369508192759e-06,
      "loss": 0.4598,
      "step": 13302
    },
    {
      "epoch": 2.7347106588549694,
      "grad_norm": 0.24000823497772217,
      "learning_rate": 1.8292170784031548e-06,
      "loss": 0.3789,
      "step": 13303
    },
    {
      "epoch": 2.7349162298283485,
      "grad_norm": 0.23978973925113678,
      "learning_rate": 1.8263993328097318e-06,
      "loss": 0.385,
      "step": 13304
    },
    {
      "epoch": 2.7351218008017266,
      "grad_norm": 0.21829509735107422,
      "learning_rate": 1.8235837141778206e-06,
      "loss": 0.3776,
      "step": 13305
    },
    {
      "epoch": 2.735327371775105,
      "grad_norm": 0.2309700846672058,
      "learning_rate": 1.8207702226461305e-06,
      "loss": 0.3829,
      "step": 13306
    },
    {
      "epoch": 2.735532942748484,
      "grad_norm": 0.225687175989151,
      "learning_rate": 1.8179588583532753e-06,
      "loss": 0.3622,
      "step": 13307
    },
    {
      "epoch": 2.7357385137218624,
      "grad_norm": 0.22879765927791595,
      "learning_rate": 1.8151496214377546e-06,
      "loss": 0.3916,
      "step": 13308
    },
    {
      "epoch": 2.735944084695241,
      "grad_norm": 0.23625633120536804,
      "learning_rate": 1.8123425120379672e-06,
      "loss": 0.405,
      "step": 13309
    },
    {
      "epoch": 2.7361496556686196,
      "grad_norm": 0.2239421159029007,
      "learning_rate": 1.809537530292203e-06,
      "loss": 0.3717,
      "step": 13310
    },
    {
      "epoch": 2.736355226641998,
      "grad_norm": 0.23815853893756866,
      "learning_rate": 1.806734676338656e-06,
      "loss": 0.3639,
      "step": 13311
    },
    {
      "epoch": 2.7365607976153767,
      "grad_norm": 0.23043270409107208,
      "learning_rate": 1.8039339503154062e-06,
      "loss": 0.3773,
      "step": 13312
    },
    {
      "epoch": 2.7367663685887553,
      "grad_norm": 0.23327013850212097,
      "learning_rate": 1.801135352360433e-06,
      "loss": 0.3803,
      "step": 13313
    },
    {
      "epoch": 2.736971939562134,
      "grad_norm": 0.2213982492685318,
      "learning_rate": 1.798338882611611e-06,
      "loss": 0.3834,
      "step": 13314
    },
    {
      "epoch": 2.7371775105355125,
      "grad_norm": 0.23775465786457062,
      "learning_rate": 1.7955445412067102e-06,
      "loss": 0.3809,
      "step": 13315
    },
    {
      "epoch": 2.737383081508891,
      "grad_norm": 0.23932182788848877,
      "learning_rate": 1.7927523282833902e-06,
      "loss": 0.3749,
      "step": 13316
    },
    {
      "epoch": 2.7375886524822697,
      "grad_norm": 0.24264240264892578,
      "learning_rate": 1.7899622439792063e-06,
      "loss": 0.378,
      "step": 13317
    },
    {
      "epoch": 2.737794223455648,
      "grad_norm": 0.22181403636932373,
      "learning_rate": 1.7871742884316284e-06,
      "loss": 0.3769,
      "step": 13318
    },
    {
      "epoch": 2.737999794429027,
      "grad_norm": 0.231339693069458,
      "learning_rate": 1.7843884617779917e-06,
      "loss": 0.3888,
      "step": 13319
    },
    {
      "epoch": 2.738205365402405,
      "grad_norm": 0.23549319803714752,
      "learning_rate": 1.7816047641555512e-06,
      "loss": 0.3774,
      "step": 13320
    },
    {
      "epoch": 2.7384109363757836,
      "grad_norm": 0.12438720464706421,
      "learning_rate": 1.7788231957014424e-06,
      "loss": 0.4356,
      "step": 13321
    },
    {
      "epoch": 2.738616507349162,
      "grad_norm": 0.22666363418102264,
      "learning_rate": 1.7760437565526955e-06,
      "loss": 0.3691,
      "step": 13322
    },
    {
      "epoch": 2.7388220783225408,
      "grad_norm": 0.23813243210315704,
      "learning_rate": 1.7732664468462463e-06,
      "loss": 0.3833,
      "step": 13323
    },
    {
      "epoch": 2.7390276492959194,
      "grad_norm": 0.23540125787258148,
      "learning_rate": 1.77049126671891e-06,
      "loss": 0.3851,
      "step": 13324
    },
    {
      "epoch": 2.739233220269298,
      "grad_norm": 0.22330401837825775,
      "learning_rate": 1.7677182163074224e-06,
      "loss": 0.3608,
      "step": 13325
    },
    {
      "epoch": 2.7394387912426765,
      "grad_norm": 0.2364005595445633,
      "learning_rate": 1.7649472957483942e-06,
      "loss": 0.3804,
      "step": 13326
    },
    {
      "epoch": 2.739644362216055,
      "grad_norm": 0.22581815719604492,
      "learning_rate": 1.7621785051783213e-06,
      "loss": 0.3868,
      "step": 13327
    },
    {
      "epoch": 2.7398499331894337,
      "grad_norm": 0.22957521677017212,
      "learning_rate": 1.7594118447336294e-06,
      "loss": 0.3731,
      "step": 13328
    },
    {
      "epoch": 2.7400555041628123,
      "grad_norm": 0.2309531569480896,
      "learning_rate": 1.7566473145506097e-06,
      "loss": 0.3712,
      "step": 13329
    },
    {
      "epoch": 2.740261075136191,
      "grad_norm": 0.22596019506454468,
      "learning_rate": 1.753884914765458e-06,
      "loss": 0.352,
      "step": 13330
    },
    {
      "epoch": 2.7404666461095695,
      "grad_norm": 0.1303359568119049,
      "learning_rate": 1.7511246455142555e-06,
      "loss": 0.4202,
      "step": 13331
    },
    {
      "epoch": 2.740672217082948,
      "grad_norm": 0.22711026668548584,
      "learning_rate": 1.7483665069330086e-06,
      "loss": 0.3873,
      "step": 13332
    },
    {
      "epoch": 2.740877788056326,
      "grad_norm": 0.12200283259153366,
      "learning_rate": 1.7456104991575834e-06,
      "loss": 0.4612,
      "step": 13333
    },
    {
      "epoch": 2.7410833590297052,
      "grad_norm": 0.2212943285703659,
      "learning_rate": 1.7428566223237564e-06,
      "loss": 0.3736,
      "step": 13334
    },
    {
      "epoch": 2.7412889300030834,
      "grad_norm": 0.2410779595375061,
      "learning_rate": 1.740104876567204e-06,
      "loss": 0.3901,
      "step": 13335
    },
    {
      "epoch": 2.741494500976462,
      "grad_norm": 0.229153111577034,
      "learning_rate": 1.737355262023483e-06,
      "loss": 0.3911,
      "step": 13336
    },
    {
      "epoch": 2.7417000719498406,
      "grad_norm": 0.2225484400987625,
      "learning_rate": 1.7346077788280646e-06,
      "loss": 0.3882,
      "step": 13337
    },
    {
      "epoch": 2.741905642923219,
      "grad_norm": 0.23056496679782867,
      "learning_rate": 1.731862427116291e-06,
      "loss": 0.3961,
      "step": 13338
    },
    {
      "epoch": 2.7421112138965977,
      "grad_norm": 0.12282350659370422,
      "learning_rate": 1.7291192070234285e-06,
      "loss": 0.4445,
      "step": 13339
    },
    {
      "epoch": 2.7423167848699763,
      "grad_norm": 0.23378808796405792,
      "learning_rate": 1.7263781186846096e-06,
      "loss": 0.3796,
      "step": 13340
    },
    {
      "epoch": 2.742522355843355,
      "grad_norm": 0.22584164142608643,
      "learning_rate": 1.7236391622348857e-06,
      "loss": 0.3835,
      "step": 13341
    },
    {
      "epoch": 2.7427279268167335,
      "grad_norm": 0.23093900084495544,
      "learning_rate": 1.7209023378091844e-06,
      "loss": 0.3808,
      "step": 13342
    },
    {
      "epoch": 2.742933497790112,
      "grad_norm": 0.22885221242904663,
      "learning_rate": 1.7181676455423425e-06,
      "loss": 0.3878,
      "step": 13343
    },
    {
      "epoch": 2.7431390687634907,
      "grad_norm": 0.11858902126550674,
      "learning_rate": 1.715435085569077e-06,
      "loss": 0.4476,
      "step": 13344
    },
    {
      "epoch": 2.7433446397368693,
      "grad_norm": 0.21561181545257568,
      "learning_rate": 1.712704658024011e-06,
      "loss": 0.3538,
      "step": 13345
    },
    {
      "epoch": 2.743550210710248,
      "grad_norm": 0.12362432479858398,
      "learning_rate": 1.709976363041666e-06,
      "loss": 0.4305,
      "step": 13346
    },
    {
      "epoch": 2.7437557816836264,
      "grad_norm": 0.2305142730474472,
      "learning_rate": 1.7072502007564501e-06,
      "loss": 0.3542,
      "step": 13347
    },
    {
      "epoch": 2.7439613526570046,
      "grad_norm": 0.22127611935138702,
      "learning_rate": 1.7045261713026607e-06,
      "loss": 0.3839,
      "step": 13348
    },
    {
      "epoch": 2.7441669236303836,
      "grad_norm": 0.22477680444717407,
      "learning_rate": 1.7018042748145103e-06,
      "loss": 0.3914,
      "step": 13349
    },
    {
      "epoch": 2.7443724946037618,
      "grad_norm": 0.23768995702266693,
      "learning_rate": 1.6990845114260868e-06,
      "loss": 0.3856,
      "step": 13350
    },
    {
      "epoch": 2.7445780655771403,
      "grad_norm": 0.23600324988365173,
      "learning_rate": 1.696366881271383e-06,
      "loss": 0.3844,
      "step": 13351
    },
    {
      "epoch": 2.744783636550519,
      "grad_norm": 0.19089475274085999,
      "learning_rate": 1.6936513844842767e-06,
      "loss": 0.4466,
      "step": 13352
    },
    {
      "epoch": 2.7449892075238975,
      "grad_norm": 0.12042814493179321,
      "learning_rate": 1.690938021198556e-06,
      "loss": 0.446,
      "step": 13353
    },
    {
      "epoch": 2.745194778497276,
      "grad_norm": 0.23038606345653534,
      "learning_rate": 1.688226791547899e-06,
      "loss": 0.3753,
      "step": 13354
    },
    {
      "epoch": 2.7454003494706547,
      "grad_norm": 0.12141604721546173,
      "learning_rate": 1.6855176956658635e-06,
      "loss": 0.4526,
      "step": 13355
    },
    {
      "epoch": 2.7456059204440333,
      "grad_norm": 0.23495237529277802,
      "learning_rate": 1.6828107336859233e-06,
      "loss": 0.3864,
      "step": 13356
    },
    {
      "epoch": 2.745811491417412,
      "grad_norm": 0.11687915772199631,
      "learning_rate": 1.6801059057414314e-06,
      "loss": 0.4553,
      "step": 13357
    },
    {
      "epoch": 2.7460170623907905,
      "grad_norm": 0.23180118203163147,
      "learning_rate": 1.6774032119656463e-06,
      "loss": 0.3715,
      "step": 13358
    },
    {
      "epoch": 2.746222633364169,
      "grad_norm": 0.2334972769021988,
      "learning_rate": 1.6747026524917114e-06,
      "loss": 0.3905,
      "step": 13359
    },
    {
      "epoch": 2.7464282043375476,
      "grad_norm": 0.11535855382680893,
      "learning_rate": 1.6720042274526754e-06,
      "loss": 0.4416,
      "step": 13360
    },
    {
      "epoch": 2.7466337753109262,
      "grad_norm": 0.2297336459159851,
      "learning_rate": 1.6693079369814819e-06,
      "loss": 0.3875,
      "step": 13361
    },
    {
      "epoch": 2.746839346284305,
      "grad_norm": 0.23500292003154755,
      "learning_rate": 1.6666137812109595e-06,
      "loss": 0.3828,
      "step": 13362
    },
    {
      "epoch": 2.747044917257683,
      "grad_norm": 0.22683893144130707,
      "learning_rate": 1.6639217602738322e-06,
      "loss": 0.3724,
      "step": 13363
    },
    {
      "epoch": 2.747250488231062,
      "grad_norm": 0.22383469343185425,
      "learning_rate": 1.6612318743027288e-06,
      "loss": 0.3791,
      "step": 13364
    },
    {
      "epoch": 2.74745605920444,
      "grad_norm": 0.23103176057338715,
      "learning_rate": 1.6585441234301686e-06,
      "loss": 0.372,
      "step": 13365
    },
    {
      "epoch": 2.7476616301778187,
      "grad_norm": 0.12242773920297623,
      "learning_rate": 1.6558585077885553e-06,
      "loss": 0.431,
      "step": 13366
    },
    {
      "epoch": 2.7478672011511973,
      "grad_norm": 0.24223840236663818,
      "learning_rate": 1.6531750275102082e-06,
      "loss": 0.4015,
      "step": 13367
    },
    {
      "epoch": 2.748072772124576,
      "grad_norm": 0.24244338274002075,
      "learning_rate": 1.6504936827273216e-06,
      "loss": 0.3931,
      "step": 13368
    },
    {
      "epoch": 2.7482783430979545,
      "grad_norm": 0.23767444491386414,
      "learning_rate": 1.6478144735719997e-06,
      "loss": 0.39,
      "step": 13369
    },
    {
      "epoch": 2.748483914071333,
      "grad_norm": 0.2309853583574295,
      "learning_rate": 1.6451374001762272e-06,
      "loss": 0.3812,
      "step": 13370
    },
    {
      "epoch": 2.7486894850447117,
      "grad_norm": 0.2257552295923233,
      "learning_rate": 1.6424624626718982e-06,
      "loss": 0.3695,
      "step": 13371
    },
    {
      "epoch": 2.7488950560180903,
      "grad_norm": 0.22814756631851196,
      "learning_rate": 1.6397896611907925e-06,
      "loss": 0.3859,
      "step": 13372
    },
    {
      "epoch": 2.749100626991469,
      "grad_norm": 0.23302559554576874,
      "learning_rate": 1.63711899586458e-06,
      "loss": 0.389,
      "step": 13373
    },
    {
      "epoch": 2.7493061979648474,
      "grad_norm": 0.1236652210354805,
      "learning_rate": 1.6344504668248401e-06,
      "loss": 0.4524,
      "step": 13374
    },
    {
      "epoch": 2.749511768938226,
      "grad_norm": 0.12643174827098846,
      "learning_rate": 1.6317840742030328e-06,
      "loss": 0.4511,
      "step": 13375
    },
    {
      "epoch": 2.7497173399116046,
      "grad_norm": 0.122630275785923,
      "learning_rate": 1.6291198181305279e-06,
      "loss": 0.4569,
      "step": 13376
    },
    {
      "epoch": 2.749922910884983,
      "grad_norm": 0.12135348469018936,
      "learning_rate": 1.6264576987385705e-06,
      "loss": 0.4401,
      "step": 13377
    },
    {
      "epoch": 2.7501284818583613,
      "grad_norm": 0.2322167158126831,
      "learning_rate": 1.6237977161583157e-06,
      "loss": 0.3739,
      "step": 13378
    },
    {
      "epoch": 2.7503340528317404,
      "grad_norm": 0.22395208477973938,
      "learning_rate": 1.6211398705208086e-06,
      "loss": 0.3799,
      "step": 13379
    },
    {
      "epoch": 2.7505396238051185,
      "grad_norm": 0.23402728140354156,
      "learning_rate": 1.6184841619569847e-06,
      "loss": 0.4011,
      "step": 13380
    },
    {
      "epoch": 2.7507451947784975,
      "grad_norm": 0.23176077008247375,
      "learning_rate": 1.6158305905976839e-06,
      "loss": 0.3742,
      "step": 13381
    },
    {
      "epoch": 2.7509507657518757,
      "grad_norm": 0.23246802389621735,
      "learning_rate": 1.6131791565736322e-06,
      "loss": 0.3758,
      "step": 13382
    },
    {
      "epoch": 2.7511563367252543,
      "grad_norm": 0.23959018290042877,
      "learning_rate": 1.6105298600154545e-06,
      "loss": 0.3795,
      "step": 13383
    },
    {
      "epoch": 2.751361907698633,
      "grad_norm": 0.22550779581069946,
      "learning_rate": 1.6078827010536717e-06,
      "loss": 0.3797,
      "step": 13384
    },
    {
      "epoch": 2.7515674786720115,
      "grad_norm": 0.23018544912338257,
      "learning_rate": 1.6052376798186896e-06,
      "loss": 0.4094,
      "step": 13385
    },
    {
      "epoch": 2.75177304964539,
      "grad_norm": 0.12095669656991959,
      "learning_rate": 1.602594796440824e-06,
      "loss": 0.4569,
      "step": 13386
    },
    {
      "epoch": 2.7519786206187686,
      "grad_norm": 0.22460295259952545,
      "learning_rate": 1.5999540510502653e-06,
      "loss": 0.3634,
      "step": 13387
    },
    {
      "epoch": 2.7521841915921472,
      "grad_norm": 0.23332244157791138,
      "learning_rate": 1.597315443777125e-06,
      "loss": 0.3988,
      "step": 13388
    },
    {
      "epoch": 2.752389762565526,
      "grad_norm": 0.22451160848140717,
      "learning_rate": 1.5946789747513935e-06,
      "loss": 0.3883,
      "step": 13389
    },
    {
      "epoch": 2.7525953335389044,
      "grad_norm": 0.23024022579193115,
      "learning_rate": 1.5920446441029474e-06,
      "loss": 0.3961,
      "step": 13390
    },
    {
      "epoch": 2.752800904512283,
      "grad_norm": 0.2318277209997177,
      "learning_rate": 1.5894124519615678e-06,
      "loss": 0.3562,
      "step": 13391
    },
    {
      "epoch": 2.7530064754856616,
      "grad_norm": 0.22565960884094238,
      "learning_rate": 1.5867823984569458e-06,
      "loss": 0.3716,
      "step": 13392
    },
    {
      "epoch": 2.7532120464590397,
      "grad_norm": 0.23131342232227325,
      "learning_rate": 1.5841544837186428e-06,
      "loss": 0.3782,
      "step": 13393
    },
    {
      "epoch": 2.7534176174324188,
      "grad_norm": 0.24259567260742188,
      "learning_rate": 1.5815287078761155e-06,
      "loss": 0.3828,
      "step": 13394
    },
    {
      "epoch": 2.753623188405797,
      "grad_norm": 0.23125715553760529,
      "learning_rate": 1.578905071058735e-06,
      "loss": 0.3934,
      "step": 13395
    },
    {
      "epoch": 2.753828759379176,
      "grad_norm": 0.22932687401771545,
      "learning_rate": 1.5762835733957531e-06,
      "loss": 0.3589,
      "step": 13396
    },
    {
      "epoch": 2.754034330352554,
      "grad_norm": 0.22249168157577515,
      "learning_rate": 1.5736642150163168e-06,
      "loss": 0.3756,
      "step": 13397
    },
    {
      "epoch": 2.7542399013259327,
      "grad_norm": 0.23607565462589264,
      "learning_rate": 1.5710469960494723e-06,
      "loss": 0.362,
      "step": 13398
    },
    {
      "epoch": 2.7544454722993112,
      "grad_norm": 0.2208351343870163,
      "learning_rate": 1.5684319166241568e-06,
      "loss": 0.3629,
      "step": 13399
    },
    {
      "epoch": 2.75465104327269,
      "grad_norm": 0.22543267905712128,
      "learning_rate": 1.5658189768691923e-06,
      "loss": 0.3795,
      "step": 13400
    },
    {
      "epoch": 2.7548566142460684,
      "grad_norm": 0.233917698264122,
      "learning_rate": 1.5632081769133255e-06,
      "loss": 0.4046,
      "step": 13401
    },
    {
      "epoch": 2.755062185219447,
      "grad_norm": 0.23534435033798218,
      "learning_rate": 1.560599516885169e-06,
      "loss": 0.3964,
      "step": 13402
    },
    {
      "epoch": 2.7552677561928256,
      "grad_norm": 0.2306138277053833,
      "learning_rate": 1.5579929969132395e-06,
      "loss": 0.3845,
      "step": 13403
    },
    {
      "epoch": 2.755473327166204,
      "grad_norm": 0.23294024169445038,
      "learning_rate": 1.5553886171259446e-06,
      "loss": 0.3764,
      "step": 13404
    },
    {
      "epoch": 2.7556788981395828,
      "grad_norm": 0.23482374846935272,
      "learning_rate": 1.5527863776515918e-06,
      "loss": 0.3821,
      "step": 13405
    },
    {
      "epoch": 2.7558844691129614,
      "grad_norm": 0.12278321385383606,
      "learning_rate": 1.550186278618388e-06,
      "loss": 0.4522,
      "step": 13406
    },
    {
      "epoch": 2.75609004008634,
      "grad_norm": 0.23249104619026184,
      "learning_rate": 1.5475883201544111e-06,
      "loss": 0.3943,
      "step": 13407
    },
    {
      "epoch": 2.756295611059718,
      "grad_norm": 0.23344482481479645,
      "learning_rate": 1.544992502387669e-06,
      "loss": 0.3739,
      "step": 13408
    },
    {
      "epoch": 2.756501182033097,
      "grad_norm": 0.22119440138339996,
      "learning_rate": 1.5423988254460386e-06,
      "loss": 0.3884,
      "step": 13409
    },
    {
      "epoch": 2.7567067530064753,
      "grad_norm": 0.2359190583229065,
      "learning_rate": 1.5398072894572984e-06,
      "loss": 0.3688,
      "step": 13410
    },
    {
      "epoch": 2.7569123239798543,
      "grad_norm": 0.23092953860759735,
      "learning_rate": 1.537217894549121e-06,
      "loss": 0.3673,
      "step": 13411
    },
    {
      "epoch": 2.7571178949532325,
      "grad_norm": 0.23413263261318207,
      "learning_rate": 1.5346306408490697e-06,
      "loss": 0.3826,
      "step": 13412
    },
    {
      "epoch": 2.757323465926611,
      "grad_norm": 0.23555617034435272,
      "learning_rate": 1.532045528484612e-06,
      "loss": 0.3885,
      "step": 13413
    },
    {
      "epoch": 2.7575290368999896,
      "grad_norm": 0.2366188019514084,
      "learning_rate": 1.5294625575831012e-06,
      "loss": 0.4028,
      "step": 13414
    },
    {
      "epoch": 2.757734607873368,
      "grad_norm": 0.23909413814544678,
      "learning_rate": 1.5268817282717857e-06,
      "loss": 0.387,
      "step": 13415
    },
    {
      "epoch": 2.757940178846747,
      "grad_norm": 0.11871679872274399,
      "learning_rate": 1.5243030406778237e-06,
      "loss": 0.4433,
      "step": 13416
    },
    {
      "epoch": 2.7581457498201254,
      "grad_norm": 0.24047636985778809,
      "learning_rate": 1.5217264949282384e-06,
      "loss": 0.368,
      "step": 13417
    },
    {
      "epoch": 2.758351320793504,
      "grad_norm": 0.1217452734708786,
      "learning_rate": 1.5191520911499786e-06,
      "loss": 0.4494,
      "step": 13418
    },
    {
      "epoch": 2.7585568917668826,
      "grad_norm": 0.22655776143074036,
      "learning_rate": 1.5165798294698625e-06,
      "loss": 0.3789,
      "step": 13419
    },
    {
      "epoch": 2.758762462740261,
      "grad_norm": 0.12003947049379349,
      "learning_rate": 1.5140097100146188e-06,
      "loss": 0.4444,
      "step": 13420
    },
    {
      "epoch": 2.7589680337136397,
      "grad_norm": 0.24004173278808594,
      "learning_rate": 1.5114417329108565e-06,
      "loss": 0.3891,
      "step": 13421
    },
    {
      "epoch": 2.7591736046870183,
      "grad_norm": 0.24962955713272095,
      "learning_rate": 1.5088758982851042e-06,
      "loss": 0.3885,
      "step": 13422
    },
    {
      "epoch": 2.7593791756603965,
      "grad_norm": 0.22153249382972717,
      "learning_rate": 1.5063122062637558e-06,
      "loss": 0.3772,
      "step": 13423
    },
    {
      "epoch": 2.7595847466337755,
      "grad_norm": 0.2372589111328125,
      "learning_rate": 1.5037506569731202e-06,
      "loss": 0.3729,
      "step": 13424
    },
    {
      "epoch": 2.7597903176071537,
      "grad_norm": 0.2346244603395462,
      "learning_rate": 1.5011912505393867e-06,
      "loss": 0.4007,
      "step": 13425
    },
    {
      "epoch": 2.7599958885805327,
      "grad_norm": 0.22780828177928925,
      "learning_rate": 1.498633987088644e-06,
      "loss": 0.3916,
      "step": 13426
    },
    {
      "epoch": 2.760201459553911,
      "grad_norm": 0.23186977207660675,
      "learning_rate": 1.4960788667468816e-06,
      "loss": 0.384,
      "step": 13427
    },
    {
      "epoch": 2.7604070305272894,
      "grad_norm": 0.22554920613765717,
      "learning_rate": 1.493525889639974e-06,
      "loss": 0.3773,
      "step": 13428
    },
    {
      "epoch": 2.760612601500668,
      "grad_norm": 0.22903324663639069,
      "learning_rate": 1.4909750558937003e-06,
      "loss": 0.3697,
      "step": 13429
    },
    {
      "epoch": 2.7608181724740466,
      "grad_norm": 0.23504561185836792,
      "learning_rate": 1.488426365633725e-06,
      "loss": 0.3907,
      "step": 13430
    },
    {
      "epoch": 2.761023743447425,
      "grad_norm": 0.2242177128791809,
      "learning_rate": 1.4858798189856076e-06,
      "loss": 0.3697,
      "step": 13431
    },
    {
      "epoch": 2.7612293144208038,
      "grad_norm": 0.21883516013622284,
      "learning_rate": 1.4833354160748131e-06,
      "loss": 0.4012,
      "step": 13432
    },
    {
      "epoch": 2.7614348853941824,
      "grad_norm": 0.23364631831645966,
      "learning_rate": 1.480793157026676e-06,
      "loss": 0.403,
      "step": 13433
    },
    {
      "epoch": 2.761640456367561,
      "grad_norm": 0.21907542645931244,
      "learning_rate": 1.478253041966461e-06,
      "loss": 0.3822,
      "step": 13434
    },
    {
      "epoch": 2.7618460273409395,
      "grad_norm": 0.22405709326267242,
      "learning_rate": 1.475715071019293e-06,
      "loss": 0.3756,
      "step": 13435
    },
    {
      "epoch": 2.762051598314318,
      "grad_norm": 0.2236599624156952,
      "learning_rate": 1.473179244310212e-06,
      "loss": 0.3914,
      "step": 13436
    },
    {
      "epoch": 2.7622571692876967,
      "grad_norm": 0.22199760377407074,
      "learning_rate": 1.4706455619641485e-06,
      "loss": 0.3774,
      "step": 13437
    },
    {
      "epoch": 2.7624627402610753,
      "grad_norm": 0.12080203741788864,
      "learning_rate": 1.4681140241059221e-06,
      "loss": 0.4438,
      "step": 13438
    },
    {
      "epoch": 2.762668311234454,
      "grad_norm": 0.22729521989822388,
      "learning_rate": 1.4655846308602483e-06,
      "loss": 0.378,
      "step": 13439
    },
    {
      "epoch": 2.762873882207832,
      "grad_norm": 0.23560449481010437,
      "learning_rate": 1.4630573823517425e-06,
      "loss": 0.3705,
      "step": 13440
    },
    {
      "epoch": 2.763079453181211,
      "grad_norm": 0.22415785491466522,
      "learning_rate": 1.4605322787049097e-06,
      "loss": 0.3701,
      "step": 13441
    },
    {
      "epoch": 2.763285024154589,
      "grad_norm": 0.12505774199962616,
      "learning_rate": 1.4580093200441408e-06,
      "loss": 0.4395,
      "step": 13442
    },
    {
      "epoch": 2.763490595127968,
      "grad_norm": 0.23564325273036957,
      "learning_rate": 1.4554885064937462e-06,
      "loss": 0.4002,
      "step": 13443
    },
    {
      "epoch": 2.7636961661013464,
      "grad_norm": 0.2280401885509491,
      "learning_rate": 1.4529698381779067e-06,
      "loss": 0.3785,
      "step": 13444
    },
    {
      "epoch": 2.763901737074725,
      "grad_norm": 0.2783918082714081,
      "learning_rate": 1.4504533152207028e-06,
      "loss": 0.3812,
      "step": 13445
    },
    {
      "epoch": 2.7641073080481036,
      "grad_norm": 0.2317892611026764,
      "learning_rate": 1.4479389377461105e-06,
      "loss": 0.3993,
      "step": 13446
    },
    {
      "epoch": 2.764312879021482,
      "grad_norm": 0.23319129645824432,
      "learning_rate": 1.4454267058780108e-06,
      "loss": 0.3947,
      "step": 13447
    },
    {
      "epoch": 2.7645184499948607,
      "grad_norm": 0.23588362336158752,
      "learning_rate": 1.4429166197401594e-06,
      "loss": 0.389,
      "step": 13448
    },
    {
      "epoch": 2.7647240209682393,
      "grad_norm": 0.23121041059494019,
      "learning_rate": 1.4404086794562177e-06,
      "loss": 0.3932,
      "step": 13449
    },
    {
      "epoch": 2.764929591941618,
      "grad_norm": 0.22811183333396912,
      "learning_rate": 1.4379028851497516e-06,
      "loss": 0.371,
      "step": 13450
    },
    {
      "epoch": 2.7651351629149965,
      "grad_norm": 0.24577990174293518,
      "learning_rate": 1.4353992369441976e-06,
      "loss": 0.3868,
      "step": 13451
    },
    {
      "epoch": 2.765340733888375,
      "grad_norm": 0.22762644290924072,
      "learning_rate": 1.4328977349629019e-06,
      "loss": 0.3735,
      "step": 13452
    },
    {
      "epoch": 2.7655463048617537,
      "grad_norm": 0.23274122178554535,
      "learning_rate": 1.430398379329106e-06,
      "loss": 0.3751,
      "step": 13453
    },
    {
      "epoch": 2.7657518758351323,
      "grad_norm": 0.23309700191020966,
      "learning_rate": 1.4279011701659362e-06,
      "loss": 0.3838,
      "step": 13454
    },
    {
      "epoch": 2.7659574468085104,
      "grad_norm": 0.23482760787010193,
      "learning_rate": 1.4254061075964143e-06,
      "loss": 0.3797,
      "step": 13455
    },
    {
      "epoch": 2.7661630177818894,
      "grad_norm": 0.11869847029447556,
      "learning_rate": 1.4229131917434769e-06,
      "loss": 0.4533,
      "step": 13456
    },
    {
      "epoch": 2.7663685887552676,
      "grad_norm": 0.12506672739982605,
      "learning_rate": 1.4204224227299156e-06,
      "loss": 0.4454,
      "step": 13457
    },
    {
      "epoch": 2.766574159728646,
      "grad_norm": 0.12263701856136322,
      "learning_rate": 1.4179338006784626e-06,
      "loss": 0.4483,
      "step": 13458
    },
    {
      "epoch": 2.7667797307020248,
      "grad_norm": 0.24314455687999725,
      "learning_rate": 1.4154473257117047e-06,
      "loss": 0.3715,
      "step": 13459
    },
    {
      "epoch": 2.7669853016754034,
      "grad_norm": 0.22938649356365204,
      "learning_rate": 1.4129629979521436e-06,
      "loss": 0.3668,
      "step": 13460
    },
    {
      "epoch": 2.767190872648782,
      "grad_norm": 0.23843181133270264,
      "learning_rate": 1.4104808175221717e-06,
      "loss": 0.3938,
      "step": 13461
    },
    {
      "epoch": 2.7673964436221605,
      "grad_norm": 0.22452838718891144,
      "learning_rate": 1.4080007845440713e-06,
      "loss": 0.3485,
      "step": 13462
    },
    {
      "epoch": 2.767602014595539,
      "grad_norm": 0.2309243083000183,
      "learning_rate": 1.4055228991400193e-06,
      "loss": 0.3835,
      "step": 13463
    },
    {
      "epoch": 2.7678075855689177,
      "grad_norm": 0.24043014645576477,
      "learning_rate": 1.4030471614320984e-06,
      "loss": 0.3677,
      "step": 13464
    },
    {
      "epoch": 2.7680131565422963,
      "grad_norm": 0.24299444258213043,
      "learning_rate": 1.4005735715422757e-06,
      "loss": 0.392,
      "step": 13465
    },
    {
      "epoch": 2.768218727515675,
      "grad_norm": 0.2283448427915573,
      "learning_rate": 1.3981021295924091e-06,
      "loss": 0.3609,
      "step": 13466
    },
    {
      "epoch": 2.7684242984890535,
      "grad_norm": 0.2528086304664612,
      "learning_rate": 1.395632835704251e-06,
      "loss": 0.3771,
      "step": 13467
    },
    {
      "epoch": 2.768629869462432,
      "grad_norm": 0.23460538685321808,
      "learning_rate": 1.393165689999464e-06,
      "loss": 0.3766,
      "step": 13468
    },
    {
      "epoch": 2.7688354404358106,
      "grad_norm": 0.23330725729465485,
      "learning_rate": 1.390700692599576e-06,
      "loss": 0.3756,
      "step": 13469
    },
    {
      "epoch": 2.769041011409189,
      "grad_norm": 0.11881226301193237,
      "learning_rate": 1.3882378436260396e-06,
      "loss": 0.4427,
      "step": 13470
    },
    {
      "epoch": 2.769246582382568,
      "grad_norm": 0.2295810878276825,
      "learning_rate": 1.3857771432001881e-06,
      "loss": 0.374,
      "step": 13471
    },
    {
      "epoch": 2.769452153355946,
      "grad_norm": 0.12282148748636246,
      "learning_rate": 1.3833185914432396e-06,
      "loss": 0.4614,
      "step": 13472
    },
    {
      "epoch": 2.7696577243293246,
      "grad_norm": 0.2301100343465805,
      "learning_rate": 1.3808621884763218e-06,
      "loss": 0.3805,
      "step": 13473
    },
    {
      "epoch": 2.769863295302703,
      "grad_norm": 0.23633736371994019,
      "learning_rate": 1.378407934420448e-06,
      "loss": 0.3947,
      "step": 13474
    },
    {
      "epoch": 2.7700688662760817,
      "grad_norm": 0.22140897810459137,
      "learning_rate": 1.375955829396532e-06,
      "loss": 0.3804,
      "step": 13475
    },
    {
      "epoch": 2.7702744372494603,
      "grad_norm": 0.12200979143381119,
      "learning_rate": 1.3735058735253663e-06,
      "loss": 0.4414,
      "step": 13476
    },
    {
      "epoch": 2.770480008222839,
      "grad_norm": 0.23393046855926514,
      "learning_rate": 1.3710580669276601e-06,
      "loss": 0.3847,
      "step": 13477
    },
    {
      "epoch": 2.7706855791962175,
      "grad_norm": 0.1250157356262207,
      "learning_rate": 1.3686124097240066e-06,
      "loss": 0.4526,
      "step": 13478
    },
    {
      "epoch": 2.770891150169596,
      "grad_norm": 0.2439979910850525,
      "learning_rate": 1.3661689020348795e-06,
      "loss": 0.4,
      "step": 13479
    },
    {
      "epoch": 2.7710967211429747,
      "grad_norm": 0.23264381289482117,
      "learning_rate": 1.3637275439806723e-06,
      "loss": 0.3863,
      "step": 13480
    },
    {
      "epoch": 2.7713022921163533,
      "grad_norm": 0.23298904299736023,
      "learning_rate": 1.3612883356816493e-06,
      "loss": 0.3768,
      "step": 13481
    },
    {
      "epoch": 2.771507863089732,
      "grad_norm": 0.23129281401634216,
      "learning_rate": 1.3588512772579887e-06,
      "loss": 0.3775,
      "step": 13482
    },
    {
      "epoch": 2.7717134340631104,
      "grad_norm": 0.22436164319515228,
      "learning_rate": 1.3564163688297398e-06,
      "loss": 0.386,
      "step": 13483
    },
    {
      "epoch": 2.771919005036489,
      "grad_norm": 0.12501150369644165,
      "learning_rate": 1.353983610516872e-06,
      "loss": 0.457,
      "step": 13484
    },
    {
      "epoch": 2.772124576009867,
      "grad_norm": 0.22815532982349396,
      "learning_rate": 1.3515530024392286e-06,
      "loss": 0.3907,
      "step": 13485
    },
    {
      "epoch": 2.772330146983246,
      "grad_norm": 0.23359054327011108,
      "learning_rate": 1.3491245447165596e-06,
      "loss": 0.38,
      "step": 13486
    },
    {
      "epoch": 2.7725357179566243,
      "grad_norm": 0.23818518221378326,
      "learning_rate": 1.3466982374684988e-06,
      "loss": 0.3788,
      "step": 13487
    },
    {
      "epoch": 2.772741288930003,
      "grad_norm": 0.2215246707201004,
      "learning_rate": 1.344274080814586e-06,
      "loss": 0.3717,
      "step": 13488
    },
    {
      "epoch": 2.7729468599033815,
      "grad_norm": 0.2501251697540283,
      "learning_rate": 1.3418520748742352e-06,
      "loss": 0.3799,
      "step": 13489
    },
    {
      "epoch": 2.77315243087676,
      "grad_norm": 0.22703197598457336,
      "learning_rate": 1.3394322197667763e-06,
      "loss": 0.3996,
      "step": 13490
    },
    {
      "epoch": 2.7733580018501387,
      "grad_norm": 0.1154903993010521,
      "learning_rate": 1.3370145156114239e-06,
      "loss": 0.4539,
      "step": 13491
    },
    {
      "epoch": 2.7735635728235173,
      "grad_norm": 0.23296396434307098,
      "learning_rate": 1.3345989625272875e-06,
      "loss": 0.397,
      "step": 13492
    },
    {
      "epoch": 2.773769143796896,
      "grad_norm": 0.23191124200820923,
      "learning_rate": 1.3321855606333673e-06,
      "loss": 0.3711,
      "step": 13493
    },
    {
      "epoch": 2.7739747147702745,
      "grad_norm": 0.22586466372013092,
      "learning_rate": 1.3297743100485627e-06,
      "loss": 0.3726,
      "step": 13494
    },
    {
      "epoch": 2.774180285743653,
      "grad_norm": 0.23403707146644592,
      "learning_rate": 1.327365210891664e-06,
      "loss": 0.3872,
      "step": 13495
    },
    {
      "epoch": 2.7743858567170316,
      "grad_norm": 0.2312314212322235,
      "learning_rate": 1.3249582632813563e-06,
      "loss": 0.3814,
      "step": 13496
    },
    {
      "epoch": 2.7745914276904102,
      "grad_norm": 0.12255984544754028,
      "learning_rate": 1.3225534673362144e-06,
      "loss": 0.4595,
      "step": 13497
    },
    {
      "epoch": 2.774796998663789,
      "grad_norm": 0.24602609872817993,
      "learning_rate": 1.320150823174719e-06,
      "loss": 0.3767,
      "step": 13498
    },
    {
      "epoch": 2.7750025696371674,
      "grad_norm": 0.22925207018852234,
      "learning_rate": 1.3177503309152351e-06,
      "loss": 0.3627,
      "step": 13499
    },
    {
      "epoch": 2.7752081406105455,
      "grad_norm": 0.2292235791683197,
      "learning_rate": 1.3153519906760132e-06,
      "loss": 0.3868,
      "step": 13500
    },
    {
      "epoch": 2.7754137115839246,
      "grad_norm": 0.2379998415708542,
      "learning_rate": 1.3129558025752236e-06,
      "loss": 0.3589,
      "step": 13501
    },
    {
      "epoch": 2.7756192825573027,
      "grad_norm": 0.12553149461746216,
      "learning_rate": 1.3105617667309124e-06,
      "loss": 0.4474,
      "step": 13502
    },
    {
      "epoch": 2.7758248535306813,
      "grad_norm": 0.12078402936458588,
      "learning_rate": 1.3081698832610146e-06,
      "loss": 0.4486,
      "step": 13503
    },
    {
      "epoch": 2.77603042450406,
      "grad_norm": 0.22506415843963623,
      "learning_rate": 1.3057801522833662e-06,
      "loss": 0.3689,
      "step": 13504
    },
    {
      "epoch": 2.7762359954774385,
      "grad_norm": 0.12277916818857193,
      "learning_rate": 1.3033925739157133e-06,
      "loss": 0.447,
      "step": 13505
    },
    {
      "epoch": 2.776441566450817,
      "grad_norm": 0.2416677474975586,
      "learning_rate": 1.3010071482756665e-06,
      "loss": 0.3757,
      "step": 13506
    },
    {
      "epoch": 2.7766471374241957,
      "grad_norm": 0.23068147897720337,
      "learning_rate": 1.2986238754807518e-06,
      "loss": 0.371,
      "step": 13507
    },
    {
      "epoch": 2.7768527083975743,
      "grad_norm": 0.23163877427577972,
      "learning_rate": 1.2962427556483753e-06,
      "loss": 0.358,
      "step": 13508
    },
    {
      "epoch": 2.777058279370953,
      "grad_norm": 0.22567118704319,
      "learning_rate": 1.2938637888958482e-06,
      "loss": 0.3855,
      "step": 13509
    },
    {
      "epoch": 2.7772638503443314,
      "grad_norm": 0.2283574789762497,
      "learning_rate": 1.2914869753403718e-06,
      "loss": 0.3802,
      "step": 13510
    },
    {
      "epoch": 2.77746942131771,
      "grad_norm": 0.1205214112997055,
      "learning_rate": 1.2891123150990376e-06,
      "loss": 0.4479,
      "step": 13511
    },
    {
      "epoch": 2.7776749922910886,
      "grad_norm": 0.12035630643367767,
      "learning_rate": 1.2867398082888366e-06,
      "loss": 0.4525,
      "step": 13512
    },
    {
      "epoch": 2.777880563264467,
      "grad_norm": 0.22281195223331451,
      "learning_rate": 1.2843694550266506e-06,
      "loss": 0.3897,
      "step": 13513
    },
    {
      "epoch": 2.778086134237846,
      "grad_norm": 0.22318775951862335,
      "learning_rate": 1.282001255429251e-06,
      "loss": 0.3767,
      "step": 13514
    },
    {
      "epoch": 2.778291705211224,
      "grad_norm": 0.11824406683444977,
      "learning_rate": 1.2796352096133195e-06,
      "loss": 0.4477,
      "step": 13515
    },
    {
      "epoch": 2.778497276184603,
      "grad_norm": 0.2342909723520279,
      "learning_rate": 1.2772713176954082e-06,
      "loss": 0.3699,
      "step": 13516
    },
    {
      "epoch": 2.778702847157981,
      "grad_norm": 0.2417282909154892,
      "learning_rate": 1.2749095797919785e-06,
      "loss": 0.3834,
      "step": 13517
    },
    {
      "epoch": 2.7789084181313597,
      "grad_norm": 0.2351347953081131,
      "learning_rate": 1.2725499960193826e-06,
      "loss": 0.3835,
      "step": 13518
    },
    {
      "epoch": 2.7791139891047383,
      "grad_norm": 0.11795809119939804,
      "learning_rate": 1.2701925664938675e-06,
      "loss": 0.4358,
      "step": 13519
    },
    {
      "epoch": 2.779319560078117,
      "grad_norm": 0.2242356687784195,
      "learning_rate": 1.267837291331575e-06,
      "loss": 0.3773,
      "step": 13520
    },
    {
      "epoch": 2.7795251310514955,
      "grad_norm": 0.23602786660194397,
      "learning_rate": 1.2654841706485326e-06,
      "loss": 0.3895,
      "step": 13521
    },
    {
      "epoch": 2.779730702024874,
      "grad_norm": 0.23076754808425903,
      "learning_rate": 1.2631332045606725e-06,
      "loss": 0.3835,
      "step": 13522
    },
    {
      "epoch": 2.7799362729982526,
      "grad_norm": 0.11837997287511826,
      "learning_rate": 1.260784393183812e-06,
      "loss": 0.4325,
      "step": 13523
    },
    {
      "epoch": 2.780141843971631,
      "grad_norm": 0.23414716124534607,
      "learning_rate": 1.2584377366336687e-06,
      "loss": 0.3697,
      "step": 13524
    },
    {
      "epoch": 2.78034741494501,
      "grad_norm": 0.2339978665113449,
      "learning_rate": 1.2560932350258498e-06,
      "loss": 0.3645,
      "step": 13525
    },
    {
      "epoch": 2.7805529859183884,
      "grad_norm": 0.22943206131458282,
      "learning_rate": 1.2537508884758581e-06,
      "loss": 0.3743,
      "step": 13526
    },
    {
      "epoch": 2.780758556891767,
      "grad_norm": 0.24171233177185059,
      "learning_rate": 1.2514106970990962e-06,
      "loss": 0.3866,
      "step": 13527
    },
    {
      "epoch": 2.7809641278651456,
      "grad_norm": 0.2301642745733261,
      "learning_rate": 1.2490726610108423e-06,
      "loss": 0.3776,
      "step": 13528
    },
    {
      "epoch": 2.781169698838524,
      "grad_norm": 0.23848628997802734,
      "learning_rate": 1.2467367803262937e-06,
      "loss": 0.378,
      "step": 13529
    },
    {
      "epoch": 2.7813752698119023,
      "grad_norm": 0.23197340965270996,
      "learning_rate": 1.2444030551605185e-06,
      "loss": 0.3848,
      "step": 13530
    },
    {
      "epoch": 2.7815808407852813,
      "grad_norm": 0.2298150360584259,
      "learning_rate": 1.24207148562849e-06,
      "loss": 0.3695,
      "step": 13531
    },
    {
      "epoch": 2.7817864117586595,
      "grad_norm": 0.23591184616088867,
      "learning_rate": 1.2397420718450708e-06,
      "loss": 0.3615,
      "step": 13532
    },
    {
      "epoch": 2.781991982732038,
      "grad_norm": 0.23564256727695465,
      "learning_rate": 1.2374148139250348e-06,
      "loss": 0.3773,
      "step": 13533
    },
    {
      "epoch": 2.7821975537054167,
      "grad_norm": 0.2334429770708084,
      "learning_rate": 1.2350897119830195e-06,
      "loss": 0.3737,
      "step": 13534
    },
    {
      "epoch": 2.7824031246787952,
      "grad_norm": 0.12291624397039413,
      "learning_rate": 1.232766766133579e-06,
      "loss": 0.4507,
      "step": 13535
    },
    {
      "epoch": 2.782608695652174,
      "grad_norm": 0.23279529809951782,
      "learning_rate": 1.2304459764911514e-06,
      "loss": 0.385,
      "step": 13536
    },
    {
      "epoch": 2.7828142666255524,
      "grad_norm": 0.22842784225940704,
      "learning_rate": 1.2281273431700752e-06,
      "loss": 0.3793,
      "step": 13537
    },
    {
      "epoch": 2.783019837598931,
      "grad_norm": 0.2227255403995514,
      "learning_rate": 1.225810866284574e-06,
      "loss": 0.3711,
      "step": 13538
    },
    {
      "epoch": 2.7832254085723096,
      "grad_norm": 0.21851521730422974,
      "learning_rate": 1.2234965459487668e-06,
      "loss": 0.3655,
      "step": 13539
    },
    {
      "epoch": 2.783430979545688,
      "grad_norm": 0.5399391651153564,
      "learning_rate": 1.2211843822766771e-06,
      "loss": 0.3967,
      "step": 13540
    },
    {
      "epoch": 2.7836365505190668,
      "grad_norm": 0.21860133111476898,
      "learning_rate": 1.218874375382214e-06,
      "loss": 0.3758,
      "step": 13541
    },
    {
      "epoch": 2.7838421214924454,
      "grad_norm": 0.2336316704750061,
      "learning_rate": 1.2165665253791764e-06,
      "loss": 0.393,
      "step": 13542
    },
    {
      "epoch": 2.784047692465824,
      "grad_norm": 0.22940628230571747,
      "learning_rate": 1.2142608323812582e-06,
      "loss": 0.3921,
      "step": 13543
    },
    {
      "epoch": 2.7842532634392025,
      "grad_norm": 0.12518246471881866,
      "learning_rate": 1.2119572965020588e-06,
      "loss": 0.4595,
      "step": 13544
    },
    {
      "epoch": 2.7844588344125807,
      "grad_norm": 0.12655936181545258,
      "learning_rate": 1.209655917855057e-06,
      "loss": 0.4395,
      "step": 13545
    },
    {
      "epoch": 2.7846644053859597,
      "grad_norm": 0.22794488072395325,
      "learning_rate": 1.2073566965536327e-06,
      "loss": 0.4002,
      "step": 13546
    },
    {
      "epoch": 2.784869976359338,
      "grad_norm": 0.23608453571796417,
      "learning_rate": 1.2050596327110598e-06,
      "loss": 0.3706,
      "step": 13547
    },
    {
      "epoch": 2.785075547332717,
      "grad_norm": 0.22875483334064484,
      "learning_rate": 1.202764726440503e-06,
      "loss": 0.3529,
      "step": 13548
    },
    {
      "epoch": 2.785281118306095,
      "grad_norm": 0.23389698565006256,
      "learning_rate": 1.2004719778550167e-06,
      "loss": 0.3826,
      "step": 13549
    },
    {
      "epoch": 2.7854866892794736,
      "grad_norm": 0.1383344829082489,
      "learning_rate": 1.1981813870675608e-06,
      "loss": 0.4421,
      "step": 13550
    },
    {
      "epoch": 2.785692260252852,
      "grad_norm": 0.2320520430803299,
      "learning_rate": 1.1958929541909798e-06,
      "loss": 0.3664,
      "step": 13551
    },
    {
      "epoch": 2.785897831226231,
      "grad_norm": 0.236195906996727,
      "learning_rate": 1.1936066793380035e-06,
      "loss": 0.3693,
      "step": 13552
    },
    {
      "epoch": 2.7861034021996094,
      "grad_norm": 0.23397257924079895,
      "learning_rate": 1.191322562621287e-06,
      "loss": 0.377,
      "step": 13553
    },
    {
      "epoch": 2.786308973172988,
      "grad_norm": 0.23099073767662048,
      "learning_rate": 1.1890406041533404e-06,
      "loss": 0.3729,
      "step": 13554
    },
    {
      "epoch": 2.7865145441463666,
      "grad_norm": 0.23826487362384796,
      "learning_rate": 1.1867608040465933e-06,
      "loss": 0.3772,
      "step": 13555
    },
    {
      "epoch": 2.786720115119745,
      "grad_norm": 0.23329326510429382,
      "learning_rate": 1.1844831624133611e-06,
      "loss": 0.3604,
      "step": 13556
    },
    {
      "epoch": 2.7869256860931237,
      "grad_norm": 0.22708694636821747,
      "learning_rate": 1.1822076793658493e-06,
      "loss": 0.3632,
      "step": 13557
    },
    {
      "epoch": 2.7871312570665023,
      "grad_norm": 0.23591133952140808,
      "learning_rate": 1.179934355016158e-06,
      "loss": 0.3962,
      "step": 13558
    },
    {
      "epoch": 2.787336828039881,
      "grad_norm": 0.23737779259681702,
      "learning_rate": 1.1776631894762874e-06,
      "loss": 0.37,
      "step": 13559
    },
    {
      "epoch": 2.787542399013259,
      "grad_norm": 0.22978876531124115,
      "learning_rate": 1.1753941828581283e-06,
      "loss": 0.3697,
      "step": 13560
    },
    {
      "epoch": 2.787747969986638,
      "grad_norm": 0.22215284407138824,
      "learning_rate": 1.1731273352734612e-06,
      "loss": 0.38,
      "step": 13561
    },
    {
      "epoch": 2.7879535409600162,
      "grad_norm": 0.22937338054180145,
      "learning_rate": 1.1708626468339619e-06,
      "loss": 0.3759,
      "step": 13562
    },
    {
      "epoch": 2.7881591119333953,
      "grad_norm": 0.2433684915304184,
      "learning_rate": 1.1686001176512108e-06,
      "loss": 0.383,
      "step": 13563
    },
    {
      "epoch": 2.7883646829067734,
      "grad_norm": 0.2298046201467514,
      "learning_rate": 1.1663397478366539e-06,
      "loss": 0.3739,
      "step": 13564
    },
    {
      "epoch": 2.788570253880152,
      "grad_norm": 0.12105909734964371,
      "learning_rate": 1.1640815375016623e-06,
      "loss": 0.4447,
      "step": 13565
    },
    {
      "epoch": 2.7887758248535306,
      "grad_norm": 0.22252750396728516,
      "learning_rate": 1.1618254867574918e-06,
      "loss": 0.3771,
      "step": 13566
    },
    {
      "epoch": 2.788981395826909,
      "grad_norm": 0.2337454855442047,
      "learning_rate": 1.1595715957152686e-06,
      "loss": 0.3896,
      "step": 13567
    },
    {
      "epoch": 2.7891869668002878,
      "grad_norm": 0.23451459407806396,
      "learning_rate": 1.157319864486054e-06,
      "loss": 0.3887,
      "step": 13568
    },
    {
      "epoch": 2.7893925377736664,
      "grad_norm": 0.2217395305633545,
      "learning_rate": 1.155070293180764e-06,
      "loss": 0.38,
      "step": 13569
    },
    {
      "epoch": 2.789598108747045,
      "grad_norm": 0.23397988080978394,
      "learning_rate": 1.1528228819102348e-06,
      "loss": 0.3831,
      "step": 13570
    },
    {
      "epoch": 2.7898036797204235,
      "grad_norm": 0.23299863934516907,
      "learning_rate": 1.1505776307851784e-06,
      "loss": 0.3913,
      "step": 13571
    },
    {
      "epoch": 2.790009250693802,
      "grad_norm": 0.12697121500968933,
      "learning_rate": 1.148334539916211e-06,
      "loss": 0.4494,
      "step": 13572
    },
    {
      "epoch": 2.7902148216671807,
      "grad_norm": 0.23409722745418549,
      "learning_rate": 1.1460936094138342e-06,
      "loss": 0.3775,
      "step": 13573
    },
    {
      "epoch": 2.7904203926405593,
      "grad_norm": 0.23983454704284668,
      "learning_rate": 1.1438548393884545e-06,
      "loss": 0.3591,
      "step": 13574
    },
    {
      "epoch": 2.7906259636139374,
      "grad_norm": 0.12223486602306366,
      "learning_rate": 1.1416182299503692e-06,
      "loss": 0.4504,
      "step": 13575
    },
    {
      "epoch": 2.7908315345873165,
      "grad_norm": 0.2318880558013916,
      "learning_rate": 1.1393837812097546e-06,
      "loss": 0.3754,
      "step": 13576
    },
    {
      "epoch": 2.7910371055606946,
      "grad_norm": 0.22092534601688385,
      "learning_rate": 1.137151493276703e-06,
      "loss": 0.3631,
      "step": 13577
    },
    {
      "epoch": 2.7912426765340737,
      "grad_norm": 0.23008911311626434,
      "learning_rate": 1.1349213662611764e-06,
      "loss": 0.3736,
      "step": 13578
    },
    {
      "epoch": 2.791448247507452,
      "grad_norm": 0.2271631807088852,
      "learning_rate": 1.1326934002730516e-06,
      "loss": 0.4001,
      "step": 13579
    },
    {
      "epoch": 2.7916538184808304,
      "grad_norm": 0.22966791689395905,
      "learning_rate": 1.1304675954220861e-06,
      "loss": 0.3774,
      "step": 13580
    },
    {
      "epoch": 2.791859389454209,
      "grad_norm": 0.2344343513250351,
      "learning_rate": 1.1282439518179373e-06,
      "loss": 0.3852,
      "step": 13581
    },
    {
      "epoch": 2.7920649604275876,
      "grad_norm": 0.21964535117149353,
      "learning_rate": 1.1260224695701571e-06,
      "loss": 0.3675,
      "step": 13582
    },
    {
      "epoch": 2.792270531400966,
      "grad_norm": 0.23566703498363495,
      "learning_rate": 1.1238031487881785e-06,
      "loss": 0.3684,
      "step": 13583
    },
    {
      "epoch": 2.7924761023743447,
      "grad_norm": 0.23792453110218048,
      "learning_rate": 1.1215859895813436e-06,
      "loss": 0.4032,
      "step": 13584
    },
    {
      "epoch": 2.7926816733477233,
      "grad_norm": 0.23992085456848145,
      "learning_rate": 1.1193709920588803e-06,
      "loss": 0.3779,
      "step": 13585
    },
    {
      "epoch": 2.792887244321102,
      "grad_norm": 0.23918254673480988,
      "learning_rate": 1.117158156329911e-06,
      "loss": 0.38,
      "step": 13586
    },
    {
      "epoch": 2.7930928152944805,
      "grad_norm": 0.23621824383735657,
      "learning_rate": 1.114947482503449e-06,
      "loss": 0.3967,
      "step": 13587
    },
    {
      "epoch": 2.793298386267859,
      "grad_norm": 0.23575182259082794,
      "learning_rate": 1.1127389706884017e-06,
      "loss": 0.3905,
      "step": 13588
    },
    {
      "epoch": 2.7935039572412377,
      "grad_norm": 0.11634790897369385,
      "learning_rate": 1.1105326209935874e-06,
      "loss": 0.4412,
      "step": 13589
    },
    {
      "epoch": 2.793709528214616,
      "grad_norm": 0.11823614686727524,
      "learning_rate": 1.108328433527689e-06,
      "loss": 0.4613,
      "step": 13590
    },
    {
      "epoch": 2.793915099187995,
      "grad_norm": 0.2277180552482605,
      "learning_rate": 1.1061264083992995e-06,
      "loss": 0.4023,
      "step": 13591
    },
    {
      "epoch": 2.794120670161373,
      "grad_norm": 0.23190085589885712,
      "learning_rate": 1.1039265457168973e-06,
      "loss": 0.3905,
      "step": 13592
    },
    {
      "epoch": 2.794326241134752,
      "grad_norm": 0.22355376183986664,
      "learning_rate": 1.1017288455888708e-06,
      "loss": 0.3748,
      "step": 13593
    },
    {
      "epoch": 2.79453181210813,
      "grad_norm": 0.22133591771125793,
      "learning_rate": 1.0995333081234783e-06,
      "loss": 0.3757,
      "step": 13594
    },
    {
      "epoch": 2.7947373830815088,
      "grad_norm": 0.2462836503982544,
      "learning_rate": 1.097339933428893e-06,
      "loss": 0.3903,
      "step": 13595
    },
    {
      "epoch": 2.7949429540548874,
      "grad_norm": 0.2253459244966507,
      "learning_rate": 1.095148721613169e-06,
      "loss": 0.3692,
      "step": 13596
    },
    {
      "epoch": 2.795148525028266,
      "grad_norm": 0.2545377016067505,
      "learning_rate": 1.0929596727842545e-06,
      "loss": 0.3871,
      "step": 13597
    },
    {
      "epoch": 2.7953540960016445,
      "grad_norm": 0.2286592274904251,
      "learning_rate": 1.0907727870499985e-06,
      "loss": 0.3749,
      "step": 13598
    },
    {
      "epoch": 2.795559666975023,
      "grad_norm": 0.23702724277973175,
      "learning_rate": 1.0885880645181395e-06,
      "loss": 0.3861,
      "step": 13599
    },
    {
      "epoch": 2.7957652379484017,
      "grad_norm": 0.12967750430107117,
      "learning_rate": 1.086405505296302e-06,
      "loss": 0.4553,
      "step": 13600
    },
    {
      "epoch": 2.7959708089217803,
      "grad_norm": 0.23417022824287415,
      "learning_rate": 1.0842251094920042e-06,
      "loss": 0.3808,
      "step": 13601
    },
    {
      "epoch": 2.796176379895159,
      "grad_norm": 0.23133817315101624,
      "learning_rate": 1.0820468772126858e-06,
      "loss": 0.3838,
      "step": 13602
    },
    {
      "epoch": 2.7963819508685375,
      "grad_norm": 0.12354867160320282,
      "learning_rate": 1.0798708085656406e-06,
      "loss": 0.4403,
      "step": 13603
    },
    {
      "epoch": 2.796587521841916,
      "grad_norm": 0.2297942191362381,
      "learning_rate": 1.0776969036580831e-06,
      "loss": 0.3838,
      "step": 13604
    },
    {
      "epoch": 2.7967930928152946,
      "grad_norm": 0.12144782394170761,
      "learning_rate": 1.0755251625971025e-06,
      "loss": 0.4596,
      "step": 13605
    },
    {
      "epoch": 2.7969986637886732,
      "grad_norm": 0.23768429458141327,
      "learning_rate": 1.0733555854896931e-06,
      "loss": 0.389,
      "step": 13606
    },
    {
      "epoch": 2.7972042347620514,
      "grad_norm": 0.11554042994976044,
      "learning_rate": 1.0711881724427398e-06,
      "loss": 0.4285,
      "step": 13607
    },
    {
      "epoch": 2.7974098057354304,
      "grad_norm": 0.23497696220874786,
      "learning_rate": 1.0690229235630318e-06,
      "loss": 0.3711,
      "step": 13608
    },
    {
      "epoch": 2.7976153767088086,
      "grad_norm": 0.23065055906772614,
      "learning_rate": 1.0668598389572187e-06,
      "loss": 0.3752,
      "step": 13609
    },
    {
      "epoch": 2.797820947682187,
      "grad_norm": 0.2266397476196289,
      "learning_rate": 1.0646989187318856e-06,
      "loss": 0.3693,
      "step": 13610
    },
    {
      "epoch": 2.7980265186555657,
      "grad_norm": 0.2287440001964569,
      "learning_rate": 1.0625401629934873e-06,
      "loss": 0.3822,
      "step": 13611
    },
    {
      "epoch": 2.7982320896289443,
      "grad_norm": 0.23608548939228058,
      "learning_rate": 1.0603835718483686e-06,
      "loss": 0.3633,
      "step": 13612
    },
    {
      "epoch": 2.798437660602323,
      "grad_norm": 0.23724471032619476,
      "learning_rate": 1.0582291454027792e-06,
      "loss": 0.3976,
      "step": 13613
    },
    {
      "epoch": 2.7986432315757015,
      "grad_norm": 0.3131234645843506,
      "learning_rate": 1.0560768837628549e-06,
      "loss": 0.3685,
      "step": 13614
    },
    {
      "epoch": 2.79884880254908,
      "grad_norm": 0.24307109415531158,
      "learning_rate": 1.0539267870346253e-06,
      "loss": 0.3986,
      "step": 13615
    },
    {
      "epoch": 2.7990543735224587,
      "grad_norm": 0.23056308925151825,
      "learning_rate": 1.051778855324026e-06,
      "loss": 0.3667,
      "step": 13616
    },
    {
      "epoch": 2.7992599444958373,
      "grad_norm": 0.2293158620595932,
      "learning_rate": 1.0496330887368672e-06,
      "loss": 0.3761,
      "step": 13617
    },
    {
      "epoch": 2.799465515469216,
      "grad_norm": 0.23687753081321716,
      "learning_rate": 1.0474894873788643e-06,
      "loss": 0.4005,
      "step": 13618
    },
    {
      "epoch": 2.7996710864425944,
      "grad_norm": 0.2287084013223648,
      "learning_rate": 1.045348051355618e-06,
      "loss": 0.3946,
      "step": 13619
    },
    {
      "epoch": 2.799876657415973,
      "grad_norm": 0.23279039561748505,
      "learning_rate": 1.0432087807726288e-06,
      "loss": 0.3591,
      "step": 13620
    },
    {
      "epoch": 2.8000822283893516,
      "grad_norm": 0.23075073957443237,
      "learning_rate": 1.0410716757352923e-06,
      "loss": 0.3777,
      "step": 13621
    },
    {
      "epoch": 2.8002877993627298,
      "grad_norm": 0.23093274235725403,
      "learning_rate": 1.0389367363488895e-06,
      "loss": 0.4152,
      "step": 13622
    },
    {
      "epoch": 2.800493370336109,
      "grad_norm": 0.2433861345052719,
      "learning_rate": 1.036803962718601e-06,
      "loss": 0.3827,
      "step": 13623
    },
    {
      "epoch": 2.800698941309487,
      "grad_norm": 0.2396126538515091,
      "learning_rate": 1.034673354949498e-06,
      "loss": 0.3938,
      "step": 13624
    },
    {
      "epoch": 2.8009045122828655,
      "grad_norm": 0.231951504945755,
      "learning_rate": 1.0325449131465414e-06,
      "loss": 0.3815,
      "step": 13625
    },
    {
      "epoch": 2.801110083256244,
      "grad_norm": 0.23407815396785736,
      "learning_rate": 1.0304186374145975e-06,
      "loss": 0.3898,
      "step": 13626
    },
    {
      "epoch": 2.8013156542296227,
      "grad_norm": 0.23772378265857697,
      "learning_rate": 1.0282945278584172e-06,
      "loss": 0.3771,
      "step": 13627
    },
    {
      "epoch": 2.8015212252030013,
      "grad_norm": 0.3126058578491211,
      "learning_rate": 1.026172584582632e-06,
      "loss": 0.3637,
      "step": 13628
    },
    {
      "epoch": 2.80172679617638,
      "grad_norm": 0.1172962412238121,
      "learning_rate": 1.0240528076917982e-06,
      "loss": 0.4601,
      "step": 13629
    },
    {
      "epoch": 2.8019323671497585,
      "grad_norm": 0.2429337203502655,
      "learning_rate": 1.0219351972903375e-06,
      "loss": 0.368,
      "step": 13630
    },
    {
      "epoch": 2.802137938123137,
      "grad_norm": 0.24216631054878235,
      "learning_rate": 1.019819753482576e-06,
      "loss": 0.3908,
      "step": 13631
    },
    {
      "epoch": 2.8023435090965156,
      "grad_norm": 0.23483149707317352,
      "learning_rate": 1.0177064763727356e-06,
      "loss": 0.3796,
      "step": 13632
    },
    {
      "epoch": 2.8025490800698942,
      "grad_norm": 0.22960315644741058,
      "learning_rate": 1.0155953660649232e-06,
      "loss": 0.3897,
      "step": 13633
    },
    {
      "epoch": 2.802754651043273,
      "grad_norm": 0.22772780060768127,
      "learning_rate": 1.0134864226631402e-06,
      "loss": 0.3716,
      "step": 13634
    },
    {
      "epoch": 2.8029602220166514,
      "grad_norm": 0.12643857300281525,
      "learning_rate": 1.0113796462712888e-06,
      "loss": 0.4547,
      "step": 13635
    },
    {
      "epoch": 2.80316579299003,
      "grad_norm": 0.2468147575855255,
      "learning_rate": 1.009275036993166e-06,
      "loss": 0.3853,
      "step": 13636
    },
    {
      "epoch": 2.803371363963408,
      "grad_norm": 0.25671377778053284,
      "learning_rate": 1.0071725949324484e-06,
      "loss": 0.3783,
      "step": 13637
    },
    {
      "epoch": 2.803576934936787,
      "grad_norm": 0.2457493543624878,
      "learning_rate": 1.0050723201927136e-06,
      "loss": 0.391,
      "step": 13638
    },
    {
      "epoch": 2.8037825059101653,
      "grad_norm": 0.23700536787509918,
      "learning_rate": 1.002974212877439e-06,
      "loss": 0.3792,
      "step": 13639
    },
    {
      "epoch": 2.803988076883544,
      "grad_norm": 0.23173773288726807,
      "learning_rate": 1.0008782730899764e-06,
      "loss": 0.4102,
      "step": 13640
    },
    {
      "epoch": 2.8041936478569225,
      "grad_norm": 0.22917988896369934,
      "learning_rate": 9.987845009335933e-07,
      "loss": 0.3808,
      "step": 13641
    },
    {
      "epoch": 2.804399218830301,
      "grad_norm": 0.22746974229812622,
      "learning_rate": 9.966928965114325e-07,
      "loss": 0.3807,
      "step": 13642
    },
    {
      "epoch": 2.8046047898036797,
      "grad_norm": 0.2384635955095291,
      "learning_rate": 9.946034599265464e-07,
      "loss": 0.3678,
      "step": 13643
    },
    {
      "epoch": 2.8048103607770583,
      "grad_norm": 0.12134691327810287,
      "learning_rate": 9.925161912818625e-07,
      "loss": 0.4635,
      "step": 13644
    },
    {
      "epoch": 2.805015931750437,
      "grad_norm": 0.12150728702545166,
      "learning_rate": 9.90431090680224e-07,
      "loss": 0.4485,
      "step": 13645
    },
    {
      "epoch": 2.8052215027238154,
      "grad_norm": 0.2309166043996811,
      "learning_rate": 9.88348158224338e-07,
      "loss": 0.3792,
      "step": 13646
    },
    {
      "epoch": 2.805427073697194,
      "grad_norm": 0.12357798218727112,
      "learning_rate": 9.862673940168332e-07,
      "loss": 0.4412,
      "step": 13647
    },
    {
      "epoch": 2.8056326446705726,
      "grad_norm": 0.1247616782784462,
      "learning_rate": 9.841887981602121e-07,
      "loss": 0.4396,
      "step": 13648
    },
    {
      "epoch": 2.805838215643951,
      "grad_norm": 0.231892392039299,
      "learning_rate": 9.82112370756873e-07,
      "loss": 0.3681,
      "step": 13649
    },
    {
      "epoch": 2.80604378661733,
      "grad_norm": 0.23392446339130402,
      "learning_rate": 9.80038111909124e-07,
      "loss": 0.3689,
      "step": 13650
    },
    {
      "epoch": 2.8062493575907084,
      "grad_norm": 0.22620777785778046,
      "learning_rate": 9.779660217191484e-07,
      "loss": 0.3742,
      "step": 13651
    },
    {
      "epoch": 2.8064549285640865,
      "grad_norm": 0.23345611989498138,
      "learning_rate": 9.758961002890242e-07,
      "loss": 0.3886,
      "step": 13652
    },
    {
      "epoch": 2.8066604995374655,
      "grad_norm": 0.22174043953418732,
      "learning_rate": 9.738283477207405e-07,
      "loss": 0.3853,
      "step": 13653
    },
    {
      "epoch": 2.8068660705108437,
      "grad_norm": 0.125930517911911,
      "learning_rate": 9.717627641161502e-07,
      "loss": 0.4399,
      "step": 13654
    },
    {
      "epoch": 2.8070716414842223,
      "grad_norm": 0.23766390979290009,
      "learning_rate": 9.696993495770224e-07,
      "loss": 0.3779,
      "step": 13655
    },
    {
      "epoch": 2.807277212457601,
      "grad_norm": 0.22734849154949188,
      "learning_rate": 9.676381042050053e-07,
      "loss": 0.3785,
      "step": 13656
    },
    {
      "epoch": 2.8074827834309795,
      "grad_norm": 0.12355753779411316,
      "learning_rate": 9.65579028101658e-07,
      "loss": 0.4431,
      "step": 13657
    },
    {
      "epoch": 2.807688354404358,
      "grad_norm": 0.23215292394161224,
      "learning_rate": 9.635221213684143e-07,
      "loss": 0.3898,
      "step": 13658
    },
    {
      "epoch": 2.8078939253777366,
      "grad_norm": 0.2282809466123581,
      "learning_rate": 9.61467384106613e-07,
      "loss": 0.3711,
      "step": 13659
    },
    {
      "epoch": 2.808099496351115,
      "grad_norm": 0.23502275347709656,
      "learning_rate": 9.594148164174731e-07,
      "loss": 0.3777,
      "step": 13660
    },
    {
      "epoch": 2.808305067324494,
      "grad_norm": 0.13005268573760986,
      "learning_rate": 9.57364418402124e-07,
      "loss": 0.4485,
      "step": 13661
    },
    {
      "epoch": 2.8085106382978724,
      "grad_norm": 0.12916310131549835,
      "learning_rate": 9.553161901615748e-07,
      "loss": 0.433,
      "step": 13662
    },
    {
      "epoch": 2.808716209271251,
      "grad_norm": 0.22590284049510956,
      "learning_rate": 9.532701317967247e-07,
      "loss": 0.374,
      "step": 13663
    },
    {
      "epoch": 2.8089217802446296,
      "grad_norm": 0.22926348447799683,
      "learning_rate": 9.512262434083879e-07,
      "loss": 0.3615,
      "step": 13664
    },
    {
      "epoch": 2.809127351218008,
      "grad_norm": 0.22875775396823883,
      "learning_rate": 9.491845250972542e-07,
      "loss": 0.3893,
      "step": 13665
    },
    {
      "epoch": 2.8093329221913867,
      "grad_norm": 0.22488847374916077,
      "learning_rate": 9.47144976963903e-07,
      "loss": 0.3798,
      "step": 13666
    },
    {
      "epoch": 2.809538493164765,
      "grad_norm": 0.2324180006980896,
      "learning_rate": 9.451075991088138e-07,
      "loss": 0.3821,
      "step": 13667
    },
    {
      "epoch": 2.809744064138144,
      "grad_norm": 0.22795747220516205,
      "learning_rate": 9.430723916323663e-07,
      "loss": 0.3638,
      "step": 13668
    },
    {
      "epoch": 2.809949635111522,
      "grad_norm": 0.23047983646392822,
      "learning_rate": 9.410393546348156e-07,
      "loss": 0.4035,
      "step": 13669
    },
    {
      "epoch": 2.8101552060849007,
      "grad_norm": 0.23792816698551178,
      "learning_rate": 9.390084882163214e-07,
      "loss": 0.3815,
      "step": 13670
    },
    {
      "epoch": 2.8103607770582792,
      "grad_norm": 0.22404231131076813,
      "learning_rate": 9.369797924769436e-07,
      "loss": 0.3589,
      "step": 13671
    },
    {
      "epoch": 2.810566348031658,
      "grad_norm": 0.23044191300868988,
      "learning_rate": 9.349532675166223e-07,
      "loss": 0.3835,
      "step": 13672
    },
    {
      "epoch": 2.8107719190050364,
      "grad_norm": 0.232622891664505,
      "learning_rate": 9.329289134351927e-07,
      "loss": 0.3969,
      "step": 13673
    },
    {
      "epoch": 2.810977489978415,
      "grad_norm": 0.2428961992263794,
      "learning_rate": 9.309067303323848e-07,
      "loss": 0.3955,
      "step": 13674
    },
    {
      "epoch": 2.8111830609517936,
      "grad_norm": 0.12209093570709229,
      "learning_rate": 9.288867183078243e-07,
      "loss": 0.4581,
      "step": 13675
    },
    {
      "epoch": 2.811388631925172,
      "grad_norm": 0.2335900366306305,
      "learning_rate": 9.268688774610313e-07,
      "loss": 0.3835,
      "step": 13676
    },
    {
      "epoch": 2.8115942028985508,
      "grad_norm": 0.22861804068088531,
      "learning_rate": 9.248532078914063e-07,
      "loss": 0.3936,
      "step": 13677
    },
    {
      "epoch": 2.8117997738719294,
      "grad_norm": 0.2362525463104248,
      "learning_rate": 9.2283970969826e-07,
      "loss": 0.3921,
      "step": 13678
    },
    {
      "epoch": 2.812005344845308,
      "grad_norm": 0.2308216392993927,
      "learning_rate": 9.208283829807829e-07,
      "loss": 0.4013,
      "step": 13679
    },
    {
      "epoch": 2.8122109158186865,
      "grad_norm": 0.22565658390522003,
      "learning_rate": 9.188192278380709e-07,
      "loss": 0.3744,
      "step": 13680
    },
    {
      "epoch": 2.812416486792065,
      "grad_norm": 0.22707243263721466,
      "learning_rate": 9.168122443690997e-07,
      "loss": 0.3629,
      "step": 13681
    },
    {
      "epoch": 2.8126220577654433,
      "grad_norm": 0.22881367802619934,
      "learning_rate": 9.148074326727402e-07,
      "loss": 0.3871,
      "step": 13682
    },
    {
      "epoch": 2.8128276287388223,
      "grad_norm": 0.21950559318065643,
      "learning_rate": 9.128047928477685e-07,
      "loss": 0.3675,
      "step": 13683
    },
    {
      "epoch": 2.8130331997122004,
      "grad_norm": 0.23215575516223907,
      "learning_rate": 9.108043249928355e-07,
      "loss": 0.3695,
      "step": 13684
    },
    {
      "epoch": 2.813238770685579,
      "grad_norm": 0.250355988740921,
      "learning_rate": 9.088060292065076e-07,
      "loss": 0.3879,
      "step": 13685
    },
    {
      "epoch": 2.8134443416589576,
      "grad_norm": 0.2308340221643448,
      "learning_rate": 9.068099055872259e-07,
      "loss": 0.3749,
      "step": 13686
    },
    {
      "epoch": 2.813649912632336,
      "grad_norm": 0.11878734081983566,
      "learning_rate": 9.048159542333268e-07,
      "loss": 0.4479,
      "step": 13687
    },
    {
      "epoch": 2.813855483605715,
      "grad_norm": 0.12243502587080002,
      "learning_rate": 9.028241752430417e-07,
      "loss": 0.463,
      "step": 13688
    },
    {
      "epoch": 2.8140610545790934,
      "grad_norm": 0.22359062731266022,
      "learning_rate": 9.00834568714507e-07,
      "loss": 0.3762,
      "step": 13689
    },
    {
      "epoch": 2.814266625552472,
      "grad_norm": 0.22925424575805664,
      "learning_rate": 8.988471347457295e-07,
      "loss": 0.3776,
      "step": 13690
    },
    {
      "epoch": 2.8144721965258506,
      "grad_norm": 0.2405097633600235,
      "learning_rate": 8.968618734346207e-07,
      "loss": 0.3733,
      "step": 13691
    },
    {
      "epoch": 2.814677767499229,
      "grad_norm": 0.21798452734947205,
      "learning_rate": 8.948787848789974e-07,
      "loss": 0.365,
      "step": 13692
    },
    {
      "epoch": 2.8148833384726077,
      "grad_norm": 0.24408473074436188,
      "learning_rate": 8.928978691765466e-07,
      "loss": 0.3723,
      "step": 13693
    },
    {
      "epoch": 2.8150889094459863,
      "grad_norm": 0.23546668887138367,
      "learning_rate": 8.909191264248601e-07,
      "loss": 0.399,
      "step": 13694
    },
    {
      "epoch": 2.815294480419365,
      "grad_norm": 0.2411290407180786,
      "learning_rate": 8.889425567214249e-07,
      "loss": 0.3898,
      "step": 13695
    },
    {
      "epoch": 2.8155000513927435,
      "grad_norm": 0.12170881778001785,
      "learning_rate": 8.869681601636181e-07,
      "loss": 0.453,
      "step": 13696
    },
    {
      "epoch": 2.8157056223661217,
      "grad_norm": 0.22512827813625336,
      "learning_rate": 8.849959368487021e-07,
      "loss": 0.3593,
      "step": 13697
    },
    {
      "epoch": 2.8159111933395007,
      "grad_norm": 0.23673585057258606,
      "learning_rate": 8.830258868738439e-07,
      "loss": 0.3814,
      "step": 13698
    },
    {
      "epoch": 2.816116764312879,
      "grad_norm": 0.23666398227214813,
      "learning_rate": 8.81058010336101e-07,
      "loss": 0.3891,
      "step": 13699
    },
    {
      "epoch": 2.8163223352862574,
      "grad_norm": 0.23257263004779816,
      "learning_rate": 8.790923073324159e-07,
      "loss": 0.3874,
      "step": 13700
    },
    {
      "epoch": 2.816527906259636,
      "grad_norm": 0.11912301182746887,
      "learning_rate": 8.771287779596361e-07,
      "loss": 0.4726,
      "step": 13701
    },
    {
      "epoch": 2.8167334772330146,
      "grad_norm": 0.24169382452964783,
      "learning_rate": 8.75167422314489e-07,
      "loss": 0.3887,
      "step": 13702
    },
    {
      "epoch": 2.816939048206393,
      "grad_norm": 0.24105940759181976,
      "learning_rate": 8.732082404936026e-07,
      "loss": 0.3656,
      "step": 13703
    },
    {
      "epoch": 2.8171446191797718,
      "grad_norm": 0.23163765668869019,
      "learning_rate": 8.712512325934946e-07,
      "loss": 0.3995,
      "step": 13704
    },
    {
      "epoch": 2.8173501901531504,
      "grad_norm": 0.24219734966754913,
      "learning_rate": 8.692963987105878e-07,
      "loss": 0.3994,
      "step": 13705
    },
    {
      "epoch": 2.817555761126529,
      "grad_norm": 0.23079170286655426,
      "learning_rate": 8.673437389411804e-07,
      "loss": 0.386,
      "step": 13706
    },
    {
      "epoch": 2.8177613320999075,
      "grad_norm": 0.23005284368991852,
      "learning_rate": 8.653932533814702e-07,
      "loss": 0.3753,
      "step": 13707
    },
    {
      "epoch": 2.817966903073286,
      "grad_norm": 0.23586174845695496,
      "learning_rate": 8.634449421275504e-07,
      "loss": 0.3902,
      "step": 13708
    },
    {
      "epoch": 2.8181724740466647,
      "grad_norm": 0.22992920875549316,
      "learning_rate": 8.614988052754042e-07,
      "loss": 0.3829,
      "step": 13709
    },
    {
      "epoch": 2.8183780450200433,
      "grad_norm": 0.2352675497531891,
      "learning_rate": 8.5955484292091e-07,
      "loss": 0.3804,
      "step": 13710
    },
    {
      "epoch": 2.818583615993422,
      "grad_norm": 0.22630825638771057,
      "learning_rate": 8.576130551598311e-07,
      "loss": 0.3642,
      "step": 13711
    },
    {
      "epoch": 2.8187891869668,
      "grad_norm": 0.23707729578018188,
      "learning_rate": 8.556734420878409e-07,
      "loss": 0.3683,
      "step": 13712
    },
    {
      "epoch": 2.818994757940179,
      "grad_norm": 0.23465366661548615,
      "learning_rate": 8.537360038004883e-07,
      "loss": 0.3868,
      "step": 13713
    },
    {
      "epoch": 2.819200328913557,
      "grad_norm": 0.23585152626037598,
      "learning_rate": 8.518007403932266e-07,
      "loss": 0.4204,
      "step": 13714
    },
    {
      "epoch": 2.8194058998869362,
      "grad_norm": 0.23271988332271576,
      "learning_rate": 8.498676519613947e-07,
      "loss": 0.3661,
      "step": 13715
    },
    {
      "epoch": 2.8196114708603144,
      "grad_norm": 0.23224134743213654,
      "learning_rate": 8.479367386002163e-07,
      "loss": 0.3807,
      "step": 13716
    },
    {
      "epoch": 2.819817041833693,
      "grad_norm": 0.22672690451145172,
      "learning_rate": 8.460080004048404e-07,
      "loss": 0.3921,
      "step": 13717
    },
    {
      "epoch": 2.8200226128070716,
      "grad_norm": 0.2301137000322342,
      "learning_rate": 8.44081437470266e-07,
      "loss": 0.3761,
      "step": 13718
    },
    {
      "epoch": 2.82022818378045,
      "grad_norm": 0.24038895964622498,
      "learning_rate": 8.421570498914222e-07,
      "loss": 0.3823,
      "step": 13719
    },
    {
      "epoch": 2.8204337547538287,
      "grad_norm": 0.11897142231464386,
      "learning_rate": 8.402348377631031e-07,
      "loss": 0.4372,
      "step": 13720
    },
    {
      "epoch": 2.8206393257272073,
      "grad_norm": 0.23280301690101624,
      "learning_rate": 8.383148011800179e-07,
      "loss": 0.3707,
      "step": 13721
    },
    {
      "epoch": 2.820844896700586,
      "grad_norm": 0.2358703911304474,
      "learning_rate": 8.363969402367461e-07,
      "loss": 0.3826,
      "step": 13722
    },
    {
      "epoch": 2.8210504676739645,
      "grad_norm": 0.2333759367465973,
      "learning_rate": 8.34481255027777e-07,
      "loss": 0.3911,
      "step": 13723
    },
    {
      "epoch": 2.821256038647343,
      "grad_norm": 0.23327887058258057,
      "learning_rate": 8.325677456474901e-07,
      "loss": 0.3781,
      "step": 13724
    },
    {
      "epoch": 2.8214616096207217,
      "grad_norm": 0.23647433519363403,
      "learning_rate": 8.30656412190145e-07,
      "loss": 0.3817,
      "step": 13725
    },
    {
      "epoch": 2.8216671805941003,
      "grad_norm": 0.12305039912462234,
      "learning_rate": 8.287472547499165e-07,
      "loss": 0.4555,
      "step": 13726
    },
    {
      "epoch": 2.8218727515674784,
      "grad_norm": 0.22186824679374695,
      "learning_rate": 8.268402734208592e-07,
      "loss": 0.3963,
      "step": 13727
    },
    {
      "epoch": 2.8220783225408574,
      "grad_norm": 0.22588272392749786,
      "learning_rate": 8.249354682969129e-07,
      "loss": 0.3854,
      "step": 13728
    },
    {
      "epoch": 2.8222838935142356,
      "grad_norm": 0.23009559512138367,
      "learning_rate": 8.230328394719228e-07,
      "loss": 0.3894,
      "step": 13729
    },
    {
      "epoch": 2.8224894644876146,
      "grad_norm": 0.23012928664684296,
      "learning_rate": 8.211323870396187e-07,
      "loss": 0.3711,
      "step": 13730
    },
    {
      "epoch": 2.8226950354609928,
      "grad_norm": 0.12790702283382416,
      "learning_rate": 8.192341110936358e-07,
      "loss": 0.466,
      "step": 13731
    },
    {
      "epoch": 2.8229006064343714,
      "grad_norm": 0.2347603589296341,
      "learning_rate": 8.173380117274792e-07,
      "loss": 0.3855,
      "step": 13732
    },
    {
      "epoch": 2.82310617740775,
      "grad_norm": 0.11841531097888947,
      "learning_rate": 8.154440890345794e-07,
      "loss": 0.4421,
      "step": 13733
    },
    {
      "epoch": 2.8233117483811285,
      "grad_norm": 0.22990132868289948,
      "learning_rate": 8.135523431082265e-07,
      "loss": 0.373,
      "step": 13734
    },
    {
      "epoch": 2.823517319354507,
      "grad_norm": 0.2206183522939682,
      "learning_rate": 8.11662774041626e-07,
      "loss": 0.3587,
      "step": 13735
    },
    {
      "epoch": 2.8237228903278857,
      "grad_norm": 0.2378583699464798,
      "learning_rate": 8.097753819278636e-07,
      "loss": 0.3793,
      "step": 13736
    },
    {
      "epoch": 2.8239284613012643,
      "grad_norm": 0.22767938673496246,
      "learning_rate": 8.078901668599149e-07,
      "loss": 0.3706,
      "step": 13737
    },
    {
      "epoch": 2.824134032274643,
      "grad_norm": 0.23271609842777252,
      "learning_rate": 8.060071289306753e-07,
      "loss": 0.3807,
      "step": 13738
    },
    {
      "epoch": 2.8243396032480215,
      "grad_norm": 0.21641339361667633,
      "learning_rate": 8.04126268232901e-07,
      "loss": 0.3673,
      "step": 13739
    },
    {
      "epoch": 2.8245451742214,
      "grad_norm": 0.2371521145105362,
      "learning_rate": 8.022475848592475e-07,
      "loss": 0.3795,
      "step": 13740
    },
    {
      "epoch": 2.8247507451947786,
      "grad_norm": 0.22861357033252716,
      "learning_rate": 8.003710789022811e-07,
      "loss": 0.3907,
      "step": 13741
    },
    {
      "epoch": 2.824956316168157,
      "grad_norm": 0.23238502442836761,
      "learning_rate": 7.984967504544427e-07,
      "loss": 0.376,
      "step": 13742
    },
    {
      "epoch": 2.825161887141536,
      "grad_norm": 0.2233378142118454,
      "learning_rate": 7.966245996080734e-07,
      "loss": 0.3744,
      "step": 13743
    },
    {
      "epoch": 2.825367458114914,
      "grad_norm": 0.22623707354068756,
      "learning_rate": 7.947546264553996e-07,
      "loss": 0.3867,
      "step": 13744
    },
    {
      "epoch": 2.825573029088293,
      "grad_norm": 0.24018484354019165,
      "learning_rate": 7.928868310885573e-07,
      "loss": 0.3648,
      "step": 13745
    },
    {
      "epoch": 2.825778600061671,
      "grad_norm": 0.13057471811771393,
      "learning_rate": 7.910212135995481e-07,
      "loss": 0.4654,
      "step": 13746
    },
    {
      "epoch": 2.8259841710350497,
      "grad_norm": 0.22883687913417816,
      "learning_rate": 7.891577740802985e-07,
      "loss": 0.3663,
      "step": 13747
    },
    {
      "epoch": 2.8261897420084283,
      "grad_norm": 0.23778748512268066,
      "learning_rate": 7.872965126226e-07,
      "loss": 0.3603,
      "step": 13748
    },
    {
      "epoch": 2.826395312981807,
      "grad_norm": 0.12038971483707428,
      "learning_rate": 7.854374293181593e-07,
      "loss": 0.4537,
      "step": 13749
    },
    {
      "epoch": 2.8266008839551855,
      "grad_norm": 0.11914535611867905,
      "learning_rate": 7.835805242585531e-07,
      "loss": 0.4408,
      "step": 13750
    },
    {
      "epoch": 2.826806454928564,
      "grad_norm": 0.22773846983909607,
      "learning_rate": 7.817257975352682e-07,
      "loss": 0.3739,
      "step": 13751
    },
    {
      "epoch": 2.8270120259019427,
      "grad_norm": 0.2309103161096573,
      "learning_rate": 7.798732492396815e-07,
      "loss": 0.3781,
      "step": 13752
    },
    {
      "epoch": 2.8272175968753213,
      "grad_norm": 0.12411284446716309,
      "learning_rate": 7.780228794630451e-07,
      "loss": 0.4418,
      "step": 13753
    },
    {
      "epoch": 2.8274231678487,
      "grad_norm": 0.22320185601711273,
      "learning_rate": 7.761746882965359e-07,
      "loss": 0.3706,
      "step": 13754
    },
    {
      "epoch": 2.8276287388220784,
      "grad_norm": 0.23378294706344604,
      "learning_rate": 7.743286758312013e-07,
      "loss": 0.3784,
      "step": 13755
    },
    {
      "epoch": 2.827834309795457,
      "grad_norm": 0.23577441275119781,
      "learning_rate": 7.724848421579784e-07,
      "loss": 0.371,
      "step": 13756
    },
    {
      "epoch": 2.828039880768835,
      "grad_norm": 0.22351431846618652,
      "learning_rate": 7.706431873677094e-07,
      "loss": 0.3703,
      "step": 13757
    },
    {
      "epoch": 2.828245451742214,
      "grad_norm": 0.24170389771461487,
      "learning_rate": 7.688037115511171e-07,
      "loss": 0.391,
      "step": 13758
    },
    {
      "epoch": 2.8284510227155923,
      "grad_norm": 0.23205341398715973,
      "learning_rate": 7.669664147988387e-07,
      "loss": 0.3744,
      "step": 13759
    },
    {
      "epoch": 2.8286565936889714,
      "grad_norm": 0.22255219519138336,
      "learning_rate": 7.651312972013769e-07,
      "loss": 0.3775,
      "step": 13760
    },
    {
      "epoch": 2.8288621646623495,
      "grad_norm": 0.22708290815353394,
      "learning_rate": 7.632983588491393e-07,
      "loss": 0.3945,
      "step": 13761
    },
    {
      "epoch": 2.829067735635728,
      "grad_norm": 0.23190079629421234,
      "learning_rate": 7.614675998324339e-07,
      "loss": 0.3955,
      "step": 13762
    },
    {
      "epoch": 2.8292733066091067,
      "grad_norm": 0.11703302711248398,
      "learning_rate": 7.596390202414483e-07,
      "loss": 0.4556,
      "step": 13763
    },
    {
      "epoch": 2.8294788775824853,
      "grad_norm": 0.232466459274292,
      "learning_rate": 7.578126201662706e-07,
      "loss": 0.3894,
      "step": 13764
    },
    {
      "epoch": 2.829684448555864,
      "grad_norm": 0.23175998032093048,
      "learning_rate": 7.559883996968787e-07,
      "loss": 0.36,
      "step": 13765
    },
    {
      "epoch": 2.8298900195292425,
      "grad_norm": 0.2221493124961853,
      "learning_rate": 7.541663589231407e-07,
      "loss": 0.3767,
      "step": 13766
    },
    {
      "epoch": 2.830095590502621,
      "grad_norm": 0.23145779967308044,
      "learning_rate": 7.5234649793482e-07,
      "loss": 0.3761,
      "step": 13767
    },
    {
      "epoch": 2.8303011614759996,
      "grad_norm": 0.2308301031589508,
      "learning_rate": 7.505288168215746e-07,
      "loss": 0.3777,
      "step": 13768
    },
    {
      "epoch": 2.8305067324493782,
      "grad_norm": 0.22926832735538483,
      "learning_rate": 7.487133156729531e-07,
      "loss": 0.3794,
      "step": 13769
    },
    {
      "epoch": 2.830712303422757,
      "grad_norm": 0.22793909907341003,
      "learning_rate": 7.468999945783989e-07,
      "loss": 0.3854,
      "step": 13770
    },
    {
      "epoch": 2.8309178743961354,
      "grad_norm": 0.23420362174510956,
      "learning_rate": 7.450888536272455e-07,
      "loss": 0.3804,
      "step": 13771
    },
    {
      "epoch": 2.8311234453695135,
      "grad_norm": 0.2258753925561905,
      "learning_rate": 7.432798929087115e-07,
      "loss": 0.386,
      "step": 13772
    },
    {
      "epoch": 2.8313290163428926,
      "grad_norm": 0.12601035833358765,
      "learning_rate": 7.414731125119256e-07,
      "loss": 0.4424,
      "step": 13773
    },
    {
      "epoch": 2.8315345873162707,
      "grad_norm": 0.22683130204677582,
      "learning_rate": 7.396685125258917e-07,
      "loss": 0.3806,
      "step": 13774
    },
    {
      "epoch": 2.8317401582896498,
      "grad_norm": 0.23239809274673462,
      "learning_rate": 7.378660930395237e-07,
      "loss": 0.373,
      "step": 13775
    },
    {
      "epoch": 2.831945729263028,
      "grad_norm": 0.23171231150627136,
      "learning_rate": 7.360658541416054e-07,
      "loss": 0.3781,
      "step": 13776
    },
    {
      "epoch": 2.8321513002364065,
      "grad_norm": 0.23430903255939484,
      "learning_rate": 7.34267795920841e-07,
      "loss": 0.3819,
      "step": 13777
    },
    {
      "epoch": 2.832356871209785,
      "grad_norm": 0.22949565947055817,
      "learning_rate": 7.324719184657997e-07,
      "loss": 0.378,
      "step": 13778
    },
    {
      "epoch": 2.8325624421831637,
      "grad_norm": 0.11871360242366791,
      "learning_rate": 7.306782218649605e-07,
      "loss": 0.4448,
      "step": 13779
    },
    {
      "epoch": 2.8327680131565423,
      "grad_norm": 0.2298881858587265,
      "learning_rate": 7.288867062066928e-07,
      "loss": 0.3606,
      "step": 13780
    },
    {
      "epoch": 2.832973584129921,
      "grad_norm": 0.11663959920406342,
      "learning_rate": 7.270973715792562e-07,
      "loss": 0.4501,
      "step": 13781
    },
    {
      "epoch": 2.8331791551032994,
      "grad_norm": 0.12173844128847122,
      "learning_rate": 7.253102180707949e-07,
      "loss": 0.4564,
      "step": 13782
    },
    {
      "epoch": 2.833384726076678,
      "grad_norm": 0.2263535112142563,
      "learning_rate": 7.235252457693686e-07,
      "loss": 0.3858,
      "step": 13783
    },
    {
      "epoch": 2.8335902970500566,
      "grad_norm": 0.11779969185590744,
      "learning_rate": 7.21742454762902e-07,
      "loss": 0.4431,
      "step": 13784
    },
    {
      "epoch": 2.833795868023435,
      "grad_norm": 0.2434069812297821,
      "learning_rate": 7.199618451392298e-07,
      "loss": 0.4067,
      "step": 13785
    },
    {
      "epoch": 2.834001438996814,
      "grad_norm": 0.22886650264263153,
      "learning_rate": 7.181834169860719e-07,
      "loss": 0.3828,
      "step": 13786
    },
    {
      "epoch": 2.8342070099701924,
      "grad_norm": 0.2306927889585495,
      "learning_rate": 7.16407170391038e-07,
      "loss": 0.3762,
      "step": 13787
    },
    {
      "epoch": 2.834412580943571,
      "grad_norm": 0.2322409451007843,
      "learning_rate": 7.146331054416483e-07,
      "loss": 0.3907,
      "step": 13788
    },
    {
      "epoch": 2.834618151916949,
      "grad_norm": 0.22728115320205688,
      "learning_rate": 7.128612222252979e-07,
      "loss": 0.3824,
      "step": 13789
    },
    {
      "epoch": 2.834823722890328,
      "grad_norm": 0.225159153342247,
      "learning_rate": 7.110915208292768e-07,
      "loss": 0.4054,
      "step": 13790
    },
    {
      "epoch": 2.8350292938637063,
      "grad_norm": 0.12113186717033386,
      "learning_rate": 7.093240013407704e-07,
      "loss": 0.439,
      "step": 13791
    },
    {
      "epoch": 2.835234864837085,
      "grad_norm": 0.2332168072462082,
      "learning_rate": 7.07558663846854e-07,
      "loss": 0.3793,
      "step": 13792
    },
    {
      "epoch": 2.8354404358104635,
      "grad_norm": 0.22835347056388855,
      "learning_rate": 7.05795508434503e-07,
      "loss": 0.3758,
      "step": 13793
    },
    {
      "epoch": 2.835646006783842,
      "grad_norm": 0.12069544196128845,
      "learning_rate": 7.040345351905731e-07,
      "loss": 0.4602,
      "step": 13794
    },
    {
      "epoch": 2.8358515777572206,
      "grad_norm": 0.22868898510932922,
      "learning_rate": 7.022757442018246e-07,
      "loss": 0.3804,
      "step": 13795
    },
    {
      "epoch": 2.836057148730599,
      "grad_norm": 0.232134148478508,
      "learning_rate": 7.005191355549034e-07,
      "loss": 0.3889,
      "step": 13796
    },
    {
      "epoch": 2.836262719703978,
      "grad_norm": 0.23718050122261047,
      "learning_rate": 6.987647093363503e-07,
      "loss": 0.3728,
      "step": 13797
    },
    {
      "epoch": 2.8364682906773564,
      "grad_norm": 0.24368955194950104,
      "learning_rate": 6.970124656325911e-07,
      "loss": 0.3852,
      "step": 13798
    },
    {
      "epoch": 2.836673861650735,
      "grad_norm": 0.2304588258266449,
      "learning_rate": 6.952624045299617e-07,
      "loss": 0.3809,
      "step": 13799
    },
    {
      "epoch": 2.8368794326241136,
      "grad_norm": 0.23114575445652008,
      "learning_rate": 6.935145261146731e-07,
      "loss": 0.3808,
      "step": 13800
    },
    {
      "epoch": 2.837085003597492,
      "grad_norm": 0.22746378183364868,
      "learning_rate": 6.917688304728315e-07,
      "loss": 0.3887,
      "step": 13801
    },
    {
      "epoch": 2.8372905745708707,
      "grad_norm": 0.22767049074172974,
      "learning_rate": 6.900253176904481e-07,
      "loss": 0.3729,
      "step": 13802
    },
    {
      "epoch": 2.8374961455442493,
      "grad_norm": 0.22864069044589996,
      "learning_rate": 6.882839878534092e-07,
      "loss": 0.3854,
      "step": 13803
    },
    {
      "epoch": 2.8377017165176275,
      "grad_norm": 0.22305408120155334,
      "learning_rate": 6.865448410475112e-07,
      "loss": 0.4005,
      "step": 13804
    },
    {
      "epoch": 2.8379072874910065,
      "grad_norm": 0.22816435992717743,
      "learning_rate": 6.848078773584255e-07,
      "loss": 0.3775,
      "step": 13805
    },
    {
      "epoch": 2.8381128584643847,
      "grad_norm": 0.23188713192939758,
      "learning_rate": 6.830730968717236e-07,
      "loss": 0.3879,
      "step": 13806
    },
    {
      "epoch": 2.8383184294377632,
      "grad_norm": 0.11994650214910507,
      "learning_rate": 6.813404996728823e-07,
      "loss": 0.4432,
      "step": 13807
    },
    {
      "epoch": 2.838524000411142,
      "grad_norm": 0.23941002786159515,
      "learning_rate": 6.796100858472382e-07,
      "loss": 0.3655,
      "step": 13808
    },
    {
      "epoch": 2.8387295713845204,
      "grad_norm": 0.12042734026908875,
      "learning_rate": 6.778818554800581e-07,
      "loss": 0.451,
      "step": 13809
    },
    {
      "epoch": 2.838935142357899,
      "grad_norm": 0.23225072026252747,
      "learning_rate": 6.76155808656479e-07,
      "loss": 0.3759,
      "step": 13810
    },
    {
      "epoch": 2.8391407133312776,
      "grad_norm": 0.23144301772117615,
      "learning_rate": 6.744319454615328e-07,
      "loss": 0.3922,
      "step": 13811
    },
    {
      "epoch": 2.839346284304656,
      "grad_norm": 0.24022118747234344,
      "learning_rate": 6.727102659801515e-07,
      "loss": 0.3847,
      "step": 13812
    },
    {
      "epoch": 2.8395518552780348,
      "grad_norm": 0.22620242834091187,
      "learning_rate": 6.709907702971474e-07,
      "loss": 0.3849,
      "step": 13813
    },
    {
      "epoch": 2.8397574262514134,
      "grad_norm": 0.2255433201789856,
      "learning_rate": 6.692734584972326e-07,
      "loss": 0.3737,
      "step": 13814
    },
    {
      "epoch": 2.839962997224792,
      "grad_norm": 0.2278052270412445,
      "learning_rate": 6.675583306650096e-07,
      "loss": 0.3742,
      "step": 13815
    },
    {
      "epoch": 2.8401685681981705,
      "grad_norm": 0.22527383267879486,
      "learning_rate": 6.658453868849857e-07,
      "loss": 0.3887,
      "step": 13816
    },
    {
      "epoch": 2.840374139171549,
      "grad_norm": 0.2278517484664917,
      "learning_rate": 6.641346272415383e-07,
      "loss": 0.3734,
      "step": 13817
    },
    {
      "epoch": 2.8405797101449277,
      "grad_norm": 0.23448723554611206,
      "learning_rate": 6.624260518189551e-07,
      "loss": 0.3784,
      "step": 13818
    },
    {
      "epoch": 2.840785281118306,
      "grad_norm": 0.24033266305923462,
      "learning_rate": 6.607196607014088e-07,
      "loss": 0.3812,
      "step": 13819
    },
    {
      "epoch": 2.840990852091685,
      "grad_norm": 0.22752645611763,
      "learning_rate": 6.590154539729621e-07,
      "loss": 0.3747,
      "step": 13820
    },
    {
      "epoch": 2.841196423065063,
      "grad_norm": 0.2382228821516037,
      "learning_rate": 6.573134317175728e-07,
      "loss": 0.3989,
      "step": 13821
    },
    {
      "epoch": 2.8414019940384416,
      "grad_norm": 0.23340356349945068,
      "learning_rate": 6.556135940190888e-07,
      "loss": 0.3767,
      "step": 13822
    },
    {
      "epoch": 2.84160756501182,
      "grad_norm": 0.12209226191043854,
      "learning_rate": 6.539159409612633e-07,
      "loss": 0.4466,
      "step": 13823
    },
    {
      "epoch": 2.841813135985199,
      "grad_norm": 0.22561949491500854,
      "learning_rate": 6.522204726277293e-07,
      "loss": 0.3758,
      "step": 13824
    },
    {
      "epoch": 2.8420187069585774,
      "grad_norm": 0.225555419921875,
      "learning_rate": 6.505271891020048e-07,
      "loss": 0.3724,
      "step": 13825
    },
    {
      "epoch": 2.842224277931956,
      "grad_norm": 0.2285340279340744,
      "learning_rate": 6.488360904675234e-07,
      "loss": 0.3866,
      "step": 13826
    },
    {
      "epoch": 2.8424298489053346,
      "grad_norm": 0.2325884997844696,
      "learning_rate": 6.471471768075882e-07,
      "loss": 0.3787,
      "step": 13827
    },
    {
      "epoch": 2.842635419878713,
      "grad_norm": 0.1197914183139801,
      "learning_rate": 6.454604482054077e-07,
      "loss": 0.4564,
      "step": 13828
    },
    {
      "epoch": 2.8428409908520917,
      "grad_norm": 0.24161775410175323,
      "learning_rate": 6.437759047440706e-07,
      "loss": 0.3779,
      "step": 13829
    },
    {
      "epoch": 2.8430465618254703,
      "grad_norm": 0.23106519877910614,
      "learning_rate": 6.420935465065853e-07,
      "loss": 0.3715,
      "step": 13830
    },
    {
      "epoch": 2.843252132798849,
      "grad_norm": 0.22928760945796967,
      "learning_rate": 6.404133735758156e-07,
      "loss": 0.3916,
      "step": 13831
    },
    {
      "epoch": 2.8434577037722275,
      "grad_norm": 0.22873489558696747,
      "learning_rate": 6.387353860345452e-07,
      "loss": 0.381,
      "step": 13832
    },
    {
      "epoch": 2.843663274745606,
      "grad_norm": 0.23243139684200287,
      "learning_rate": 6.370595839654431e-07,
      "loss": 0.3902,
      "step": 13833
    },
    {
      "epoch": 2.8438688457189842,
      "grad_norm": 0.2291172593832016,
      "learning_rate": 6.353859674510582e-07,
      "loss": 0.3911,
      "step": 13834
    },
    {
      "epoch": 2.8440744166923633,
      "grad_norm": 0.22925592958927155,
      "learning_rate": 6.337145365738495e-07,
      "loss": 0.3684,
      "step": 13835
    },
    {
      "epoch": 2.8442799876657414,
      "grad_norm": 0.22563427686691284,
      "learning_rate": 6.320452914161512e-07,
      "loss": 0.3863,
      "step": 13836
    },
    {
      "epoch": 2.84448555863912,
      "grad_norm": 0.23132719099521637,
      "learning_rate": 6.303782320602126e-07,
      "loss": 0.397,
      "step": 13837
    },
    {
      "epoch": 2.8446911296124986,
      "grad_norm": 0.12186164408922195,
      "learning_rate": 6.287133585881528e-07,
      "loss": 0.4323,
      "step": 13838
    },
    {
      "epoch": 2.844896700585877,
      "grad_norm": 0.1260182410478592,
      "learning_rate": 6.270506710819963e-07,
      "loss": 0.4418,
      "step": 13839
    },
    {
      "epoch": 2.8451022715592558,
      "grad_norm": 0.11887041479349136,
      "learning_rate": 6.253901696236575e-07,
      "loss": 0.4506,
      "step": 13840
    },
    {
      "epoch": 2.8453078425326344,
      "grad_norm": 0.23686912655830383,
      "learning_rate": 6.237318542949361e-07,
      "loss": 0.3608,
      "step": 13841
    },
    {
      "epoch": 2.845513413506013,
      "grad_norm": 0.2436566948890686,
      "learning_rate": 6.220757251775316e-07,
      "loss": 0.3661,
      "step": 13842
    },
    {
      "epoch": 2.8457189844793915,
      "grad_norm": 0.2323562502861023,
      "learning_rate": 6.20421782353034e-07,
      "loss": 0.3828,
      "step": 13843
    },
    {
      "epoch": 2.84592455545277,
      "grad_norm": 0.12596507370471954,
      "learning_rate": 6.187700259029227e-07,
      "loss": 0.4397,
      "step": 13844
    },
    {
      "epoch": 2.8461301264261487,
      "grad_norm": 0.243175208568573,
      "learning_rate": 6.17120455908578e-07,
      "loss": 0.3926,
      "step": 13845
    },
    {
      "epoch": 2.8463356973995273,
      "grad_norm": 0.24358853697776794,
      "learning_rate": 6.154730724512648e-07,
      "loss": 0.3934,
      "step": 13846
    },
    {
      "epoch": 2.846541268372906,
      "grad_norm": 0.23144344985485077,
      "learning_rate": 6.13827875612138e-07,
      "loss": 0.3733,
      "step": 13847
    },
    {
      "epoch": 2.8467468393462845,
      "grad_norm": 0.33637747168540955,
      "learning_rate": 6.121848654722528e-07,
      "loss": 0.3871,
      "step": 13848
    },
    {
      "epoch": 2.8469524103196626,
      "grad_norm": 0.24188685417175293,
      "learning_rate": 6.105440421125497e-07,
      "loss": 0.3871,
      "step": 13849
    },
    {
      "epoch": 2.8471579812930417,
      "grad_norm": 0.12031394243240356,
      "learning_rate": 6.089054056138687e-07,
      "loss": 0.441,
      "step": 13850
    },
    {
      "epoch": 2.84736355226642,
      "grad_norm": 0.23142001032829285,
      "learning_rate": 6.072689560569306e-07,
      "loss": 0.3923,
      "step": 13851
    },
    {
      "epoch": 2.8475691232397984,
      "grad_norm": 0.23788262903690338,
      "learning_rate": 6.056346935223656e-07,
      "loss": 0.3881,
      "step": 13852
    },
    {
      "epoch": 2.847774694213177,
      "grad_norm": 0.23109963536262512,
      "learning_rate": 6.040026180906744e-07,
      "loss": 0.3941,
      "step": 13853
    },
    {
      "epoch": 2.8479802651865556,
      "grad_norm": 0.23182469606399536,
      "learning_rate": 6.023727298422726e-07,
      "loss": 0.3771,
      "step": 13854
    },
    {
      "epoch": 2.848185836159934,
      "grad_norm": 0.23489411175251007,
      "learning_rate": 6.007450288574512e-07,
      "loss": 0.3841,
      "step": 13855
    },
    {
      "epoch": 2.8483914071333127,
      "grad_norm": 0.23740611970424652,
      "learning_rate": 5.991195152164009e-07,
      "loss": 0.3707,
      "step": 13856
    },
    {
      "epoch": 2.8485969781066913,
      "grad_norm": 0.23565572500228882,
      "learning_rate": 5.974961889992026e-07,
      "loss": 0.4023,
      "step": 13857
    },
    {
      "epoch": 2.84880254908007,
      "grad_norm": 0.23655489087104797,
      "learning_rate": 5.958750502858274e-07,
      "loss": 0.3848,
      "step": 13858
    },
    {
      "epoch": 2.8490081200534485,
      "grad_norm": 0.2304118573665619,
      "learning_rate": 5.942560991561464e-07,
      "loss": 0.3871,
      "step": 13859
    },
    {
      "epoch": 2.849213691026827,
      "grad_norm": 0.22532600164413452,
      "learning_rate": 5.926393356899207e-07,
      "loss": 0.3746,
      "step": 13860
    },
    {
      "epoch": 2.8494192620002057,
      "grad_norm": 0.22565500438213348,
      "learning_rate": 5.910247599667867e-07,
      "loss": 0.4012,
      "step": 13861
    },
    {
      "epoch": 2.8496248329735843,
      "grad_norm": 0.22938272356987,
      "learning_rate": 5.894123720663009e-07,
      "loss": 0.3793,
      "step": 13862
    },
    {
      "epoch": 2.849830403946963,
      "grad_norm": 0.2282402366399765,
      "learning_rate": 5.878021720678894e-07,
      "loss": 0.3631,
      "step": 13863
    },
    {
      "epoch": 2.850035974920341,
      "grad_norm": 0.23935887217521667,
      "learning_rate": 5.861941600508841e-07,
      "loss": 0.3811,
      "step": 13864
    },
    {
      "epoch": 2.85024154589372,
      "grad_norm": 0.12173505127429962,
      "learning_rate": 5.845883360945065e-07,
      "loss": 0.4352,
      "step": 13865
    },
    {
      "epoch": 2.850447116867098,
      "grad_norm": 0.12043416500091553,
      "learning_rate": 5.829847002778633e-07,
      "loss": 0.4488,
      "step": 13866
    },
    {
      "epoch": 2.8506526878404768,
      "grad_norm": 0.23177044093608856,
      "learning_rate": 5.813832526799562e-07,
      "loss": 0.3819,
      "step": 13867
    },
    {
      "epoch": 2.8508582588138554,
      "grad_norm": 0.12020587176084518,
      "learning_rate": 5.797839933796823e-07,
      "loss": 0.4398,
      "step": 13868
    },
    {
      "epoch": 2.851063829787234,
      "grad_norm": 0.2312840223312378,
      "learning_rate": 5.781869224558384e-07,
      "loss": 0.3687,
      "step": 13869
    },
    {
      "epoch": 2.8512694007606125,
      "grad_norm": 0.12858018279075623,
      "learning_rate": 5.765920399870917e-07,
      "loss": 0.4559,
      "step": 13870
    },
    {
      "epoch": 2.851474971733991,
      "grad_norm": 0.24785396456718445,
      "learning_rate": 5.749993460520242e-07,
      "loss": 0.3848,
      "step": 13871
    },
    {
      "epoch": 2.8516805427073697,
      "grad_norm": 0.23876793682575226,
      "learning_rate": 5.734088407290933e-07,
      "loss": 0.4002,
      "step": 13872
    },
    {
      "epoch": 2.8518861136807483,
      "grad_norm": 0.12341229617595673,
      "learning_rate": 5.718205240966662e-07,
      "loss": 0.4539,
      "step": 13873
    },
    {
      "epoch": 2.852091684654127,
      "grad_norm": 0.23897776007652283,
      "learning_rate": 5.702343962329803e-07,
      "loss": 0.3986,
      "step": 13874
    },
    {
      "epoch": 2.8522972556275055,
      "grad_norm": 0.11988009512424469,
      "learning_rate": 5.686504572161833e-07,
      "loss": 0.4562,
      "step": 13875
    },
    {
      "epoch": 2.852502826600884,
      "grad_norm": 0.23703759908676147,
      "learning_rate": 5.670687071243075e-07,
      "loss": 0.382,
      "step": 13876
    },
    {
      "epoch": 2.8527083975742626,
      "grad_norm": 0.23015399277210236,
      "learning_rate": 5.654891460352707e-07,
      "loss": 0.3671,
      "step": 13877
    },
    {
      "epoch": 2.8529139685476412,
      "grad_norm": 0.23037444055080414,
      "learning_rate": 5.639117740269056e-07,
      "loss": 0.3773,
      "step": 13878
    },
    {
      "epoch": 2.8531195395210194,
      "grad_norm": 0.2336786836385727,
      "learning_rate": 5.623365911769102e-07,
      "loss": 0.385,
      "step": 13879
    },
    {
      "epoch": 2.8533251104943984,
      "grad_norm": 0.24950271844863892,
      "learning_rate": 5.607635975628922e-07,
      "loss": 0.3763,
      "step": 13880
    },
    {
      "epoch": 2.8535306814677766,
      "grad_norm": 0.2312586009502411,
      "learning_rate": 5.591927932623397e-07,
      "loss": 0.3725,
      "step": 13881
    },
    {
      "epoch": 2.8537362524411556,
      "grad_norm": 0.23014506697654724,
      "learning_rate": 5.57624178352646e-07,
      "loss": 0.3614,
      "step": 13882
    },
    {
      "epoch": 2.8539418234145337,
      "grad_norm": 0.22436246275901794,
      "learning_rate": 5.560577529110839e-07,
      "loss": 0.3772,
      "step": 13883
    },
    {
      "epoch": 2.8541473943879123,
      "grad_norm": 0.12695522606372833,
      "learning_rate": 5.544935170148218e-07,
      "loss": 0.4635,
      "step": 13884
    },
    {
      "epoch": 2.854352965361291,
      "grad_norm": 0.24410668015480042,
      "learning_rate": 5.529314707409333e-07,
      "loss": 0.378,
      "step": 13885
    },
    {
      "epoch": 2.8545585363346695,
      "grad_norm": 0.12377558648586273,
      "learning_rate": 5.513716141663616e-07,
      "loss": 0.435,
      "step": 13886
    },
    {
      "epoch": 2.854764107308048,
      "grad_norm": 0.24002113938331604,
      "learning_rate": 5.498139473679603e-07,
      "loss": 0.3777,
      "step": 13887
    },
    {
      "epoch": 2.8549696782814267,
      "grad_norm": 0.23580054938793182,
      "learning_rate": 5.48258470422463e-07,
      "loss": 0.3832,
      "step": 13888
    },
    {
      "epoch": 2.8551752492548053,
      "grad_norm": 0.23273934423923492,
      "learning_rate": 5.467051834065084e-07,
      "loss": 0.3725,
      "step": 13889
    },
    {
      "epoch": 2.855380820228184,
      "grad_norm": 0.23366734385490417,
      "learning_rate": 5.451540863966103e-07,
      "loss": 0.3706,
      "step": 13890
    },
    {
      "epoch": 2.8555863912015624,
      "grad_norm": 0.11989044398069382,
      "learning_rate": 5.436051794691926e-07,
      "loss": 0.4374,
      "step": 13891
    },
    {
      "epoch": 2.855791962174941,
      "grad_norm": 0.22055114805698395,
      "learning_rate": 5.420584627005593e-07,
      "loss": 0.3711,
      "step": 13892
    },
    {
      "epoch": 2.8559975331483196,
      "grad_norm": 0.12336910516023636,
      "learning_rate": 5.405139361669093e-07,
      "loss": 0.444,
      "step": 13893
    },
    {
      "epoch": 2.8562031041216978,
      "grad_norm": 0.1187121644616127,
      "learning_rate": 5.389715999443318e-07,
      "loss": 0.4488,
      "step": 13894
    },
    {
      "epoch": 2.856408675095077,
      "grad_norm": 0.21668803691864014,
      "learning_rate": 5.37431454108816e-07,
      "loss": 0.3714,
      "step": 13895
    },
    {
      "epoch": 2.856614246068455,
      "grad_norm": 0.11917508393526077,
      "learning_rate": 5.358934987362363e-07,
      "loss": 0.4409,
      "step": 13896
    },
    {
      "epoch": 2.856819817041834,
      "grad_norm": 0.22866788506507874,
      "learning_rate": 5.34357733902357e-07,
      "loss": 0.3774,
      "step": 13897
    },
    {
      "epoch": 2.857025388015212,
      "grad_norm": 0.12167064100503922,
      "learning_rate": 5.328241596828376e-07,
      "loss": 0.452,
      "step": 13898
    },
    {
      "epoch": 2.8572309589885907,
      "grad_norm": 0.12296809256076813,
      "learning_rate": 5.312927761532377e-07,
      "loss": 0.4389,
      "step": 13899
    },
    {
      "epoch": 2.8574365299619693,
      "grad_norm": 0.24001666903495789,
      "learning_rate": 5.297635833889969e-07,
      "loss": 0.3771,
      "step": 13900
    },
    {
      "epoch": 2.857642100935348,
      "grad_norm": 0.22801834344863892,
      "learning_rate": 5.2823658146545e-07,
      "loss": 0.3763,
      "step": 13901
    },
    {
      "epoch": 2.8578476719087265,
      "grad_norm": 0.22676675021648407,
      "learning_rate": 5.267117704578267e-07,
      "loss": 0.3693,
      "step": 13902
    },
    {
      "epoch": 2.858053242882105,
      "grad_norm": 0.2277052402496338,
      "learning_rate": 5.251891504412421e-07,
      "loss": 0.3509,
      "step": 13903
    },
    {
      "epoch": 2.8582588138554836,
      "grad_norm": 0.22454136610031128,
      "learning_rate": 5.23668721490716e-07,
      "loss": 0.3813,
      "step": 13904
    },
    {
      "epoch": 2.8584643848288622,
      "grad_norm": 0.2237093299627304,
      "learning_rate": 5.221504836811486e-07,
      "loss": 0.3734,
      "step": 13905
    },
    {
      "epoch": 2.858669955802241,
      "grad_norm": 0.24160228669643402,
      "learning_rate": 5.2063443708734e-07,
      "loss": 0.3786,
      "step": 13906
    },
    {
      "epoch": 2.8588755267756194,
      "grad_norm": 0.2331501841545105,
      "learning_rate": 5.191205817839806e-07,
      "loss": 0.3789,
      "step": 13907
    },
    {
      "epoch": 2.859081097748998,
      "grad_norm": 0.24461065232753754,
      "learning_rate": 5.176089178456406e-07,
      "loss": 0.3826,
      "step": 13908
    },
    {
      "epoch": 2.859286668722376,
      "grad_norm": 0.22187209129333496,
      "learning_rate": 5.160994453468055e-07,
      "loss": 0.364,
      "step": 13909
    },
    {
      "epoch": 2.859492239695755,
      "grad_norm": 0.232316792011261,
      "learning_rate": 5.145921643618257e-07,
      "loss": 0.3813,
      "step": 13910
    },
    {
      "epoch": 2.8596978106691333,
      "grad_norm": 0.22536687552928925,
      "learning_rate": 5.130870749649669e-07,
      "loss": 0.3738,
      "step": 13911
    },
    {
      "epoch": 2.8599033816425123,
      "grad_norm": 0.2332964688539505,
      "learning_rate": 5.115841772303798e-07,
      "loss": 0.376,
      "step": 13912
    },
    {
      "epoch": 2.8601089526158905,
      "grad_norm": 0.23040318489074707,
      "learning_rate": 5.100834712321001e-07,
      "loss": 0.3887,
      "step": 13913
    },
    {
      "epoch": 2.860314523589269,
      "grad_norm": 0.2240133285522461,
      "learning_rate": 5.085849570440638e-07,
      "loss": 0.3693,
      "step": 13914
    },
    {
      "epoch": 2.8605200945626477,
      "grad_norm": 0.2326270490884781,
      "learning_rate": 5.070886347400966e-07,
      "loss": 0.3749,
      "step": 13915
    },
    {
      "epoch": 2.8607256655360263,
      "grad_norm": 0.12496310472488403,
      "learning_rate": 5.055945043939098e-07,
      "loss": 0.4531,
      "step": 13916
    },
    {
      "epoch": 2.860931236509405,
      "grad_norm": 0.12099100649356842,
      "learning_rate": 5.041025660791193e-07,
      "loss": 0.4613,
      "step": 13917
    },
    {
      "epoch": 2.8611368074827834,
      "grad_norm": 0.23122435808181763,
      "learning_rate": 5.026128198692165e-07,
      "loss": 0.3912,
      "step": 13918
    },
    {
      "epoch": 2.861342378456162,
      "grad_norm": 0.24232856929302216,
      "learning_rate": 5.011252658376025e-07,
      "loss": 0.3617,
      "step": 13919
    },
    {
      "epoch": 2.8615479494295406,
      "grad_norm": 0.2327503263950348,
      "learning_rate": 4.996399040575589e-07,
      "loss": 0.3817,
      "step": 13920
    },
    {
      "epoch": 2.861753520402919,
      "grad_norm": 0.2326626479625702,
      "learning_rate": 4.981567346022619e-07,
      "loss": 0.3987,
      "step": 13921
    },
    {
      "epoch": 2.861959091376298,
      "grad_norm": 0.22813312709331512,
      "learning_rate": 4.966757575447833e-07,
      "loss": 0.3884,
      "step": 13922
    },
    {
      "epoch": 2.8621646623496764,
      "grad_norm": 0.22625859081745148,
      "learning_rate": 4.951969729580846e-07,
      "loss": 0.3947,
      "step": 13923
    },
    {
      "epoch": 2.8623702333230545,
      "grad_norm": 0.23106195032596588,
      "learning_rate": 4.937203809150126e-07,
      "loss": 0.376,
      "step": 13924
    },
    {
      "epoch": 2.8625758042964335,
      "grad_norm": 0.1207781508564949,
      "learning_rate": 4.92245981488319e-07,
      "loss": 0.4405,
      "step": 13925
    },
    {
      "epoch": 2.8627813752698117,
      "grad_norm": 0.232728511095047,
      "learning_rate": 4.907737747506308e-07,
      "loss": 0.3792,
      "step": 13926
    },
    {
      "epoch": 2.8629869462431907,
      "grad_norm": 0.2338234782218933,
      "learning_rate": 4.893037607744849e-07,
      "loss": 0.3716,
      "step": 13927
    },
    {
      "epoch": 2.863192517216569,
      "grad_norm": 0.24571533501148224,
      "learning_rate": 4.878359396323035e-07,
      "loss": 0.3928,
      "step": 13928
    },
    {
      "epoch": 2.8633980881899475,
      "grad_norm": 0.23208092153072357,
      "learning_rate": 4.863703113963986e-07,
      "loss": 0.3748,
      "step": 13929
    },
    {
      "epoch": 2.863603659163326,
      "grad_norm": 0.23107780516147614,
      "learning_rate": 4.849068761389675e-07,
      "loss": 0.3716,
      "step": 13930
    },
    {
      "epoch": 2.8638092301367046,
      "grad_norm": 0.12082730978727341,
      "learning_rate": 4.834456339321075e-07,
      "loss": 0.4541,
      "step": 13931
    },
    {
      "epoch": 2.864014801110083,
      "grad_norm": 0.12191561609506607,
      "learning_rate": 4.819865848478212e-07,
      "loss": 0.4471,
      "step": 13932
    },
    {
      "epoch": 2.864220372083462,
      "grad_norm": 0.23875342309474945,
      "learning_rate": 4.805297289579708e-07,
      "loss": 0.4194,
      "step": 13933
    },
    {
      "epoch": 2.8644259430568404,
      "grad_norm": 0.22163498401641846,
      "learning_rate": 4.790750663343391e-07,
      "loss": 0.3613,
      "step": 13934
    },
    {
      "epoch": 2.864631514030219,
      "grad_norm": 0.24136824905872345,
      "learning_rate": 4.776225970485937e-07,
      "loss": 0.3839,
      "step": 13935
    },
    {
      "epoch": 2.8648370850035976,
      "grad_norm": 0.22400477528572083,
      "learning_rate": 4.761723211722824e-07,
      "loss": 0.3655,
      "step": 13936
    },
    {
      "epoch": 2.865042655976976,
      "grad_norm": 0.23349706828594208,
      "learning_rate": 4.7472423877685804e-07,
      "loss": 0.3814,
      "step": 13937
    },
    {
      "epoch": 2.8652482269503547,
      "grad_norm": 0.24638283252716064,
      "learning_rate": 4.732783499336585e-07,
      "loss": 0.3953,
      "step": 13938
    },
    {
      "epoch": 2.865453797923733,
      "grad_norm": 0.23078061640262604,
      "learning_rate": 4.718346547139119e-07,
      "loss": 0.3858,
      "step": 13939
    },
    {
      "epoch": 2.865659368897112,
      "grad_norm": 0.23065340518951416,
      "learning_rate": 4.7039315318875623e-07,
      "loss": 0.3522,
      "step": 13940
    },
    {
      "epoch": 2.86586493987049,
      "grad_norm": 0.22871986031532288,
      "learning_rate": 4.6895384542919477e-07,
      "loss": 0.3913,
      "step": 13941
    },
    {
      "epoch": 2.866070510843869,
      "grad_norm": 0.23301458358764648,
      "learning_rate": 4.6751673150614575e-07,
      "loss": 0.3834,
      "step": 13942
    },
    {
      "epoch": 2.8662760818172472,
      "grad_norm": 0.22655089199543,
      "learning_rate": 4.6608181149039757e-07,
      "loss": 0.3899,
      "step": 13943
    },
    {
      "epoch": 2.866481652790626,
      "grad_norm": 0.12195513397455215,
      "learning_rate": 4.646490854526486e-07,
      "loss": 0.4349,
      "step": 13944
    },
    {
      "epoch": 2.8666872237640044,
      "grad_norm": 0.23551727831363678,
      "learning_rate": 4.6321855346348254e-07,
      "loss": 0.3738,
      "step": 13945
    },
    {
      "epoch": 2.866892794737383,
      "grad_norm": 0.23190248012542725,
      "learning_rate": 4.617902155933679e-07,
      "loss": 0.3944,
      "step": 13946
    },
    {
      "epoch": 2.8670983657107616,
      "grad_norm": 0.22424408793449402,
      "learning_rate": 4.6036407191268337e-07,
      "loss": 0.3904,
      "step": 13947
    },
    {
      "epoch": 2.86730393668414,
      "grad_norm": 0.11816349625587463,
      "learning_rate": 4.5894012249168285e-07,
      "loss": 0.4426,
      "step": 13948
    },
    {
      "epoch": 2.8675095076575188,
      "grad_norm": 0.22937704622745514,
      "learning_rate": 4.5751836740052015e-07,
      "loss": 0.3796,
      "step": 13949
    },
    {
      "epoch": 2.8677150786308974,
      "grad_norm": 0.11853787302970886,
      "learning_rate": 4.560988067092342e-07,
      "loss": 0.4408,
      "step": 13950
    },
    {
      "epoch": 2.867920649604276,
      "grad_norm": 0.23124562203884125,
      "learning_rate": 4.5468144048776416e-07,
      "loss": 0.3838,
      "step": 13951
    },
    {
      "epoch": 2.8681262205776545,
      "grad_norm": 0.23542582988739014,
      "learning_rate": 4.5326626880593416e-07,
      "loss": 0.3749,
      "step": 13952
    },
    {
      "epoch": 2.868331791551033,
      "grad_norm": 0.22498956322669983,
      "learning_rate": 4.5185329173346334e-07,
      "loss": 0.3877,
      "step": 13953
    },
    {
      "epoch": 2.8685373625244117,
      "grad_norm": 0.12203694880008698,
      "learning_rate": 4.5044250933996615e-07,
      "loss": 0.4589,
      "step": 13954
    },
    {
      "epoch": 2.8687429334977903,
      "grad_norm": 0.22876019775867462,
      "learning_rate": 4.490339216949369e-07,
      "loss": 0.3773,
      "step": 13955
    },
    {
      "epoch": 2.8689485044711684,
      "grad_norm": 0.22930005192756653,
      "learning_rate": 4.4762752886778004e-07,
      "loss": 0.3838,
      "step": 13956
    },
    {
      "epoch": 2.8691540754445475,
      "grad_norm": 0.2380819171667099,
      "learning_rate": 4.4622333092777524e-07,
      "loss": 0.3939,
      "step": 13957
    },
    {
      "epoch": 2.8693596464179256,
      "grad_norm": 0.24039901793003082,
      "learning_rate": 4.4482132794410714e-07,
      "loss": 0.3881,
      "step": 13958
    },
    {
      "epoch": 2.869565217391304,
      "grad_norm": 0.2359398603439331,
      "learning_rate": 4.434215199858355e-07,
      "loss": 0.386,
      "step": 13959
    },
    {
      "epoch": 2.869770788364683,
      "grad_norm": 0.12011504173278809,
      "learning_rate": 4.420239071219301e-07,
      "loss": 0.4551,
      "step": 13960
    },
    {
      "epoch": 2.8699763593380614,
      "grad_norm": 0.2287997305393219,
      "learning_rate": 4.406284894212459e-07,
      "loss": 0.3777,
      "step": 13961
    },
    {
      "epoch": 2.87018193031144,
      "grad_norm": 0.21278510987758636,
      "learning_rate": 4.392352669525279e-07,
      "loss": 0.3631,
      "step": 13962
    },
    {
      "epoch": 2.8703875012848186,
      "grad_norm": 0.23229098320007324,
      "learning_rate": 4.3784423978441125e-07,
      "loss": 0.384,
      "step": 13963
    },
    {
      "epoch": 2.870593072258197,
      "grad_norm": 0.2308778017759323,
      "learning_rate": 4.3645540798542605e-07,
      "loss": 0.394,
      "step": 13964
    },
    {
      "epoch": 2.8707986432315757,
      "grad_norm": 0.23160767555236816,
      "learning_rate": 4.3506877162399263e-07,
      "loss": 0.3779,
      "step": 13965
    },
    {
      "epoch": 2.8710042142049543,
      "grad_norm": 0.23534901440143585,
      "learning_rate": 4.336843307684213e-07,
      "loss": 0.365,
      "step": 13966
    },
    {
      "epoch": 2.871209785178333,
      "grad_norm": 0.11934797465801239,
      "learning_rate": 4.323020854869225e-07,
      "loss": 0.4542,
      "step": 13967
    },
    {
      "epoch": 2.8714153561517115,
      "grad_norm": 0.11757402122020721,
      "learning_rate": 4.3092203584759185e-07,
      "loss": 0.4468,
      "step": 13968
    },
    {
      "epoch": 2.87162092712509,
      "grad_norm": 0.22173817455768585,
      "learning_rate": 4.2954418191841484e-07,
      "loss": 0.3748,
      "step": 13969
    },
    {
      "epoch": 2.8718264980984687,
      "grad_norm": 0.23279330134391785,
      "learning_rate": 4.281685237672772e-07,
      "loss": 0.3775,
      "step": 13970
    },
    {
      "epoch": 2.872032069071847,
      "grad_norm": 0.23133385181427002,
      "learning_rate": 4.267950614619498e-07,
      "loss": 0.3657,
      "step": 13971
    },
    {
      "epoch": 2.872237640045226,
      "grad_norm": 0.2283874899148941,
      "learning_rate": 4.2542379507009347e-07,
      "loss": 0.3612,
      "step": 13972
    },
    {
      "epoch": 2.872443211018604,
      "grad_norm": 0.12400206178426743,
      "learning_rate": 4.240547246592641e-07,
      "loss": 0.4621,
      "step": 13973
    },
    {
      "epoch": 2.8726487819919826,
      "grad_norm": 0.22691883146762848,
      "learning_rate": 4.2268785029690783e-07,
      "loss": 0.362,
      "step": 13974
    },
    {
      "epoch": 2.872854352965361,
      "grad_norm": 0.23167765140533447,
      "learning_rate": 4.2132317205037573e-07,
      "loss": 0.3854,
      "step": 13975
    },
    {
      "epoch": 2.8730599239387398,
      "grad_norm": 0.26033303141593933,
      "learning_rate": 4.199606899868841e-07,
      "loss": 0.3508,
      "step": 13976
    },
    {
      "epoch": 2.8732654949121184,
      "grad_norm": 0.22448518872261047,
      "learning_rate": 4.186004041735642e-07,
      "loss": 0.3895,
      "step": 13977
    },
    {
      "epoch": 2.873471065885497,
      "grad_norm": 0.11807616800069809,
      "learning_rate": 4.1724231467743236e-07,
      "loss": 0.4393,
      "step": 13978
    },
    {
      "epoch": 2.8736766368588755,
      "grad_norm": 0.23837019503116608,
      "learning_rate": 4.1588642156539014e-07,
      "loss": 0.4048,
      "step": 13979
    },
    {
      "epoch": 2.873882207832254,
      "grad_norm": 0.24100029468536377,
      "learning_rate": 4.145327249042391e-07,
      "loss": 0.3877,
      "step": 13980
    },
    {
      "epoch": 2.8740877788056327,
      "grad_norm": 0.23236291110515594,
      "learning_rate": 4.131812247606659e-07,
      "loss": 0.3805,
      "step": 13981
    },
    {
      "epoch": 2.8742933497790113,
      "grad_norm": 0.234677255153656,
      "learning_rate": 4.1183192120125723e-07,
      "loss": 0.3882,
      "step": 13982
    },
    {
      "epoch": 2.87449892075239,
      "grad_norm": 0.22873461246490479,
      "learning_rate": 4.10484814292485e-07,
      "loss": 0.3691,
      "step": 13983
    },
    {
      "epoch": 2.8747044917257685,
      "grad_norm": 0.22885732352733612,
      "learning_rate": 4.09139904100716e-07,
      "loss": 0.3814,
      "step": 13984
    },
    {
      "epoch": 2.874910062699147,
      "grad_norm": 0.23706702888011932,
      "learning_rate": 4.0779719069220735e-07,
      "loss": 0.3747,
      "step": 13985
    },
    {
      "epoch": 2.875115633672525,
      "grad_norm": 0.22555503249168396,
      "learning_rate": 4.0645667413310605e-07,
      "loss": 0.3678,
      "step": 13986
    },
    {
      "epoch": 2.8753212046459042,
      "grad_norm": 0.11815163493156433,
      "learning_rate": 4.0511835448945934e-07,
      "loss": 0.4461,
      "step": 13987
    },
    {
      "epoch": 2.8755267756192824,
      "grad_norm": 0.23131482303142548,
      "learning_rate": 4.0378223182718943e-07,
      "loss": 0.3946,
      "step": 13988
    },
    {
      "epoch": 2.875732346592661,
      "grad_norm": 0.22287005186080933,
      "learning_rate": 4.024483062121287e-07,
      "loss": 0.3732,
      "step": 13989
    },
    {
      "epoch": 2.8759379175660396,
      "grad_norm": 0.22222553193569183,
      "learning_rate": 4.011165777099896e-07,
      "loss": 0.3618,
      "step": 13990
    },
    {
      "epoch": 2.876143488539418,
      "grad_norm": 0.22416678071022034,
      "learning_rate": 3.9978704638638455e-07,
      "loss": 0.3859,
      "step": 13991
    },
    {
      "epoch": 2.8763490595127967,
      "grad_norm": 0.23659634590148926,
      "learning_rate": 3.984597123068112e-07,
      "loss": 0.3624,
      "step": 13992
    },
    {
      "epoch": 2.8765546304861753,
      "grad_norm": 0.12456272542476654,
      "learning_rate": 3.971345755366623e-07,
      "loss": 0.4535,
      "step": 13993
    },
    {
      "epoch": 2.876760201459554,
      "grad_norm": 0.23349931836128235,
      "learning_rate": 3.9581163614121564e-07,
      "loss": 0.3767,
      "step": 13994
    },
    {
      "epoch": 2.8769657724329325,
      "grad_norm": 0.2434905469417572,
      "learning_rate": 3.94490894185649e-07,
      "loss": 0.3731,
      "step": 13995
    },
    {
      "epoch": 2.877171343406311,
      "grad_norm": 0.12112405896186829,
      "learning_rate": 3.9317234973503536e-07,
      "loss": 0.4481,
      "step": 13996
    },
    {
      "epoch": 2.8773769143796897,
      "grad_norm": 0.22560545802116394,
      "learning_rate": 3.9185600285432777e-07,
      "loss": 0.3906,
      "step": 13997
    },
    {
      "epoch": 2.8775824853530683,
      "grad_norm": 0.12590011954307556,
      "learning_rate": 3.905418536083744e-07,
      "loss": 0.4603,
      "step": 13998
    },
    {
      "epoch": 2.877788056326447,
      "grad_norm": 0.11752758920192719,
      "learning_rate": 3.8922990206191833e-07,
      "loss": 0.4465,
      "step": 13999
    },
    {
      "epoch": 2.8779936272998254,
      "grad_norm": 0.22191815078258514,
      "learning_rate": 3.87920148279598e-07,
      "loss": 0.3697,
      "step": 14000
    },
    {
      "epoch": 2.8781991982732036,
      "grad_norm": 0.23301634192466736,
      "learning_rate": 3.866125923259367e-07,
      "loss": 0.3553,
      "step": 14001
    },
    {
      "epoch": 2.8784047692465826,
      "grad_norm": 0.22838152945041656,
      "learning_rate": 3.8530723426534797e-07,
      "loss": 0.3772,
      "step": 14002
    },
    {
      "epoch": 2.8786103402199608,
      "grad_norm": 0.2294638454914093,
      "learning_rate": 3.840040741621404e-07,
      "loss": 0.3832,
      "step": 14003
    },
    {
      "epoch": 2.8788159111933393,
      "grad_norm": 0.24881219863891602,
      "learning_rate": 3.8270311208052246e-07,
      "loss": 0.3631,
      "step": 14004
    },
    {
      "epoch": 2.879021482166718,
      "grad_norm": 0.2229405790567398,
      "learning_rate": 3.81404348084583e-07,
      "loss": 0.3767,
      "step": 14005
    },
    {
      "epoch": 2.8792270531400965,
      "grad_norm": 0.11796759814023972,
      "learning_rate": 3.801077822383009e-07,
      "loss": 0.4422,
      "step": 14006
    },
    {
      "epoch": 2.879432624113475,
      "grad_norm": 0.23424452543258667,
      "learning_rate": 3.7881341460555496e-07,
      "loss": 0.3664,
      "step": 14007
    },
    {
      "epoch": 2.8796381950868537,
      "grad_norm": 0.23670734465122223,
      "learning_rate": 3.775212452501192e-07,
      "loss": 0.3929,
      "step": 14008
    },
    {
      "epoch": 2.8798437660602323,
      "grad_norm": 0.12096056342124939,
      "learning_rate": 3.762312742356378e-07,
      "loss": 0.4595,
      "step": 14009
    },
    {
      "epoch": 2.880049337033611,
      "grad_norm": 0.2295764833688736,
      "learning_rate": 3.749435016256747e-07,
      "loss": 0.3821,
      "step": 14010
    },
    {
      "epoch": 2.8802549080069895,
      "grad_norm": 0.2285950481891632,
      "learning_rate": 3.7365792748366934e-07,
      "loss": 0.3757,
      "step": 14011
    },
    {
      "epoch": 2.880460478980368,
      "grad_norm": 0.12199006229639053,
      "learning_rate": 3.72374551872956e-07,
      "loss": 0.4473,
      "step": 14012
    },
    {
      "epoch": 2.8806660499537466,
      "grad_norm": 0.22347088158130646,
      "learning_rate": 3.710933748567541e-07,
      "loss": 0.3702,
      "step": 14013
    },
    {
      "epoch": 2.8808716209271252,
      "grad_norm": 0.23266130685806274,
      "learning_rate": 3.698143964981932e-07,
      "loss": 0.3802,
      "step": 14014
    },
    {
      "epoch": 2.881077191900504,
      "grad_norm": 0.23003004491329193,
      "learning_rate": 3.6853761686026776e-07,
      "loss": 0.3668,
      "step": 14015
    },
    {
      "epoch": 2.881282762873882,
      "grad_norm": 0.22506079077720642,
      "learning_rate": 3.672630360058926e-07,
      "loss": 0.3672,
      "step": 14016
    },
    {
      "epoch": 2.881488333847261,
      "grad_norm": 0.23392482101917267,
      "learning_rate": 3.659906539978575e-07,
      "loss": 0.3907,
      "step": 14017
    },
    {
      "epoch": 2.881693904820639,
      "grad_norm": 0.22708185017108917,
      "learning_rate": 3.647204708988422e-07,
      "loss": 0.3736,
      "step": 14018
    },
    {
      "epoch": 2.8818994757940177,
      "grad_norm": 0.11717811226844788,
      "learning_rate": 3.6345248677142176e-07,
      "loss": 0.4522,
      "step": 14019
    },
    {
      "epoch": 2.8821050467673963,
      "grad_norm": 0.22868549823760986,
      "learning_rate": 3.621867016780661e-07,
      "loss": 0.3855,
      "step": 14020
    },
    {
      "epoch": 2.882310617740775,
      "grad_norm": 0.12395808845758438,
      "learning_rate": 3.6092311568113546e-07,
      "loss": 0.4369,
      "step": 14021
    },
    {
      "epoch": 2.8825161887141535,
      "grad_norm": 0.22594808042049408,
      "learning_rate": 3.5966172884287995e-07,
      "loss": 0.3708,
      "step": 14022
    },
    {
      "epoch": 2.882721759687532,
      "grad_norm": 0.11887579411268234,
      "learning_rate": 3.5840254122544495e-07,
      "loss": 0.4554,
      "step": 14023
    },
    {
      "epoch": 2.8829273306609107,
      "grad_norm": 0.12510953843593597,
      "learning_rate": 3.571455528908657e-07,
      "loss": 0.4457,
      "step": 14024
    },
    {
      "epoch": 2.8831329016342893,
      "grad_norm": 0.22904570400714874,
      "learning_rate": 3.558907639010628e-07,
      "loss": 0.3703,
      "step": 14025
    },
    {
      "epoch": 2.883338472607668,
      "grad_norm": 0.24266590178012848,
      "learning_rate": 3.5463817431785176e-07,
      "loss": 0.3713,
      "step": 14026
    },
    {
      "epoch": 2.8835440435810464,
      "grad_norm": 0.22441810369491577,
      "learning_rate": 3.5338778420294817e-07,
      "loss": 0.4028,
      "step": 14027
    },
    {
      "epoch": 2.883749614554425,
      "grad_norm": 0.23846034705638885,
      "learning_rate": 3.521395936179528e-07,
      "loss": 0.3993,
      "step": 14028
    },
    {
      "epoch": 2.8839551855278036,
      "grad_norm": 0.2247145175933838,
      "learning_rate": 3.5089360262435146e-07,
      "loss": 0.3895,
      "step": 14029
    },
    {
      "epoch": 2.884160756501182,
      "grad_norm": 0.2352132946252823,
      "learning_rate": 3.4964981128354e-07,
      "loss": 0.3754,
      "step": 14030
    },
    {
      "epoch": 2.8843663274745603,
      "grad_norm": 0.22683286666870117,
      "learning_rate": 3.484082196567795e-07,
      "loss": 0.3893,
      "step": 14031
    },
    {
      "epoch": 2.8845718984479394,
      "grad_norm": 0.2301369607448578,
      "learning_rate": 3.4716882780525097e-07,
      "loss": 0.3909,
      "step": 14032
    },
    {
      "epoch": 2.8847774694213175,
      "grad_norm": 0.23967629671096802,
      "learning_rate": 3.4593163579000553e-07,
      "loss": 0.3981,
      "step": 14033
    },
    {
      "epoch": 2.884983040394696,
      "grad_norm": 0.2322077453136444,
      "learning_rate": 3.446966436719945e-07,
      "loss": 0.3826,
      "step": 14034
    },
    {
      "epoch": 2.8851886113680747,
      "grad_norm": 0.12146010994911194,
      "learning_rate": 3.4346385151206416e-07,
      "loss": 0.4504,
      "step": 14035
    },
    {
      "epoch": 2.8853941823414533,
      "grad_norm": 0.24295859038829803,
      "learning_rate": 3.4223325937094096e-07,
      "loss": 0.369,
      "step": 14036
    },
    {
      "epoch": 2.885599753314832,
      "grad_norm": 0.24125894904136658,
      "learning_rate": 3.410048673092614e-07,
      "loss": 0.3895,
      "step": 14037
    },
    {
      "epoch": 2.8858053242882105,
      "grad_norm": 0.11830901354551315,
      "learning_rate": 3.397786753875321e-07,
      "loss": 0.4409,
      "step": 14038
    },
    {
      "epoch": 2.886010895261589,
      "grad_norm": 0.2366967350244522,
      "learning_rate": 3.385546836661696e-07,
      "loss": 0.3942,
      "step": 14039
    },
    {
      "epoch": 2.8862164662349676,
      "grad_norm": 0.22245019674301147,
      "learning_rate": 3.373328922054658e-07,
      "loss": 0.3795,
      "step": 14040
    },
    {
      "epoch": 2.8864220372083462,
      "grad_norm": 0.12539364397525787,
      "learning_rate": 3.3611330106561754e-07,
      "loss": 0.4422,
      "step": 14041
    },
    {
      "epoch": 2.886627608181725,
      "grad_norm": 0.22733426094055176,
      "learning_rate": 3.3489591030671174e-07,
      "loss": 0.3805,
      "step": 14042
    },
    {
      "epoch": 2.8868331791551034,
      "grad_norm": 0.24280138313770294,
      "learning_rate": 3.336807199887204e-07,
      "loss": 0.3993,
      "step": 14043
    },
    {
      "epoch": 2.887038750128482,
      "grad_norm": 0.12910622358322144,
      "learning_rate": 3.3246773017151066e-07,
      "loss": 0.4552,
      "step": 14044
    },
    {
      "epoch": 2.8872443211018606,
      "grad_norm": 0.11929771304130554,
      "learning_rate": 3.3125694091483474e-07,
      "loss": 0.4486,
      "step": 14045
    },
    {
      "epoch": 2.8874498920752387,
      "grad_norm": 0.23444950580596924,
      "learning_rate": 3.3004835227835485e-07,
      "loss": 0.3619,
      "step": 14046
    },
    {
      "epoch": 2.8876554630486178,
      "grad_norm": 0.2314281016588211,
      "learning_rate": 3.2884196432160343e-07,
      "loss": 0.3573,
      "step": 14047
    },
    {
      "epoch": 2.887861034021996,
      "grad_norm": 0.22594213485717773,
      "learning_rate": 3.276377771040179e-07,
      "loss": 0.3828,
      "step": 14048
    },
    {
      "epoch": 2.8880666049953745,
      "grad_norm": 0.2312646061182022,
      "learning_rate": 3.264357906849208e-07,
      "loss": 0.3858,
      "step": 14049
    },
    {
      "epoch": 2.888272175968753,
      "grad_norm": 0.23432159423828125,
      "learning_rate": 3.252360051235248e-07,
      "loss": 0.3754,
      "step": 14050
    },
    {
      "epoch": 2.8884777469421317,
      "grad_norm": 0.23932310938835144,
      "learning_rate": 3.240384204789426e-07,
      "loss": 0.3918,
      "step": 14051
    },
    {
      "epoch": 2.8886833179155103,
      "grad_norm": 0.2506803572177887,
      "learning_rate": 3.2284303681017203e-07,
      "loss": 0.368,
      "step": 14052
    },
    {
      "epoch": 2.888888888888889,
      "grad_norm": 0.22862713038921356,
      "learning_rate": 3.2164985417610596e-07,
      "loss": 0.3896,
      "step": 14053
    },
    {
      "epoch": 2.8890944598622674,
      "grad_norm": 0.23301179707050323,
      "learning_rate": 3.204588726355273e-07,
      "loss": 0.3869,
      "step": 14054
    },
    {
      "epoch": 2.889300030835646,
      "grad_norm": 0.2313561588525772,
      "learning_rate": 3.1927009224710925e-07,
      "loss": 0.3629,
      "step": 14055
    },
    {
      "epoch": 2.8895056018090246,
      "grad_norm": 0.22642727196216583,
      "learning_rate": 3.1808351306941486e-07,
      "loss": 0.3816,
      "step": 14056
    },
    {
      "epoch": 2.889711172782403,
      "grad_norm": 0.2348901927471161,
      "learning_rate": 3.1689913516089743e-07,
      "loss": 0.3855,
      "step": 14057
    },
    {
      "epoch": 2.889916743755782,
      "grad_norm": 0.24844767153263092,
      "learning_rate": 3.1571695857991523e-07,
      "loss": 0.3891,
      "step": 14058
    },
    {
      "epoch": 2.8901223147291604,
      "grad_norm": 0.226862832903862,
      "learning_rate": 3.145369833847067e-07,
      "loss": 0.3812,
      "step": 14059
    },
    {
      "epoch": 2.890327885702539,
      "grad_norm": 0.22782935202121735,
      "learning_rate": 3.1335920963340037e-07,
      "loss": 0.3698,
      "step": 14060
    },
    {
      "epoch": 2.890533456675917,
      "grad_norm": 0.22575967013835907,
      "learning_rate": 3.121836373840198e-07,
      "loss": 0.3807,
      "step": 14061
    },
    {
      "epoch": 2.890739027649296,
      "grad_norm": 0.24145731329917908,
      "learning_rate": 3.110102666944836e-07,
      "loss": 0.3619,
      "step": 14062
    },
    {
      "epoch": 2.8909445986226743,
      "grad_norm": 0.24116384983062744,
      "learning_rate": 3.0983909762259567e-07,
      "loss": 0.3831,
      "step": 14063
    },
    {
      "epoch": 2.8911501695960533,
      "grad_norm": 0.21999165415763855,
      "learning_rate": 3.0867013022604977e-07,
      "loss": 0.3963,
      "step": 14064
    },
    {
      "epoch": 2.8913557405694315,
      "grad_norm": 0.23448392748832703,
      "learning_rate": 3.075033645624448e-07,
      "loss": 0.3707,
      "step": 14065
    },
    {
      "epoch": 2.89156131154281,
      "grad_norm": 0.11776132136583328,
      "learning_rate": 3.063388006892548e-07,
      "loss": 0.4614,
      "step": 14066
    },
    {
      "epoch": 2.8917668825161886,
      "grad_norm": 0.12120406329631805,
      "learning_rate": 3.0517643866385395e-07,
      "loss": 0.4609,
      "step": 14067
    },
    {
      "epoch": 2.891972453489567,
      "grad_norm": 0.22066402435302734,
      "learning_rate": 3.0401627854351133e-07,
      "loss": 0.3709,
      "step": 14068
    },
    {
      "epoch": 2.892178024462946,
      "grad_norm": 0.22971779108047485,
      "learning_rate": 3.0285832038537134e-07,
      "loss": 0.3811,
      "step": 14069
    },
    {
      "epoch": 2.8923835954363244,
      "grad_norm": 0.12074688076972961,
      "learning_rate": 3.0170256424649325e-07,
      "loss": 0.4428,
      "step": 14070
    },
    {
      "epoch": 2.892589166409703,
      "grad_norm": 0.23068879544734955,
      "learning_rate": 3.0054901018380656e-07,
      "loss": 0.3824,
      "step": 14071
    },
    {
      "epoch": 2.8927947373830816,
      "grad_norm": 0.23140643537044525,
      "learning_rate": 2.993976582541458e-07,
      "loss": 0.3776,
      "step": 14072
    },
    {
      "epoch": 2.89300030835646,
      "grad_norm": 0.2334955334663391,
      "learning_rate": 2.982485085142356e-07,
      "loss": 0.3668,
      "step": 14073
    },
    {
      "epoch": 2.8932058793298387,
      "grad_norm": 0.22583140432834625,
      "learning_rate": 2.9710156102068563e-07,
      "loss": 0.3872,
      "step": 14074
    },
    {
      "epoch": 2.8934114503032173,
      "grad_norm": 0.23303750157356262,
      "learning_rate": 2.959568158300008e-07,
      "loss": 0.383,
      "step": 14075
    },
    {
      "epoch": 2.8936170212765955,
      "grad_norm": 0.2299990952014923,
      "learning_rate": 2.948142729985759e-07,
      "loss": 0.36,
      "step": 14076
    },
    {
      "epoch": 2.8938225922499745,
      "grad_norm": 0.12316111475229263,
      "learning_rate": 2.9367393258270094e-07,
      "loss": 0.4644,
      "step": 14077
    },
    {
      "epoch": 2.8940281632233527,
      "grad_norm": 0.24173006415367126,
      "learning_rate": 2.9253579463855097e-07,
      "loss": 0.3787,
      "step": 14078
    },
    {
      "epoch": 2.8942337341967317,
      "grad_norm": 0.12229252606630325,
      "learning_rate": 2.9139985922220114e-07,
      "loss": 0.4535,
      "step": 14079
    },
    {
      "epoch": 2.89443930517011,
      "grad_norm": 0.22947286069393158,
      "learning_rate": 2.9026612638961673e-07,
      "loss": 0.3694,
      "step": 14080
    },
    {
      "epoch": 2.8946448761434884,
      "grad_norm": 0.2314113825559616,
      "learning_rate": 2.8913459619664795e-07,
      "loss": 0.3772,
      "step": 14081
    },
    {
      "epoch": 2.894850447116867,
      "grad_norm": 0.23245009779930115,
      "learning_rate": 2.880052686990353e-07,
      "loss": 0.3879,
      "step": 14082
    },
    {
      "epoch": 2.8950560180902456,
      "grad_norm": 0.23955170810222626,
      "learning_rate": 2.868781439524193e-07,
      "loss": 0.3769,
      "step": 14083
    },
    {
      "epoch": 2.895261589063624,
      "grad_norm": 0.22946025431156158,
      "learning_rate": 2.857532220123305e-07,
      "loss": 0.3739,
      "step": 14084
    },
    {
      "epoch": 2.8954671600370028,
      "grad_norm": 0.22186554968357086,
      "learning_rate": 2.8463050293418946e-07,
      "loss": 0.3714,
      "step": 14085
    },
    {
      "epoch": 2.8956727310103814,
      "grad_norm": 0.24299030005931854,
      "learning_rate": 2.835099867733021e-07,
      "loss": 0.384,
      "step": 14086
    },
    {
      "epoch": 2.89587830198376,
      "grad_norm": 0.24568887054920197,
      "learning_rate": 2.823916735848742e-07,
      "loss": 0.3973,
      "step": 14087
    },
    {
      "epoch": 2.8960838729571385,
      "grad_norm": 0.23442420363426208,
      "learning_rate": 2.812755634239966e-07,
      "loss": 0.3832,
      "step": 14088
    },
    {
      "epoch": 2.896289443930517,
      "grad_norm": 0.22998051345348358,
      "learning_rate": 2.801616563456605e-07,
      "loss": 0.394,
      "step": 14089
    },
    {
      "epoch": 2.8964950149038957,
      "grad_norm": 0.2347511351108551,
      "learning_rate": 2.7904995240473684e-07,
      "loss": 0.3739,
      "step": 14090
    },
    {
      "epoch": 2.896700585877274,
      "grad_norm": 0.11745678633451462,
      "learning_rate": 2.779404516559969e-07,
      "loss": 0.4466,
      "step": 14091
    },
    {
      "epoch": 2.896906156850653,
      "grad_norm": 0.23240487277507782,
      "learning_rate": 2.7683315415410195e-07,
      "loss": 0.3759,
      "step": 14092
    },
    {
      "epoch": 2.897111727824031,
      "grad_norm": 0.23751090466976166,
      "learning_rate": 2.757280599535983e-07,
      "loss": 0.3839,
      "step": 14093
    },
    {
      "epoch": 2.89731729879741,
      "grad_norm": 0.12017631530761719,
      "learning_rate": 2.7462516910893745e-07,
      "loss": 0.4444,
      "step": 14094
    },
    {
      "epoch": 2.897522869770788,
      "grad_norm": 0.22520147264003754,
      "learning_rate": 2.735244816744459e-07,
      "loss": 0.3866,
      "step": 14095
    },
    {
      "epoch": 2.897728440744167,
      "grad_norm": 0.23022042214870453,
      "learning_rate": 2.7242599770435527e-07,
      "loss": 0.3813,
      "step": 14096
    },
    {
      "epoch": 2.8979340117175454,
      "grad_norm": 0.23134584724903107,
      "learning_rate": 2.7132971725277736e-07,
      "loss": 0.3583,
      "step": 14097
    },
    {
      "epoch": 2.898139582690924,
      "grad_norm": 0.23426006734371185,
      "learning_rate": 2.7023564037372383e-07,
      "loss": 0.3785,
      "step": 14098
    },
    {
      "epoch": 2.8983451536643026,
      "grad_norm": 0.11582779884338379,
      "learning_rate": 2.6914376712109166e-07,
      "loss": 0.4577,
      "step": 14099
    },
    {
      "epoch": 2.898550724637681,
      "grad_norm": 0.12305998057126999,
      "learning_rate": 2.6805409754867783e-07,
      "loss": 0.4363,
      "step": 14100
    },
    {
      "epoch": 2.8987562956110597,
      "grad_norm": 0.21655605733394623,
      "learning_rate": 2.6696663171015933e-07,
      "loss": 0.3598,
      "step": 14101
    },
    {
      "epoch": 2.8989618665844383,
      "grad_norm": 0.2362840622663498,
      "learning_rate": 2.658813696591134e-07,
      "loss": 0.3895,
      "step": 14102
    },
    {
      "epoch": 2.899167437557817,
      "grad_norm": 0.11493504792451859,
      "learning_rate": 2.6479831144900714e-07,
      "loss": 0.4464,
      "step": 14103
    },
    {
      "epoch": 2.8993730085311955,
      "grad_norm": 0.2330009937286377,
      "learning_rate": 2.63717457133193e-07,
      "loss": 0.3753,
      "step": 14104
    },
    {
      "epoch": 2.899578579504574,
      "grad_norm": 0.23272541165351868,
      "learning_rate": 2.6263880676492823e-07,
      "loss": 0.3787,
      "step": 14105
    },
    {
      "epoch": 2.8997841504779522,
      "grad_norm": 0.12182939052581787,
      "learning_rate": 2.615623603973405e-07,
      "loss": 0.4519,
      "step": 14106
    },
    {
      "epoch": 2.8999897214513313,
      "grad_norm": 0.23827330768108368,
      "learning_rate": 2.6048811808347227e-07,
      "loss": 0.3878,
      "step": 14107
    },
    {
      "epoch": 2.9001952924247094,
      "grad_norm": 0.22565053403377533,
      "learning_rate": 2.5941607987624626e-07,
      "loss": 0.3737,
      "step": 14108
    },
    {
      "epoch": 2.9004008633980884,
      "grad_norm": 0.22274649143218994,
      "learning_rate": 2.583462458284652e-07,
      "loss": 0.3767,
      "step": 14109
    },
    {
      "epoch": 2.9006064343714666,
      "grad_norm": 0.2283952683210373,
      "learning_rate": 2.57278615992852e-07,
      "loss": 0.3775,
      "step": 14110
    },
    {
      "epoch": 2.900812005344845,
      "grad_norm": 0.2287856638431549,
      "learning_rate": 2.5621319042198945e-07,
      "loss": 0.3912,
      "step": 14111
    },
    {
      "epoch": 2.9010175763182238,
      "grad_norm": 0.23402291536331177,
      "learning_rate": 2.5514996916836564e-07,
      "loss": 0.3875,
      "step": 14112
    },
    {
      "epoch": 2.9012231472916024,
      "grad_norm": 0.23402421176433563,
      "learning_rate": 2.5408895228437366e-07,
      "loss": 0.3955,
      "step": 14113
    },
    {
      "epoch": 2.901428718264981,
      "grad_norm": 0.13076432049274445,
      "learning_rate": 2.530301398222767e-07,
      "loss": 0.4522,
      "step": 14114
    },
    {
      "epoch": 2.9016342892383595,
      "grad_norm": 0.23658651113510132,
      "learning_rate": 2.519735318342331e-07,
      "loss": 0.3845,
      "step": 14115
    },
    {
      "epoch": 2.901839860211738,
      "grad_norm": 0.12689454853534698,
      "learning_rate": 2.509191283723061e-07,
      "loss": 0.4543,
      "step": 14116
    },
    {
      "epoch": 2.9020454311851167,
      "grad_norm": 0.12162572145462036,
      "learning_rate": 2.4986692948843925e-07,
      "loss": 0.4385,
      "step": 14117
    },
    {
      "epoch": 2.9022510021584953,
      "grad_norm": 0.23482230305671692,
      "learning_rate": 2.48816935234461e-07,
      "loss": 0.381,
      "step": 14118
    },
    {
      "epoch": 2.902456573131874,
      "grad_norm": 0.22458134591579437,
      "learning_rate": 2.477691456621051e-07,
      "loss": 0.3733,
      "step": 14119
    },
    {
      "epoch": 2.9026621441052525,
      "grad_norm": 0.22772455215454102,
      "learning_rate": 2.467235608230001e-07,
      "loss": 0.373,
      "step": 14120
    },
    {
      "epoch": 2.902867715078631,
      "grad_norm": 0.22812238335609436,
      "learning_rate": 2.4568018076864484e-07,
      "loss": 0.3561,
      "step": 14121
    },
    {
      "epoch": 2.9030732860520096,
      "grad_norm": 0.24260129034519196,
      "learning_rate": 2.446390055504433e-07,
      "loss": 0.3947,
      "step": 14122
    },
    {
      "epoch": 2.903278857025388,
      "grad_norm": 0.224505215883255,
      "learning_rate": 2.436000352196943e-07,
      "loss": 0.374,
      "step": 14123
    },
    {
      "epoch": 2.903484427998767,
      "grad_norm": 0.22481678426265717,
      "learning_rate": 2.42563269827582e-07,
      "loss": 0.3731,
      "step": 14124
    },
    {
      "epoch": 2.903689998972145,
      "grad_norm": 0.2302400767803192,
      "learning_rate": 2.415287094251756e-07,
      "loss": 0.3861,
      "step": 14125
    },
    {
      "epoch": 2.9038955699455236,
      "grad_norm": 0.12294553965330124,
      "learning_rate": 2.404963540634542e-07,
      "loss": 0.4498,
      "step": 14126
    },
    {
      "epoch": 2.904101140918902,
      "grad_norm": 0.23122653365135193,
      "learning_rate": 2.3946620379327214e-07,
      "loss": 0.3611,
      "step": 14127
    },
    {
      "epoch": 2.9043067118922807,
      "grad_norm": 0.2358085960149765,
      "learning_rate": 2.3843825866537883e-07,
      "loss": 0.396,
      "step": 14128
    },
    {
      "epoch": 2.9045122828656593,
      "grad_norm": 0.12024319916963577,
      "learning_rate": 2.374125187304188e-07,
      "loss": 0.4486,
      "step": 14129
    },
    {
      "epoch": 2.904717853839038,
      "grad_norm": 0.22727903723716736,
      "learning_rate": 2.3638898403892162e-07,
      "loss": 0.355,
      "step": 14130
    },
    {
      "epoch": 2.9049234248124165,
      "grad_norm": 0.23375868797302246,
      "learning_rate": 2.3536765464131695e-07,
      "loss": 0.3874,
      "step": 14131
    },
    {
      "epoch": 2.905128995785795,
      "grad_norm": 0.23008592426776886,
      "learning_rate": 2.343485305879195e-07,
      "loss": 0.4048,
      "step": 14132
    },
    {
      "epoch": 2.9053345667591737,
      "grad_norm": 0.12159692496061325,
      "learning_rate": 2.3333161192893416e-07,
      "loss": 0.4508,
      "step": 14133
    },
    {
      "epoch": 2.9055401377325523,
      "grad_norm": 0.22369949519634247,
      "learning_rate": 2.3231689871446083e-07,
      "loss": 0.3681,
      "step": 14134
    },
    {
      "epoch": 2.905745708705931,
      "grad_norm": 0.23495550453662872,
      "learning_rate": 2.3130439099448953e-07,
      "loss": 0.3923,
      "step": 14135
    },
    {
      "epoch": 2.9059512796793094,
      "grad_norm": 0.23136933147907257,
      "learning_rate": 2.3029408881890535e-07,
      "loss": 0.3752,
      "step": 14136
    },
    {
      "epoch": 2.906156850652688,
      "grad_norm": 0.12260935455560684,
      "learning_rate": 2.292859922374785e-07,
      "loss": 0.4599,
      "step": 14137
    },
    {
      "epoch": 2.906362421626066,
      "grad_norm": 0.22997353971004486,
      "learning_rate": 2.2828010129986922e-07,
      "loss": 0.4108,
      "step": 14138
    },
    {
      "epoch": 2.906567992599445,
      "grad_norm": 0.23019467294216156,
      "learning_rate": 2.2727641605564287e-07,
      "loss": 0.391,
      "step": 14139
    },
    {
      "epoch": 2.9067735635728233,
      "grad_norm": 0.219661682844162,
      "learning_rate": 2.2627493655423492e-07,
      "loss": 0.3711,
      "step": 14140
    },
    {
      "epoch": 2.906979134546202,
      "grad_norm": 0.23242846131324768,
      "learning_rate": 2.252756628449909e-07,
      "loss": 0.3808,
      "step": 14141
    },
    {
      "epoch": 2.9071847055195805,
      "grad_norm": 0.21850700676441193,
      "learning_rate": 2.2427859497713644e-07,
      "loss": 0.3812,
      "step": 14142
    },
    {
      "epoch": 2.907390276492959,
      "grad_norm": 0.22501927614212036,
      "learning_rate": 2.2328373299979723e-07,
      "loss": 0.3597,
      "step": 14143
    },
    {
      "epoch": 2.9075958474663377,
      "grad_norm": 0.22534947097301483,
      "learning_rate": 2.2229107696198403e-07,
      "loss": 0.3821,
      "step": 14144
    },
    {
      "epoch": 2.9078014184397163,
      "grad_norm": 0.22857366502285004,
      "learning_rate": 2.213006269125978e-07,
      "loss": 0.3938,
      "step": 14145
    },
    {
      "epoch": 2.908006989413095,
      "grad_norm": 0.23802757263183594,
      "learning_rate": 2.2031238290042943e-07,
      "loss": 0.3871,
      "step": 14146
    },
    {
      "epoch": 2.9082125603864735,
      "grad_norm": 0.2242707461118698,
      "learning_rate": 2.1932634497417505e-07,
      "loss": 0.3684,
      "step": 14147
    },
    {
      "epoch": 2.908418131359852,
      "grad_norm": 0.12156816571950912,
      "learning_rate": 2.1834251318240573e-07,
      "loss": 0.4328,
      "step": 14148
    },
    {
      "epoch": 2.9086237023332306,
      "grad_norm": 0.22905348241329193,
      "learning_rate": 2.1736088757359274e-07,
      "loss": 0.3772,
      "step": 14149
    },
    {
      "epoch": 2.9088292733066092,
      "grad_norm": 0.2287713587284088,
      "learning_rate": 2.163814681960924e-07,
      "loss": 0.3649,
      "step": 14150
    },
    {
      "epoch": 2.909034844279988,
      "grad_norm": 0.2233857661485672,
      "learning_rate": 2.1540425509816608e-07,
      "loss": 0.3777,
      "step": 14151
    },
    {
      "epoch": 2.9092404152533664,
      "grad_norm": 0.23792994022369385,
      "learning_rate": 2.1442924832794532e-07,
      "loss": 0.3854,
      "step": 14152
    },
    {
      "epoch": 2.9094459862267446,
      "grad_norm": 0.22729872167110443,
      "learning_rate": 2.1345644793346663e-07,
      "loss": 0.3748,
      "step": 14153
    },
    {
      "epoch": 2.9096515572001236,
      "grad_norm": 0.2334190458059311,
      "learning_rate": 2.1248585396265674e-07,
      "loss": 0.3878,
      "step": 14154
    },
    {
      "epoch": 2.9098571281735017,
      "grad_norm": 0.11780460178852081,
      "learning_rate": 2.1151746646333237e-07,
      "loss": 0.4551,
      "step": 14155
    },
    {
      "epoch": 2.9100626991468803,
      "grad_norm": 0.12107131630182266,
      "learning_rate": 2.1055128548320534e-07,
      "loss": 0.4406,
      "step": 14156
    },
    {
      "epoch": 2.910268270120259,
      "grad_norm": 0.12362342327833176,
      "learning_rate": 2.0958731106986762e-07,
      "loss": 0.4345,
      "step": 14157
    },
    {
      "epoch": 2.9104738410936375,
      "grad_norm": 0.12129798531532288,
      "learning_rate": 2.086255432708162e-07,
      "loss": 0.4588,
      "step": 14158
    },
    {
      "epoch": 2.910679412067016,
      "grad_norm": 0.2197551131248474,
      "learning_rate": 2.0766598213342814e-07,
      "loss": 0.3835,
      "step": 14159
    },
    {
      "epoch": 2.9108849830403947,
      "grad_norm": 0.23252207040786743,
      "learning_rate": 2.0670862770498068e-07,
      "loss": 0.3737,
      "step": 14160
    },
    {
      "epoch": 2.9110905540137733,
      "grad_norm": 0.23787905275821686,
      "learning_rate": 2.0575348003263107e-07,
      "loss": 0.3891,
      "step": 14161
    },
    {
      "epoch": 2.911296124987152,
      "grad_norm": 0.3696140944957733,
      "learning_rate": 2.0480053916344666e-07,
      "loss": 0.3845,
      "step": 14162
    },
    {
      "epoch": 2.9115016959605304,
      "grad_norm": 0.22981351613998413,
      "learning_rate": 2.0384980514435993e-07,
      "loss": 0.3624,
      "step": 14163
    },
    {
      "epoch": 2.911707266933909,
      "grad_norm": 0.24888327717781067,
      "learning_rate": 2.0290127802222337e-07,
      "loss": 0.3841,
      "step": 14164
    },
    {
      "epoch": 2.9119128379072876,
      "grad_norm": 0.22164028882980347,
      "learning_rate": 2.0195495784375463e-07,
      "loss": 0.3711,
      "step": 14165
    },
    {
      "epoch": 2.912118408880666,
      "grad_norm": 0.23485776782035828,
      "learning_rate": 2.0101084465558141e-07,
      "loss": 0.3845,
      "step": 14166
    },
    {
      "epoch": 2.912323979854045,
      "grad_norm": 0.23392504453659058,
      "learning_rate": 2.000689385042115e-07,
      "loss": 0.374,
      "step": 14167
    },
    {
      "epoch": 2.912529550827423,
      "grad_norm": 0.23598188161849976,
      "learning_rate": 1.9912923943605278e-07,
      "loss": 0.384,
      "step": 14168
    },
    {
      "epoch": 2.912735121800802,
      "grad_norm": 0.23599591851234436,
      "learning_rate": 1.9819174749739822e-07,
      "loss": 0.3803,
      "step": 14169
    },
    {
      "epoch": 2.91294069277418,
      "grad_norm": 0.22775763273239136,
      "learning_rate": 1.972564627344359e-07,
      "loss": 0.3712,
      "step": 14170
    },
    {
      "epoch": 2.9131462637475587,
      "grad_norm": 0.22463448345661163,
      "learning_rate": 1.9632338519323391e-07,
      "loss": 0.3685,
      "step": 14171
    },
    {
      "epoch": 2.9133518347209373,
      "grad_norm": 0.12167978286743164,
      "learning_rate": 1.9539251491977052e-07,
      "loss": 0.4519,
      "step": 14172
    },
    {
      "epoch": 2.913557405694316,
      "grad_norm": 0.22514131665229797,
      "learning_rate": 1.9446385195990403e-07,
      "loss": 0.368,
      "step": 14173
    },
    {
      "epoch": 2.9137629766676945,
      "grad_norm": 0.22279466688632965,
      "learning_rate": 1.9353739635937784e-07,
      "loss": 0.3641,
      "step": 14174
    },
    {
      "epoch": 2.913968547641073,
      "grad_norm": 0.11997415125370026,
      "learning_rate": 1.9261314816384046e-07,
      "loss": 0.4445,
      "step": 14175
    },
    {
      "epoch": 2.9141741186144516,
      "grad_norm": 0.12249033898115158,
      "learning_rate": 1.9169110741882546e-07,
      "loss": 0.4391,
      "step": 14176
    },
    {
      "epoch": 2.9143796895878302,
      "grad_norm": 0.2306622713804245,
      "learning_rate": 1.907712741697565e-07,
      "loss": 0.3835,
      "step": 14177
    },
    {
      "epoch": 2.914585260561209,
      "grad_norm": 0.11753799021244049,
      "learning_rate": 1.898536484619473e-07,
      "loss": 0.4275,
      "step": 14178
    },
    {
      "epoch": 2.9147908315345874,
      "grad_norm": 0.2338220775127411,
      "learning_rate": 1.8893823034061176e-07,
      "loss": 0.4019,
      "step": 14179
    },
    {
      "epoch": 2.914996402507966,
      "grad_norm": 0.1225576251745224,
      "learning_rate": 1.8802501985083875e-07,
      "loss": 0.4512,
      "step": 14180
    },
    {
      "epoch": 2.9152019734813446,
      "grad_norm": 0.11842867732048035,
      "learning_rate": 1.8711401703762232e-07,
      "loss": 0.4508,
      "step": 14181
    },
    {
      "epoch": 2.915407544454723,
      "grad_norm": 0.22266638278961182,
      "learning_rate": 1.8620522194584156e-07,
      "loss": 0.377,
      "step": 14182
    },
    {
      "epoch": 2.9156131154281013,
      "grad_norm": 0.2360514998435974,
      "learning_rate": 1.8529863462027563e-07,
      "loss": 0.3851,
      "step": 14183
    },
    {
      "epoch": 2.9158186864014803,
      "grad_norm": 0.24209058284759521,
      "learning_rate": 1.8439425510557885e-07,
      "loss": 0.3813,
      "step": 14184
    },
    {
      "epoch": 2.9160242573748585,
      "grad_norm": 0.22401560842990875,
      "learning_rate": 1.8349208344631052e-07,
      "loss": 0.381,
      "step": 14185
    },
    {
      "epoch": 2.916229828348237,
      "grad_norm": 0.12131255865097046,
      "learning_rate": 1.8259211968691514e-07,
      "loss": 0.462,
      "step": 14186
    },
    {
      "epoch": 2.9164353993216157,
      "grad_norm": 0.22824054956436157,
      "learning_rate": 1.8169436387173222e-07,
      "loss": 0.3783,
      "step": 14187
    },
    {
      "epoch": 2.9166409702949943,
      "grad_norm": 0.2334217131137848,
      "learning_rate": 1.807988160449864e-07,
      "loss": 0.3739,
      "step": 14188
    },
    {
      "epoch": 2.916846541268373,
      "grad_norm": 0.22181300818920135,
      "learning_rate": 1.7990547625079735e-07,
      "loss": 0.3881,
      "step": 14189
    },
    {
      "epoch": 2.9170521122417514,
      "grad_norm": 0.22861194610595703,
      "learning_rate": 1.790143445331749e-07,
      "loss": 0.3828,
      "step": 14190
    },
    {
      "epoch": 2.91725768321513,
      "grad_norm": 0.2301216721534729,
      "learning_rate": 1.781254209360289e-07,
      "loss": 0.3711,
      "step": 14191
    },
    {
      "epoch": 2.9174632541885086,
      "grad_norm": 0.22555860877037048,
      "learning_rate": 1.7723870550313938e-07,
      "loss": 0.3734,
      "step": 14192
    },
    {
      "epoch": 2.917668825161887,
      "grad_norm": 0.23354589939117432,
      "learning_rate": 1.7635419827820132e-07,
      "loss": 0.3687,
      "step": 14193
    },
    {
      "epoch": 2.917874396135266,
      "grad_norm": 0.23408174514770508,
      "learning_rate": 1.754718993047899e-07,
      "loss": 0.3708,
      "step": 14194
    },
    {
      "epoch": 2.9180799671086444,
      "grad_norm": 0.23139835894107819,
      "learning_rate": 1.7459180862636037e-07,
      "loss": 0.3577,
      "step": 14195
    },
    {
      "epoch": 2.918285538082023,
      "grad_norm": 0.11874835938215256,
      "learning_rate": 1.7371392628628802e-07,
      "loss": 0.4303,
      "step": 14196
    },
    {
      "epoch": 2.9184911090554015,
      "grad_norm": 0.23826338350772858,
      "learning_rate": 1.7283825232780825e-07,
      "loss": 0.3858,
      "step": 14197
    },
    {
      "epoch": 2.9186966800287797,
      "grad_norm": 0.13164587318897247,
      "learning_rate": 1.7196478679406658e-07,
      "loss": 0.447,
      "step": 14198
    },
    {
      "epoch": 2.9189022510021587,
      "grad_norm": 0.22203896939754486,
      "learning_rate": 1.7109352972809856e-07,
      "loss": 0.3562,
      "step": 14199
    },
    {
      "epoch": 2.919107821975537,
      "grad_norm": 0.24076960980892181,
      "learning_rate": 1.7022448117281487e-07,
      "loss": 0.3789,
      "step": 14200
    },
    {
      "epoch": 2.9193133929489155,
      "grad_norm": 0.24343958497047424,
      "learning_rate": 1.6935764117104125e-07,
      "loss": 0.3937,
      "step": 14201
    },
    {
      "epoch": 2.919518963922294,
      "grad_norm": 0.23502768576145172,
      "learning_rate": 1.6849300976547856e-07,
      "loss": 0.3881,
      "step": 14202
    },
    {
      "epoch": 2.9197245348956726,
      "grad_norm": 0.24000953137874603,
      "learning_rate": 1.6763058699872269e-07,
      "loss": 0.376,
      "step": 14203
    },
    {
      "epoch": 2.919930105869051,
      "grad_norm": 0.12082278728485107,
      "learning_rate": 1.667703729132647e-07,
      "loss": 0.4705,
      "step": 14204
    },
    {
      "epoch": 2.92013567684243,
      "grad_norm": 0.22615081071853638,
      "learning_rate": 1.6591236755148064e-07,
      "loss": 0.3811,
      "step": 14205
    },
    {
      "epoch": 2.9203412478158084,
      "grad_norm": 0.23181886970996857,
      "learning_rate": 1.6505657095563675e-07,
      "loss": 0.3656,
      "step": 14206
    },
    {
      "epoch": 2.920546818789187,
      "grad_norm": 0.22532041370868683,
      "learning_rate": 1.642029831678993e-07,
      "loss": 0.375,
      "step": 14207
    },
    {
      "epoch": 2.9207523897625656,
      "grad_norm": 0.11956392228603363,
      "learning_rate": 1.633516042303196e-07,
      "loss": 0.4629,
      "step": 14208
    },
    {
      "epoch": 2.920957960735944,
      "grad_norm": 0.22714190185070038,
      "learning_rate": 1.6250243418483412e-07,
      "loss": 0.3596,
      "step": 14209
    },
    {
      "epoch": 2.9211635317093227,
      "grad_norm": 0.2288563847541809,
      "learning_rate": 1.6165547307328944e-07,
      "loss": 0.3806,
      "step": 14210
    },
    {
      "epoch": 2.9213691026827013,
      "grad_norm": 0.21944314241409302,
      "learning_rate": 1.6081072093740711e-07,
      "loss": 0.3687,
      "step": 14211
    },
    {
      "epoch": 2.92157467365608,
      "grad_norm": 0.11778556555509567,
      "learning_rate": 1.599681778187989e-07,
      "loss": 0.4448,
      "step": 14212
    },
    {
      "epoch": 2.921780244629458,
      "grad_norm": 0.24057716131210327,
      "learning_rate": 1.591278437589816e-07,
      "loss": 0.39,
      "step": 14213
    },
    {
      "epoch": 2.921985815602837,
      "grad_norm": 0.11888077110052109,
      "learning_rate": 1.5828971879934706e-07,
      "loss": 0.4486,
      "step": 14214
    },
    {
      "epoch": 2.9221913865762152,
      "grad_norm": 0.23414359986782074,
      "learning_rate": 1.574538029811873e-07,
      "loss": 0.3829,
      "step": 14215
    },
    {
      "epoch": 2.922396957549594,
      "grad_norm": 0.2228407859802246,
      "learning_rate": 1.5662009634568432e-07,
      "loss": 0.3908,
      "step": 14216
    },
    {
      "epoch": 2.9226025285229724,
      "grad_norm": 0.23232321441173553,
      "learning_rate": 1.557885989339103e-07,
      "loss": 0.3872,
      "step": 14217
    },
    {
      "epoch": 2.922808099496351,
      "grad_norm": 0.23603259027004242,
      "learning_rate": 1.5495931078683746e-07,
      "loss": 0.3729,
      "step": 14218
    },
    {
      "epoch": 2.9230136704697296,
      "grad_norm": 0.23010489344596863,
      "learning_rate": 1.5413223194530813e-07,
      "loss": 0.3702,
      "step": 14219
    },
    {
      "epoch": 2.923219241443108,
      "grad_norm": 0.22785669565200806,
      "learning_rate": 1.5330736245007972e-07,
      "loss": 0.3773,
      "step": 14220
    },
    {
      "epoch": 2.9234248124164868,
      "grad_norm": 0.2386084794998169,
      "learning_rate": 1.524847023417797e-07,
      "loss": 0.3803,
      "step": 14221
    },
    {
      "epoch": 2.9236303833898654,
      "grad_norm": 0.23408401012420654,
      "learning_rate": 1.5166425166094567e-07,
      "loss": 0.3836,
      "step": 14222
    },
    {
      "epoch": 2.923835954363244,
      "grad_norm": 0.23765285313129425,
      "learning_rate": 1.508460104479903e-07,
      "loss": 0.376,
      "step": 14223
    },
    {
      "epoch": 2.9240415253366225,
      "grad_norm": 0.23104673624038696,
      "learning_rate": 1.5002997874323134e-07,
      "loss": 0.3758,
      "step": 14224
    },
    {
      "epoch": 2.924247096310001,
      "grad_norm": 0.2328345626592636,
      "learning_rate": 1.492161565868616e-07,
      "loss": 0.3718,
      "step": 14225
    },
    {
      "epoch": 2.9244526672833797,
      "grad_norm": 0.22445005178451538,
      "learning_rate": 1.4840454401898407e-07,
      "loss": 0.363,
      "step": 14226
    },
    {
      "epoch": 2.9246582382567583,
      "grad_norm": 0.22506146132946014,
      "learning_rate": 1.4759514107957673e-07,
      "loss": 0.3583,
      "step": 14227
    },
    {
      "epoch": 2.9248638092301364,
      "grad_norm": 0.22295387089252472,
      "learning_rate": 1.4678794780852267e-07,
      "loss": 0.3901,
      "step": 14228
    },
    {
      "epoch": 2.9250693802035155,
      "grad_norm": 0.22863556444644928,
      "learning_rate": 1.4598296424557512e-07,
      "loss": 0.3554,
      "step": 14229
    },
    {
      "epoch": 2.9252749511768936,
      "grad_norm": 0.23237614333629608,
      "learning_rate": 1.4518019043040233e-07,
      "loss": 0.3843,
      "step": 14230
    },
    {
      "epoch": 2.9254805221502727,
      "grad_norm": 0.22150248289108276,
      "learning_rate": 1.4437962640255264e-07,
      "loss": 0.3708,
      "step": 14231
    },
    {
      "epoch": 2.925686093123651,
      "grad_norm": 0.2305610030889511,
      "learning_rate": 1.4358127220146456e-07,
      "loss": 0.3922,
      "step": 14232
    },
    {
      "epoch": 2.9258916640970294,
      "grad_norm": 0.2294863909482956,
      "learning_rate": 1.4278512786646658e-07,
      "loss": 0.3815,
      "step": 14233
    },
    {
      "epoch": 2.926097235070408,
      "grad_norm": 0.22797244787216187,
      "learning_rate": 1.4199119343678236e-07,
      "loss": 0.3751,
      "step": 14234
    },
    {
      "epoch": 2.9263028060437866,
      "grad_norm": 0.23715586960315704,
      "learning_rate": 1.4119946895153058e-07,
      "loss": 0.3842,
      "step": 14235
    },
    {
      "epoch": 2.926508377017165,
      "grad_norm": 0.22145721316337585,
      "learning_rate": 1.4040995444970505e-07,
      "loss": 0.3793,
      "step": 14236
    },
    {
      "epoch": 2.9267139479905437,
      "grad_norm": 0.2366815060377121,
      "learning_rate": 1.396226499702097e-07,
      "loss": 0.3848,
      "step": 14237
    },
    {
      "epoch": 2.9269195189639223,
      "grad_norm": 0.24056269228458405,
      "learning_rate": 1.3883755555183343e-07,
      "loss": 0.3725,
      "step": 14238
    },
    {
      "epoch": 2.927125089937301,
      "grad_norm": 0.22281573712825775,
      "learning_rate": 1.3805467123325035e-07,
      "loss": 0.3695,
      "step": 14239
    },
    {
      "epoch": 2.9273306609106795,
      "grad_norm": 0.2354237586259842,
      "learning_rate": 1.3727399705302458e-07,
      "loss": 0.3834,
      "step": 14240
    },
    {
      "epoch": 2.927536231884058,
      "grad_norm": 0.121092788875103,
      "learning_rate": 1.3649553304962536e-07,
      "loss": 0.4405,
      "step": 14241
    },
    {
      "epoch": 2.9277418028574367,
      "grad_norm": 0.23735617101192474,
      "learning_rate": 1.3571927926139705e-07,
      "loss": 0.3735,
      "step": 14242
    },
    {
      "epoch": 2.927947373830815,
      "grad_norm": 0.24596528708934784,
      "learning_rate": 1.3494523572658402e-07,
      "loss": 0.3967,
      "step": 14243
    },
    {
      "epoch": 2.928152944804194,
      "grad_norm": 0.21962697803974152,
      "learning_rate": 1.3417340248332578e-07,
      "loss": 0.3747,
      "step": 14244
    },
    {
      "epoch": 2.928358515777572,
      "grad_norm": 0.2285209596157074,
      "learning_rate": 1.334037795696369e-07,
      "loss": 0.3752,
      "step": 14245
    },
    {
      "epoch": 2.928564086750951,
      "grad_norm": 0.22602157294750214,
      "learning_rate": 1.3263636702344207e-07,
      "loss": 0.3754,
      "step": 14246
    },
    {
      "epoch": 2.928769657724329,
      "grad_norm": 0.22371745109558105,
      "learning_rate": 1.3187116488254103e-07,
      "loss": 0.3879,
      "step": 14247
    },
    {
      "epoch": 2.9289752286977078,
      "grad_norm": 0.23446328938007355,
      "learning_rate": 1.3110817318463365e-07,
      "loss": 0.3569,
      "step": 14248
    },
    {
      "epoch": 2.9291807996710864,
      "grad_norm": 0.22867922484874725,
      "learning_rate": 1.3034739196730984e-07,
      "loss": 0.397,
      "step": 14249
    },
    {
      "epoch": 2.929386370644465,
      "grad_norm": 0.23485369980335236,
      "learning_rate": 1.295888212680496e-07,
      "loss": 0.3934,
      "step": 14250
    },
    {
      "epoch": 2.9295919416178435,
      "grad_norm": 0.23388779163360596,
      "learning_rate": 1.2883246112422808e-07,
      "loss": 0.3878,
      "step": 14251
    },
    {
      "epoch": 2.929797512591222,
      "grad_norm": 0.23058055341243744,
      "learning_rate": 1.2807831157310046e-07,
      "loss": 0.3728,
      "step": 14252
    },
    {
      "epoch": 2.9300030835646007,
      "grad_norm": 0.12012367695569992,
      "learning_rate": 1.2732637265182702e-07,
      "loss": 0.428,
      "step": 14253
    },
    {
      "epoch": 2.9302086545379793,
      "grad_norm": 0.11733004450798035,
      "learning_rate": 1.265766443974431e-07,
      "loss": 0.4467,
      "step": 14254
    },
    {
      "epoch": 2.930414225511358,
      "grad_norm": 0.23373596370220184,
      "learning_rate": 1.2582912684689418e-07,
      "loss": 0.3774,
      "step": 14255
    },
    {
      "epoch": 2.9306197964847365,
      "grad_norm": 0.22442536056041718,
      "learning_rate": 1.250838200370008e-07,
      "loss": 0.3723,
      "step": 14256
    },
    {
      "epoch": 2.930825367458115,
      "grad_norm": 0.119273342192173,
      "learning_rate": 1.243407240044836e-07,
      "loss": 0.4466,
      "step": 14257
    },
    {
      "epoch": 2.931030938431493,
      "grad_norm": 0.12564511597156525,
      "learning_rate": 1.2359983878595329e-07,
      "loss": 0.4453,
      "step": 14258
    },
    {
      "epoch": 2.9312365094048722,
      "grad_norm": 0.2270507961511612,
      "learning_rate": 1.2286116441790064e-07,
      "loss": 0.3577,
      "step": 14259
    },
    {
      "epoch": 2.9314420803782504,
      "grad_norm": 0.24136748909950256,
      "learning_rate": 1.2212470093673155e-07,
      "loss": 0.3874,
      "step": 14260
    },
    {
      "epoch": 2.9316476513516294,
      "grad_norm": 0.22944435477256775,
      "learning_rate": 1.2139044837871204e-07,
      "loss": 0.3783,
      "step": 14261
    },
    {
      "epoch": 2.9318532223250076,
      "grad_norm": 0.2328665405511856,
      "learning_rate": 1.2065840678002815e-07,
      "loss": 0.3704,
      "step": 14262
    },
    {
      "epoch": 2.932058793298386,
      "grad_norm": 0.23235177993774414,
      "learning_rate": 1.1992857617674103e-07,
      "loss": 0.3927,
      "step": 14263
    },
    {
      "epoch": 2.9322643642717647,
      "grad_norm": 0.22136935591697693,
      "learning_rate": 1.1920095660479691e-07,
      "loss": 0.3554,
      "step": 14264
    },
    {
      "epoch": 2.9324699352451433,
      "grad_norm": 0.23103518784046173,
      "learning_rate": 1.1847554810005212e-07,
      "loss": 0.3751,
      "step": 14265
    },
    {
      "epoch": 2.932675506218522,
      "grad_norm": 0.1267227828502655,
      "learning_rate": 1.177523506982431e-07,
      "loss": 0.4482,
      "step": 14266
    },
    {
      "epoch": 2.9328810771919005,
      "grad_norm": 0.23589691519737244,
      "learning_rate": 1.1703136443499629e-07,
      "loss": 0.3912,
      "step": 14267
    },
    {
      "epoch": 2.933086648165279,
      "grad_norm": 0.22941534221172333,
      "learning_rate": 1.1631258934583333e-07,
      "loss": 0.3815,
      "step": 14268
    },
    {
      "epoch": 2.9332922191386577,
      "grad_norm": 0.2415175586938858,
      "learning_rate": 1.1559602546616089e-07,
      "loss": 0.3837,
      "step": 14269
    },
    {
      "epoch": 2.9334977901120363,
      "grad_norm": 0.22201284766197205,
      "learning_rate": 1.148816728312857e-07,
      "loss": 0.3859,
      "step": 14270
    },
    {
      "epoch": 2.933703361085415,
      "grad_norm": 0.23160016536712646,
      "learning_rate": 1.1416953147639464e-07,
      "loss": 0.378,
      "step": 14271
    },
    {
      "epoch": 2.9339089320587934,
      "grad_norm": 0.23736536502838135,
      "learning_rate": 1.1345960143657463e-07,
      "loss": 0.363,
      "step": 14272
    },
    {
      "epoch": 2.9341145030321716,
      "grad_norm": 0.12271010130643845,
      "learning_rate": 1.127518827468027e-07,
      "loss": 0.4492,
      "step": 14273
    },
    {
      "epoch": 2.9343200740055506,
      "grad_norm": 0.2341691106557846,
      "learning_rate": 1.1204637544194097e-07,
      "loss": 0.3787,
      "step": 14274
    },
    {
      "epoch": 2.9345256449789288,
      "grad_norm": 0.23392406105995178,
      "learning_rate": 1.1134307955675161e-07,
      "loss": 0.3873,
      "step": 14275
    },
    {
      "epoch": 2.934731215952308,
      "grad_norm": 0.2216750532388687,
      "learning_rate": 1.1064199512587692e-07,
      "loss": 0.3777,
      "step": 14276
    },
    {
      "epoch": 2.934936786925686,
      "grad_norm": 0.23249836266040802,
      "learning_rate": 1.0994312218385927e-07,
      "loss": 0.365,
      "step": 14277
    },
    {
      "epoch": 2.9351423578990645,
      "grad_norm": 0.23422518372535706,
      "learning_rate": 1.0924646076513112e-07,
      "loss": 0.3889,
      "step": 14278
    },
    {
      "epoch": 2.935347928872443,
      "grad_norm": 0.23376347124576569,
      "learning_rate": 1.0855201090401002e-07,
      "loss": 0.3868,
      "step": 14279
    },
    {
      "epoch": 2.9355534998458217,
      "grad_norm": 0.21998612582683563,
      "learning_rate": 1.078597726347086e-07,
      "loss": 0.3909,
      "step": 14280
    },
    {
      "epoch": 2.9357590708192003,
      "grad_norm": 0.23854362964630127,
      "learning_rate": 1.0716974599132956e-07,
      "loss": 0.3771,
      "step": 14281
    },
    {
      "epoch": 2.935964641792579,
      "grad_norm": 0.11935044080018997,
      "learning_rate": 1.0648193100787074e-07,
      "loss": 0.4408,
      "step": 14282
    },
    {
      "epoch": 2.9361702127659575,
      "grad_norm": 0.23328512907028198,
      "learning_rate": 1.0579632771821502e-07,
      "loss": 0.3917,
      "step": 14283
    },
    {
      "epoch": 2.936375783739336,
      "grad_norm": 0.2255300134420395,
      "learning_rate": 1.0511293615613539e-07,
      "loss": 0.3756,
      "step": 14284
    },
    {
      "epoch": 2.9365813547127146,
      "grad_norm": 0.2301304042339325,
      "learning_rate": 1.0443175635530489e-07,
      "loss": 0.3927,
      "step": 14285
    },
    {
      "epoch": 2.9367869256860932,
      "grad_norm": 0.11829908192157745,
      "learning_rate": 1.037527883492817e-07,
      "loss": 0.4427,
      "step": 14286
    },
    {
      "epoch": 2.936992496659472,
      "grad_norm": 0.23846930265426636,
      "learning_rate": 1.0307603217151906e-07,
      "loss": 0.3758,
      "step": 14287
    },
    {
      "epoch": 2.9371980676328504,
      "grad_norm": 0.22976188361644745,
      "learning_rate": 1.0240148785534532e-07,
      "loss": 0.3702,
      "step": 14288
    },
    {
      "epoch": 2.937403638606229,
      "grad_norm": 0.235699862241745,
      "learning_rate": 1.0172915543400386e-07,
      "loss": 0.3791,
      "step": 14289
    },
    {
      "epoch": 2.937609209579607,
      "grad_norm": 0.23126575350761414,
      "learning_rate": 1.0105903494060821e-07,
      "loss": 0.3925,
      "step": 14290
    },
    {
      "epoch": 2.937814780552986,
      "grad_norm": 0.12287239730358124,
      "learning_rate": 1.0039112640818193e-07,
      "loss": 0.4534,
      "step": 14291
    },
    {
      "epoch": 2.9380203515263643,
      "grad_norm": 0.22776830196380615,
      "learning_rate": 9.972542986961875e-08,
      "loss": 0.3802,
      "step": 14292
    },
    {
      "epoch": 2.938225922499743,
      "grad_norm": 0.23235289752483368,
      "learning_rate": 9.906194535772739e-08,
      "loss": 0.3922,
      "step": 14293
    },
    {
      "epoch": 2.9384314934731215,
      "grad_norm": 0.12463247776031494,
      "learning_rate": 9.840067290518173e-08,
      "loss": 0.4412,
      "step": 14294
    },
    {
      "epoch": 2.9386370644465,
      "grad_norm": 0.12007234990596771,
      "learning_rate": 9.77416125445707e-08,
      "loss": 0.4616,
      "step": 14295
    },
    {
      "epoch": 2.9388426354198787,
      "grad_norm": 0.121745266020298,
      "learning_rate": 9.708476430835333e-08,
      "loss": 0.4576,
      "step": 14296
    },
    {
      "epoch": 2.9390482063932573,
      "grad_norm": 0.23362316191196442,
      "learning_rate": 9.643012822889375e-08,
      "loss": 0.387,
      "step": 14297
    },
    {
      "epoch": 2.939253777366636,
      "grad_norm": 0.21919940412044525,
      "learning_rate": 9.577770433844613e-08,
      "loss": 0.3776,
      "step": 14298
    },
    {
      "epoch": 2.9394593483400144,
      "grad_norm": 0.23474140465259552,
      "learning_rate": 9.512749266914978e-08,
      "loss": 0.3797,
      "step": 14299
    },
    {
      "epoch": 2.939664919313393,
      "grad_norm": 0.22480328381061554,
      "learning_rate": 9.447949325303407e-08,
      "loss": 0.3901,
      "step": 14300
    },
    {
      "epoch": 2.9398704902867716,
      "grad_norm": 0.11820299923419952,
      "learning_rate": 9.383370612202347e-08,
      "loss": 0.4407,
      "step": 14301
    },
    {
      "epoch": 2.94007606126015,
      "grad_norm": 0.23817752301692963,
      "learning_rate": 9.319013130794252e-08,
      "loss": 0.4009,
      "step": 14302
    },
    {
      "epoch": 2.940281632233529,
      "grad_norm": 0.23195527493953705,
      "learning_rate": 9.254876884248587e-08,
      "loss": 0.373,
      "step": 14303
    },
    {
      "epoch": 2.9404872032069074,
      "grad_norm": 0.22849521040916443,
      "learning_rate": 9.190961875725324e-08,
      "loss": 0.387,
      "step": 14304
    },
    {
      "epoch": 2.9406927741802855,
      "grad_norm": 0.2443472295999527,
      "learning_rate": 9.127268108373444e-08,
      "loss": 0.395,
      "step": 14305
    },
    {
      "epoch": 2.9408983451536646,
      "grad_norm": 0.12250496447086334,
      "learning_rate": 9.063795585330937e-08,
      "loss": 0.4541,
      "step": 14306
    },
    {
      "epoch": 2.9411039161270427,
      "grad_norm": 0.23145142197608948,
      "learning_rate": 9.000544309724302e-08,
      "loss": 0.37,
      "step": 14307
    },
    {
      "epoch": 2.9413094871004213,
      "grad_norm": 0.2310493439435959,
      "learning_rate": 8.937514284670545e-08,
      "loss": 0.3729,
      "step": 14308
    },
    {
      "epoch": 2.9415150580738,
      "grad_norm": 0.2356126606464386,
      "learning_rate": 8.874705513273685e-08,
      "loss": 0.3647,
      "step": 14309
    },
    {
      "epoch": 2.9417206290471785,
      "grad_norm": 0.1224084421992302,
      "learning_rate": 8.812117998629244e-08,
      "loss": 0.4391,
      "step": 14310
    },
    {
      "epoch": 2.941926200020557,
      "grad_norm": 0.23388880491256714,
      "learning_rate": 8.749751743819257e-08,
      "loss": 0.4037,
      "step": 14311
    },
    {
      "epoch": 2.9421317709939356,
      "grad_norm": 0.1348462849855423,
      "learning_rate": 8.687606751917766e-08,
      "loss": 0.4476,
      "step": 14312
    },
    {
      "epoch": 2.9423373419673142,
      "grad_norm": 0.11941714584827423,
      "learning_rate": 8.625683025984821e-08,
      "loss": 0.4535,
      "step": 14313
    },
    {
      "epoch": 2.942542912940693,
      "grad_norm": 0.2301827371120453,
      "learning_rate": 8.563980569071983e-08,
      "loss": 0.373,
      "step": 14314
    },
    {
      "epoch": 2.9427484839140714,
      "grad_norm": 0.23292043805122375,
      "learning_rate": 8.50249938421932e-08,
      "loss": 0.3848,
      "step": 14315
    },
    {
      "epoch": 2.94295405488745,
      "grad_norm": 0.23747049272060394,
      "learning_rate": 8.44123947445491e-08,
      "loss": 0.3767,
      "step": 14316
    },
    {
      "epoch": 2.9431596258608286,
      "grad_norm": 0.23186716437339783,
      "learning_rate": 8.380200842797336e-08,
      "loss": 0.3821,
      "step": 14317
    },
    {
      "epoch": 2.943365196834207,
      "grad_norm": 0.23163893818855286,
      "learning_rate": 8.319383492253696e-08,
      "loss": 0.3671,
      "step": 14318
    },
    {
      "epoch": 2.9435707678075858,
      "grad_norm": 0.12333094328641891,
      "learning_rate": 8.258787425819592e-08,
      "loss": 0.4495,
      "step": 14319
    },
    {
      "epoch": 2.943776338780964,
      "grad_norm": 0.12259241193532944,
      "learning_rate": 8.198412646480636e-08,
      "loss": 0.46,
      "step": 14320
    },
    {
      "epoch": 2.943981909754343,
      "grad_norm": 0.23415526747703552,
      "learning_rate": 8.138259157211447e-08,
      "loss": 0.3829,
      "step": 14321
    },
    {
      "epoch": 2.944187480727721,
      "grad_norm": 0.23561497032642365,
      "learning_rate": 8.078326960975158e-08,
      "loss": 0.3766,
      "step": 14322
    },
    {
      "epoch": 2.9443930517010997,
      "grad_norm": 0.22486624121665955,
      "learning_rate": 8.018616060724904e-08,
      "loss": 0.3748,
      "step": 14323
    },
    {
      "epoch": 2.9445986226744782,
      "grad_norm": 0.22078227996826172,
      "learning_rate": 7.959126459401834e-08,
      "loss": 0.3688,
      "step": 14324
    },
    {
      "epoch": 2.944804193647857,
      "grad_norm": 0.23797355592250824,
      "learning_rate": 7.899858159936601e-08,
      "loss": 0.387,
      "step": 14325
    },
    {
      "epoch": 2.9450097646212354,
      "grad_norm": 0.2293400913476944,
      "learning_rate": 7.840811165249373e-08,
      "loss": 0.373,
      "step": 14326
    },
    {
      "epoch": 2.945215335594614,
      "grad_norm": 0.12413428723812103,
      "learning_rate": 7.781985478249321e-08,
      "loss": 0.4478,
      "step": 14327
    },
    {
      "epoch": 2.9454209065679926,
      "grad_norm": 0.22216647863388062,
      "learning_rate": 7.723381101834126e-08,
      "loss": 0.3686,
      "step": 14328
    },
    {
      "epoch": 2.945626477541371,
      "grad_norm": 0.23056413233280182,
      "learning_rate": 7.66499803889098e-08,
      "loss": 0.3739,
      "step": 14329
    },
    {
      "epoch": 2.94583204851475,
      "grad_norm": 0.23194332420825958,
      "learning_rate": 7.606836292296582e-08,
      "loss": 0.3727,
      "step": 14330
    },
    {
      "epoch": 2.9460376194881284,
      "grad_norm": 0.24576567113399506,
      "learning_rate": 7.548895864915639e-08,
      "loss": 0.3977,
      "step": 14331
    },
    {
      "epoch": 2.946243190461507,
      "grad_norm": 0.22875289618968964,
      "learning_rate": 7.491176759602869e-08,
      "loss": 0.3842,
      "step": 14332
    },
    {
      "epoch": 2.9464487614348855,
      "grad_norm": 0.2283722311258316,
      "learning_rate": 7.433678979201997e-08,
      "loss": 0.3824,
      "step": 14333
    },
    {
      "epoch": 2.946654332408264,
      "grad_norm": 0.23309841752052307,
      "learning_rate": 7.376402526545755e-08,
      "loss": 0.3859,
      "step": 14334
    },
    {
      "epoch": 2.9468599033816423,
      "grad_norm": 0.23801040649414062,
      "learning_rate": 7.31934740445589e-08,
      "loss": 0.3857,
      "step": 14335
    },
    {
      "epoch": 2.9470654743550213,
      "grad_norm": 0.23424702882766724,
      "learning_rate": 7.26251361574265e-08,
      "loss": 0.3835,
      "step": 14336
    },
    {
      "epoch": 2.9472710453283995,
      "grad_norm": 0.23089328408241272,
      "learning_rate": 7.205901163206297e-08,
      "loss": 0.3669,
      "step": 14337
    },
    {
      "epoch": 2.947476616301778,
      "grad_norm": 0.22902965545654297,
      "learning_rate": 7.149510049636099e-08,
      "loss": 0.3738,
      "step": 14338
    },
    {
      "epoch": 2.9476821872751566,
      "grad_norm": 0.22217592597007751,
      "learning_rate": 7.093340277809834e-08,
      "loss": 0.3853,
      "step": 14339
    },
    {
      "epoch": 2.947887758248535,
      "grad_norm": 0.24186544120311737,
      "learning_rate": 7.03739185049529e-08,
      "loss": 0.3741,
      "step": 14340
    },
    {
      "epoch": 2.948093329221914,
      "grad_norm": 0.2368420511484146,
      "learning_rate": 6.98166477044826e-08,
      "loss": 0.3809,
      "step": 14341
    },
    {
      "epoch": 2.9482989001952924,
      "grad_norm": 0.11286085844039917,
      "learning_rate": 6.926159040414049e-08,
      "loss": 0.4515,
      "step": 14342
    },
    {
      "epoch": 2.948504471168671,
      "grad_norm": 0.23017874360084534,
      "learning_rate": 6.870874663127469e-08,
      "loss": 0.3689,
      "step": 14343
    },
    {
      "epoch": 2.9487100421420496,
      "grad_norm": 0.22851766645908356,
      "learning_rate": 6.815811641312342e-08,
      "loss": 0.3729,
      "step": 14344
    },
    {
      "epoch": 2.948915613115428,
      "grad_norm": 0.22411444783210754,
      "learning_rate": 6.760969977680498e-08,
      "loss": 0.3666,
      "step": 14345
    },
    {
      "epoch": 2.9491211840888067,
      "grad_norm": 0.22939811646938324,
      "learning_rate": 6.706349674934776e-08,
      "loss": 0.3816,
      "step": 14346
    },
    {
      "epoch": 2.9493267550621853,
      "grad_norm": 0.2309289425611496,
      "learning_rate": 6.651950735765522e-08,
      "loss": 0.3866,
      "step": 14347
    },
    {
      "epoch": 2.949532326035564,
      "grad_norm": 0.25561413168907166,
      "learning_rate": 6.597773162853094e-08,
      "loss": 0.4024,
      "step": 14348
    },
    {
      "epoch": 2.9497378970089425,
      "grad_norm": 0.23126906156539917,
      "learning_rate": 6.543816958865857e-08,
      "loss": 0.3858,
      "step": 14349
    },
    {
      "epoch": 2.9499434679823207,
      "grad_norm": 0.23696114122867584,
      "learning_rate": 6.490082126462682e-08,
      "loss": 0.3707,
      "step": 14350
    },
    {
      "epoch": 2.9501490389556997,
      "grad_norm": 0.12179608643054962,
      "learning_rate": 6.436568668290455e-08,
      "loss": 0.4631,
      "step": 14351
    },
    {
      "epoch": 2.950354609929078,
      "grad_norm": 0.22677427530288696,
      "learning_rate": 6.383276586985565e-08,
      "loss": 0.3663,
      "step": 14352
    },
    {
      "epoch": 2.9505601809024564,
      "grad_norm": 0.23234906792640686,
      "learning_rate": 6.330205885173413e-08,
      "loss": 0.3811,
      "step": 14353
    },
    {
      "epoch": 2.950765751875835,
      "grad_norm": 0.23495686054229736,
      "learning_rate": 6.277356565468906e-08,
      "loss": 0.3994,
      "step": 14354
    },
    {
      "epoch": 2.9509713228492136,
      "grad_norm": 0.23368287086486816,
      "learning_rate": 6.224728630474964e-08,
      "loss": 0.3713,
      "step": 14355
    },
    {
      "epoch": 2.951176893822592,
      "grad_norm": 0.11765862256288528,
      "learning_rate": 6.17232208278551e-08,
      "loss": 0.445,
      "step": 14356
    },
    {
      "epoch": 2.9513824647959708,
      "grad_norm": 0.23425832390785217,
      "learning_rate": 6.12013692498098e-08,
      "loss": 0.3983,
      "step": 14357
    },
    {
      "epoch": 2.9515880357693494,
      "grad_norm": 0.11689037829637527,
      "learning_rate": 6.068173159633317e-08,
      "loss": 0.4463,
      "step": 14358
    },
    {
      "epoch": 2.951793606742728,
      "grad_norm": 0.2250240296125412,
      "learning_rate": 6.016430789302474e-08,
      "loss": 0.3852,
      "step": 14359
    },
    {
      "epoch": 2.9519991777161065,
      "grad_norm": 0.23186476528644562,
      "learning_rate": 5.964909816536912e-08,
      "loss": 0.3659,
      "step": 14360
    },
    {
      "epoch": 2.952204748689485,
      "grad_norm": 0.2178521603345871,
      "learning_rate": 5.913610243875101e-08,
      "loss": 0.3706,
      "step": 14361
    },
    {
      "epoch": 2.9524103196628637,
      "grad_norm": 0.23056325316429138,
      "learning_rate": 5.8625320738445176e-08,
      "loss": 0.3751,
      "step": 14362
    },
    {
      "epoch": 2.9526158906362423,
      "grad_norm": 0.2350500226020813,
      "learning_rate": 5.811675308961151e-08,
      "loss": 0.3784,
      "step": 14363
    },
    {
      "epoch": 2.952821461609621,
      "grad_norm": 0.22323279082775116,
      "learning_rate": 5.7610399517309956e-08,
      "loss": 0.3732,
      "step": 14364
    },
    {
      "epoch": 2.953027032582999,
      "grad_norm": 0.23257021605968475,
      "learning_rate": 5.7106260046485564e-08,
      "loss": 0.3833,
      "step": 14365
    },
    {
      "epoch": 2.953232603556378,
      "grad_norm": 0.22943510115146637,
      "learning_rate": 5.6604334701968466e-08,
      "loss": 0.3664,
      "step": 14366
    },
    {
      "epoch": 2.953438174529756,
      "grad_norm": 0.25284644961357117,
      "learning_rate": 5.6104623508493883e-08,
      "loss": 0.3844,
      "step": 14367
    },
    {
      "epoch": 2.953643745503135,
      "grad_norm": 0.23901039361953735,
      "learning_rate": 5.560712649067712e-08,
      "loss": 0.3866,
      "step": 14368
    },
    {
      "epoch": 2.9538493164765134,
      "grad_norm": 0.23246188461780548,
      "learning_rate": 5.5111843673028574e-08,
      "loss": 0.3791,
      "step": 14369
    },
    {
      "epoch": 2.954054887449892,
      "grad_norm": 0.22920754551887512,
      "learning_rate": 5.4618775079948725e-08,
      "loss": 0.3846,
      "step": 14370
    },
    {
      "epoch": 2.9542604584232706,
      "grad_norm": 0.23537150025367737,
      "learning_rate": 5.412792073572315e-08,
      "loss": 0.3787,
      "step": 14371
    },
    {
      "epoch": 2.954466029396649,
      "grad_norm": 0.23101921379566193,
      "learning_rate": 5.363928066454249e-08,
      "loss": 0.3592,
      "step": 14372
    },
    {
      "epoch": 2.9546716003700277,
      "grad_norm": 0.11517384648323059,
      "learning_rate": 5.31528548904775e-08,
      "loss": 0.4418,
      "step": 14373
    },
    {
      "epoch": 2.9548771713434063,
      "grad_norm": 0.24331872165203094,
      "learning_rate": 5.266864343748401e-08,
      "loss": 0.3696,
      "step": 14374
    },
    {
      "epoch": 2.955082742316785,
      "grad_norm": 0.12557660043239594,
      "learning_rate": 5.218664632942794e-08,
      "loss": 0.4405,
      "step": 14375
    },
    {
      "epoch": 2.9552883132901635,
      "grad_norm": 0.2361089289188385,
      "learning_rate": 5.170686359005028e-08,
      "loss": 0.399,
      "step": 14376
    },
    {
      "epoch": 2.955493884263542,
      "grad_norm": 0.23642629384994507,
      "learning_rate": 5.122929524298215e-08,
      "loss": 0.3766,
      "step": 14377
    },
    {
      "epoch": 2.9556994552369207,
      "grad_norm": 0.22410228848457336,
      "learning_rate": 5.07539413117647e-08,
      "loss": 0.369,
      "step": 14378
    },
    {
      "epoch": 2.9559050262102993,
      "grad_norm": 0.11725395172834396,
      "learning_rate": 5.028080181980421e-08,
      "loss": 0.4579,
      "step": 14379
    },
    {
      "epoch": 2.9561105971836774,
      "grad_norm": 0.11997832357883453,
      "learning_rate": 4.9809876790412045e-08,
      "loss": 0.4409,
      "step": 14380
    },
    {
      "epoch": 2.9563161681570564,
      "grad_norm": 0.22873012721538544,
      "learning_rate": 4.9341166246794635e-08,
      "loss": 0.3836,
      "step": 14381
    },
    {
      "epoch": 2.9565217391304346,
      "grad_norm": 0.24191080033779144,
      "learning_rate": 4.8874670212033516e-08,
      "loss": 0.408,
      "step": 14382
    },
    {
      "epoch": 2.956727310103813,
      "grad_norm": 0.23186847567558289,
      "learning_rate": 4.841038870912029e-08,
      "loss": 0.4031,
      "step": 14383
    },
    {
      "epoch": 2.9569328810771918,
      "grad_norm": 0.22565621137619019,
      "learning_rate": 4.7948321760926675e-08,
      "loss": 0.3672,
      "step": 14384
    },
    {
      "epoch": 2.9571384520505704,
      "grad_norm": 0.23073460161685944,
      "learning_rate": 4.748846939020946e-08,
      "loss": 0.3797,
      "step": 14385
    },
    {
      "epoch": 2.957344023023949,
      "grad_norm": 0.23532749712467194,
      "learning_rate": 4.703083161963051e-08,
      "loss": 0.3721,
      "step": 14386
    },
    {
      "epoch": 2.9575495939973275,
      "grad_norm": 0.27987563610076904,
      "learning_rate": 4.657540847173181e-08,
      "loss": 0.368,
      "step": 14387
    },
    {
      "epoch": 2.957755164970706,
      "grad_norm": 0.2355928122997284,
      "learning_rate": 4.61221999689504e-08,
      "loss": 0.3909,
      "step": 14388
    },
    {
      "epoch": 2.9579607359440847,
      "grad_norm": 0.22605665028095245,
      "learning_rate": 4.567120613361342e-08,
      "loss": 0.3669,
      "step": 14389
    },
    {
      "epoch": 2.9581663069174633,
      "grad_norm": 0.22839273512363434,
      "learning_rate": 4.52224269879431e-08,
      "loss": 0.3821,
      "step": 14390
    },
    {
      "epoch": 2.958371877890842,
      "grad_norm": 0.23111465573310852,
      "learning_rate": 4.477586255404176e-08,
      "loss": 0.3917,
      "step": 14391
    },
    {
      "epoch": 2.9585774488642205,
      "grad_norm": 0.2226291000843048,
      "learning_rate": 4.433151285391679e-08,
      "loss": 0.374,
      "step": 14392
    },
    {
      "epoch": 2.958783019837599,
      "grad_norm": 0.235224187374115,
      "learning_rate": 4.388937790945569e-08,
      "loss": 0.3761,
      "step": 14393
    },
    {
      "epoch": 2.9589885908109776,
      "grad_norm": 0.22255218029022217,
      "learning_rate": 4.3449457742441025e-08,
      "loss": 0.3554,
      "step": 14394
    },
    {
      "epoch": 2.959194161784356,
      "grad_norm": 0.23567332327365875,
      "learning_rate": 4.3011752374545464e-08,
      "loss": 0.3757,
      "step": 14395
    },
    {
      "epoch": 2.959399732757735,
      "grad_norm": 0.2175855040550232,
      "learning_rate": 4.257626182732677e-08,
      "loss": 0.3868,
      "step": 14396
    },
    {
      "epoch": 2.959605303731113,
      "grad_norm": 0.22269536554813385,
      "learning_rate": 4.214298612225276e-08,
      "loss": 0.3789,
      "step": 14397
    },
    {
      "epoch": 2.959810874704492,
      "grad_norm": 0.2323738932609558,
      "learning_rate": 4.1711925280656376e-08,
      "loss": 0.3916,
      "step": 14398
    },
    {
      "epoch": 2.96001644567787,
      "grad_norm": 0.23786410689353943,
      "learning_rate": 4.1283079323780616e-08,
      "loss": 0.3622,
      "step": 14399
    },
    {
      "epoch": 2.9602220166512487,
      "grad_norm": 0.22747676074504852,
      "learning_rate": 4.085644827275359e-08,
      "loss": 0.3803,
      "step": 14400
    },
    {
      "epoch": 2.9604275876246273,
      "grad_norm": 0.23511525988578796,
      "learning_rate": 4.043203214858848e-08,
      "loss": 0.3931,
      "step": 14401
    },
    {
      "epoch": 2.960633158598006,
      "grad_norm": 0.225963294506073,
      "learning_rate": 4.000983097219358e-08,
      "loss": 0.3679,
      "step": 14402
    },
    {
      "epoch": 2.9608387295713845,
      "grad_norm": 0.23282590508460999,
      "learning_rate": 3.958984476437722e-08,
      "loss": 0.3793,
      "step": 14403
    },
    {
      "epoch": 2.961044300544763,
      "grad_norm": 0.2279476523399353,
      "learning_rate": 3.917207354581787e-08,
      "loss": 0.3647,
      "step": 14404
    },
    {
      "epoch": 2.9612498715181417,
      "grad_norm": 0.239571213722229,
      "learning_rate": 3.875651733710906e-08,
      "loss": 0.3865,
      "step": 14405
    },
    {
      "epoch": 2.9614554424915203,
      "grad_norm": 0.2309008538722992,
      "learning_rate": 3.834317615871941e-08,
      "loss": 0.3594,
      "step": 14406
    },
    {
      "epoch": 2.961661013464899,
      "grad_norm": 0.22634616494178772,
      "learning_rate": 3.793205003100764e-08,
      "loss": 0.3762,
      "step": 14407
    },
    {
      "epoch": 2.9618665844382774,
      "grad_norm": 0.24357974529266357,
      "learning_rate": 3.752313897423754e-08,
      "loss": 0.3808,
      "step": 14408
    },
    {
      "epoch": 2.962072155411656,
      "grad_norm": 0.2598305642604828,
      "learning_rate": 3.7116443008543e-08,
      "loss": 0.3751,
      "step": 14409
    },
    {
      "epoch": 2.962277726385034,
      "grad_norm": 0.2315262258052826,
      "learning_rate": 3.6711962153963e-08,
      "loss": 0.3667,
      "step": 14410
    },
    {
      "epoch": 2.962483297358413,
      "grad_norm": 0.22729608416557312,
      "learning_rate": 3.6309696430431586e-08,
      "loss": 0.3758,
      "step": 14411
    },
    {
      "epoch": 2.9626888683317913,
      "grad_norm": 0.23362228274345398,
      "learning_rate": 3.590964585776291e-08,
      "loss": 0.402,
      "step": 14412
    },
    {
      "epoch": 2.9628944393051704,
      "grad_norm": 0.24321232736110687,
      "learning_rate": 3.551181045566121e-08,
      "loss": 0.3867,
      "step": 14413
    },
    {
      "epoch": 2.9631000102785485,
      "grad_norm": 0.2254071682691574,
      "learning_rate": 3.511619024373081e-08,
      "loss": 0.3674,
      "step": 14414
    },
    {
      "epoch": 2.963305581251927,
      "grad_norm": 0.23968133330345154,
      "learning_rate": 3.472278524145611e-08,
      "loss": 0.3507,
      "step": 14415
    },
    {
      "epoch": 2.9635111522253057,
      "grad_norm": 0.22927747666835785,
      "learning_rate": 3.433159546822662e-08,
      "loss": 0.3699,
      "step": 14416
    },
    {
      "epoch": 2.9637167231986843,
      "grad_norm": 0.4465451240539551,
      "learning_rate": 3.394262094331191e-08,
      "loss": 0.3874,
      "step": 14417
    },
    {
      "epoch": 2.963922294172063,
      "grad_norm": 0.23466768860816956,
      "learning_rate": 3.355586168587166e-08,
      "loss": 0.376,
      "step": 14418
    },
    {
      "epoch": 2.9641278651454415,
      "grad_norm": 0.11790206283330917,
      "learning_rate": 3.3171317714960624e-08,
      "loss": 0.4474,
      "step": 14419
    },
    {
      "epoch": 2.96433343611882,
      "grad_norm": 0.23741118609905243,
      "learning_rate": 3.278898904952366e-08,
      "loss": 0.3897,
      "step": 14420
    },
    {
      "epoch": 2.9645390070921986,
      "grad_norm": 0.2253805696964264,
      "learning_rate": 3.240887570840068e-08,
      "loss": 0.3498,
      "step": 14421
    },
    {
      "epoch": 2.9647445780655772,
      "grad_norm": 0.22657155990600586,
      "learning_rate": 3.203097771031172e-08,
      "loss": 0.3677,
      "step": 14422
    },
    {
      "epoch": 2.964950149038956,
      "grad_norm": 0.22919400036334991,
      "learning_rate": 3.165529507387188e-08,
      "loss": 0.3756,
      "step": 14423
    },
    {
      "epoch": 2.9651557200123344,
      "grad_norm": 0.12317074835300446,
      "learning_rate": 3.128182781760136e-08,
      "loss": 0.4487,
      "step": 14424
    },
    {
      "epoch": 2.9653612909857126,
      "grad_norm": 0.12205608189105988,
      "learning_rate": 3.0910575959890444e-08,
      "loss": 0.4527,
      "step": 14425
    },
    {
      "epoch": 2.9655668619590916,
      "grad_norm": 0.22888796031475067,
      "learning_rate": 3.0541539519029495e-08,
      "loss": 0.3703,
      "step": 14426
    },
    {
      "epoch": 2.9657724329324697,
      "grad_norm": 0.24040739238262177,
      "learning_rate": 3.017471851319897e-08,
      "loss": 0.3859,
      "step": 14427
    },
    {
      "epoch": 2.9659780039058488,
      "grad_norm": 0.120720773935318,
      "learning_rate": 2.9810112960474425e-08,
      "loss": 0.4494,
      "step": 14428
    },
    {
      "epoch": 2.966183574879227,
      "grad_norm": 0.23405267298221588,
      "learning_rate": 2.944772287881148e-08,
      "loss": 0.3848,
      "step": 14429
    },
    {
      "epoch": 2.9663891458526055,
      "grad_norm": 0.23550044000148773,
      "learning_rate": 2.9087548286070853e-08,
      "loss": 0.38,
      "step": 14430
    },
    {
      "epoch": 2.966594716825984,
      "grad_norm": 0.22151748836040497,
      "learning_rate": 2.8729589199993357e-08,
      "loss": 0.3612,
      "step": 14431
    },
    {
      "epoch": 2.9668002877993627,
      "grad_norm": 0.2364836186170578,
      "learning_rate": 2.837384563821488e-08,
      "loss": 0.3892,
      "step": 14432
    },
    {
      "epoch": 2.9670058587727413,
      "grad_norm": 0.23630112409591675,
      "learning_rate": 2.802031761825641e-08,
      "loss": 0.3829,
      "step": 14433
    },
    {
      "epoch": 2.96721142974612,
      "grad_norm": 0.23327064514160156,
      "learning_rate": 2.766900515753901e-08,
      "loss": 0.3951,
      "step": 14434
    },
    {
      "epoch": 2.9674170007194984,
      "grad_norm": 0.12371329218149185,
      "learning_rate": 2.7319908273373828e-08,
      "loss": 0.4328,
      "step": 14435
    },
    {
      "epoch": 2.967622571692877,
      "grad_norm": 0.11786897480487823,
      "learning_rate": 2.697302698295212e-08,
      "loss": 0.4559,
      "step": 14436
    },
    {
      "epoch": 2.9678281426662556,
      "grad_norm": 0.23964469134807587,
      "learning_rate": 2.6628361303365212e-08,
      "loss": 0.3775,
      "step": 14437
    },
    {
      "epoch": 2.968033713639634,
      "grad_norm": 0.11859652400016785,
      "learning_rate": 2.628591125159452e-08,
      "loss": 0.4539,
      "step": 14438
    },
    {
      "epoch": 2.968239284613013,
      "grad_norm": 0.24552220106124878,
      "learning_rate": 2.594567684450655e-08,
      "loss": 0.3925,
      "step": 14439
    },
    {
      "epoch": 2.968444855586391,
      "grad_norm": 0.2301911562681198,
      "learning_rate": 2.560765809887289e-08,
      "loss": 0.3591,
      "step": 14440
    },
    {
      "epoch": 2.96865042655977,
      "grad_norm": 0.23525011539459229,
      "learning_rate": 2.527185503134022e-08,
      "loss": 0.3905,
      "step": 14441
    },
    {
      "epoch": 2.968855997533148,
      "grad_norm": 0.123292475938797,
      "learning_rate": 2.493826765845031e-08,
      "loss": 0.4365,
      "step": 14442
    },
    {
      "epoch": 2.969061568506527,
      "grad_norm": 0.23423157632350922,
      "learning_rate": 2.4606895996635016e-08,
      "loss": 0.3867,
      "step": 14443
    },
    {
      "epoch": 2.9692671394799053,
      "grad_norm": 0.2355274260044098,
      "learning_rate": 2.4277740062226274e-08,
      "loss": 0.392,
      "step": 14444
    },
    {
      "epoch": 2.969472710453284,
      "grad_norm": 0.12355451285839081,
      "learning_rate": 2.395079987144111e-08,
      "loss": 0.4473,
      "step": 14445
    },
    {
      "epoch": 2.9696782814266625,
      "grad_norm": 0.23346541821956635,
      "learning_rate": 2.362607544037665e-08,
      "loss": 0.3761,
      "step": 14446
    },
    {
      "epoch": 2.969883852400041,
      "grad_norm": 0.23978963494300842,
      "learning_rate": 2.3303566785040087e-08,
      "loss": 0.366,
      "step": 14447
    },
    {
      "epoch": 2.9700894233734196,
      "grad_norm": 0.22105932235717773,
      "learning_rate": 2.298327392131372e-08,
      "loss": 0.3697,
      "step": 14448
    },
    {
      "epoch": 2.9702949943467982,
      "grad_norm": 0.23754067718982697,
      "learning_rate": 2.2665196864984918e-08,
      "loss": 0.3773,
      "step": 14449
    },
    {
      "epoch": 2.970500565320177,
      "grad_norm": 0.23977546393871307,
      "learning_rate": 2.2349335631711155e-08,
      "loss": 0.4008,
      "step": 14450
    },
    {
      "epoch": 2.9707061362935554,
      "grad_norm": 0.22875571250915527,
      "learning_rate": 2.2035690237064977e-08,
      "loss": 0.3623,
      "step": 14451
    },
    {
      "epoch": 2.970911707266934,
      "grad_norm": 0.23122116923332214,
      "learning_rate": 2.1724260696494027e-08,
      "loss": 0.369,
      "step": 14452
    },
    {
      "epoch": 2.9711172782403126,
      "grad_norm": 0.12258761376142502,
      "learning_rate": 2.141504702533603e-08,
      "loss": 0.4432,
      "step": 14453
    },
    {
      "epoch": 2.971322849213691,
      "grad_norm": 0.2527145445346832,
      "learning_rate": 2.1108049238833806e-08,
      "loss": 0.3691,
      "step": 14454
    },
    {
      "epoch": 2.9715284201870698,
      "grad_norm": 0.22957132756710052,
      "learning_rate": 2.080326735210525e-08,
      "loss": 0.3863,
      "step": 14455
    },
    {
      "epoch": 2.9717339911604483,
      "grad_norm": 0.11908449977636337,
      "learning_rate": 2.050070138016835e-08,
      "loss": 0.4381,
      "step": 14456
    },
    {
      "epoch": 2.9719395621338265,
      "grad_norm": 0.23832279443740845,
      "learning_rate": 2.020035133793119e-08,
      "loss": 0.3762,
      "step": 14457
    },
    {
      "epoch": 2.9721451331072055,
      "grad_norm": 0.22900496423244476,
      "learning_rate": 1.990221724018193e-08,
      "loss": 0.3593,
      "step": 14458
    },
    {
      "epoch": 2.9723507040805837,
      "grad_norm": 0.23259581625461578,
      "learning_rate": 1.960629910161882e-08,
      "loss": 0.3722,
      "step": 14459
    },
    {
      "epoch": 2.9725562750539622,
      "grad_norm": 0.2373165637254715,
      "learning_rate": 1.93125969368102e-08,
      "loss": 0.3792,
      "step": 14460
    },
    {
      "epoch": 2.972761846027341,
      "grad_norm": 0.23144948482513428,
      "learning_rate": 1.9021110760234494e-08,
      "loss": 0.3921,
      "step": 14461
    },
    {
      "epoch": 2.9729674170007194,
      "grad_norm": 0.22227592766284943,
      "learning_rate": 1.8731840586250217e-08,
      "loss": 0.3614,
      "step": 14462
    },
    {
      "epoch": 2.973172987974098,
      "grad_norm": 0.231735497713089,
      "learning_rate": 1.844478642910097e-08,
      "loss": 0.39,
      "step": 14463
    },
    {
      "epoch": 2.9733785589474766,
      "grad_norm": 0.23499642312526703,
      "learning_rate": 1.8159948302940432e-08,
      "loss": 0.3636,
      "step": 14464
    },
    {
      "epoch": 2.973584129920855,
      "grad_norm": 0.12244053184986115,
      "learning_rate": 1.7877326221787395e-08,
      "loss": 0.449,
      "step": 14465
    },
    {
      "epoch": 2.973789700894234,
      "grad_norm": 0.2361563742160797,
      "learning_rate": 1.7596920199575706e-08,
      "loss": 0.3727,
      "step": 14466
    },
    {
      "epoch": 2.9739952718676124,
      "grad_norm": 0.22725822031497955,
      "learning_rate": 1.731873025011932e-08,
      "loss": 0.3663,
      "step": 14467
    },
    {
      "epoch": 2.974200842840991,
      "grad_norm": 0.23438185453414917,
      "learning_rate": 1.7042756387117275e-08,
      "loss": 0.3793,
      "step": 14468
    },
    {
      "epoch": 2.9744064138143695,
      "grad_norm": 0.24105405807495117,
      "learning_rate": 1.6768998624168698e-08,
      "loss": 0.393,
      "step": 14469
    },
    {
      "epoch": 2.974611984787748,
      "grad_norm": 0.23841865360736847,
      "learning_rate": 1.6497456974762794e-08,
      "loss": 0.389,
      "step": 14470
    },
    {
      "epoch": 2.9748175557611267,
      "grad_norm": 0.23006348311901093,
      "learning_rate": 1.6228131452273864e-08,
      "loss": 0.3752,
      "step": 14471
    },
    {
      "epoch": 2.975023126734505,
      "grad_norm": 0.22141233086585999,
      "learning_rate": 1.5961022069971298e-08,
      "loss": 0.3466,
      "step": 14472
    },
    {
      "epoch": 2.975228697707884,
      "grad_norm": 0.2314436435699463,
      "learning_rate": 1.5696128841014568e-08,
      "loss": 0.3913,
      "step": 14473
    },
    {
      "epoch": 2.975434268681262,
      "grad_norm": 0.2326936423778534,
      "learning_rate": 1.5433451778448238e-08,
      "loss": 0.3836,
      "step": 14474
    },
    {
      "epoch": 2.9756398396546406,
      "grad_norm": 0.22499439120292664,
      "learning_rate": 1.5172990895226948e-08,
      "loss": 0.3827,
      "step": 14475
    },
    {
      "epoch": 2.975845410628019,
      "grad_norm": 0.23209989070892334,
      "learning_rate": 1.4914746204165443e-08,
      "loss": 0.3626,
      "step": 14476
    },
    {
      "epoch": 2.976050981601398,
      "grad_norm": 0.2376868724822998,
      "learning_rate": 1.4658717718003535e-08,
      "loss": 0.3645,
      "step": 14477
    },
    {
      "epoch": 2.9762565525747764,
      "grad_norm": 0.2345684915781021,
      "learning_rate": 1.4404905449336149e-08,
      "loss": 0.3789,
      "step": 14478
    },
    {
      "epoch": 2.976462123548155,
      "grad_norm": 0.11508353054523468,
      "learning_rate": 1.415330941068327e-08,
      "loss": 0.4672,
      "step": 14479
    },
    {
      "epoch": 2.9766676945215336,
      "grad_norm": 0.11564578115940094,
      "learning_rate": 1.3903929614434986e-08,
      "loss": 0.444,
      "step": 14480
    },
    {
      "epoch": 2.976873265494912,
      "grad_norm": 0.22484390437602997,
      "learning_rate": 1.3656766072871475e-08,
      "loss": 0.3825,
      "step": 14481
    },
    {
      "epoch": 2.9770788364682907,
      "grad_norm": 0.22326096892356873,
      "learning_rate": 1.3411818798172993e-08,
      "loss": 0.3728,
      "step": 14482
    },
    {
      "epoch": 2.9772844074416693,
      "grad_norm": 0.22746115922927856,
      "learning_rate": 1.3169087802409885e-08,
      "loss": 0.3882,
      "step": 14483
    },
    {
      "epoch": 2.977489978415048,
      "grad_norm": 0.23284806311130524,
      "learning_rate": 1.2928573097537588e-08,
      "loss": 0.3927,
      "step": 14484
    },
    {
      "epoch": 2.9776955493884265,
      "grad_norm": 0.23383115231990814,
      "learning_rate": 1.2690274695406623e-08,
      "loss": 0.3897,
      "step": 14485
    },
    {
      "epoch": 2.977901120361805,
      "grad_norm": 0.23091500997543335,
      "learning_rate": 1.2454192607752602e-08,
      "loss": 0.3783,
      "step": 14486
    },
    {
      "epoch": 2.9781066913351832,
      "grad_norm": 0.23954810202121735,
      "learning_rate": 1.2220326846211217e-08,
      "loss": 0.3963,
      "step": 14487
    },
    {
      "epoch": 2.9783122623085623,
      "grad_norm": 0.23919789493083954,
      "learning_rate": 1.1988677422303251e-08,
      "loss": 0.3777,
      "step": 14488
    },
    {
      "epoch": 2.9785178332819404,
      "grad_norm": 0.2225130796432495,
      "learning_rate": 1.1759244347434584e-08,
      "loss": 0.3658,
      "step": 14489
    },
    {
      "epoch": 2.978723404255319,
      "grad_norm": 0.2334238588809967,
      "learning_rate": 1.153202763292116e-08,
      "loss": 0.3906,
      "step": 14490
    },
    {
      "epoch": 2.9789289752286976,
      "grad_norm": 0.2455325573682785,
      "learning_rate": 1.1307027289944038e-08,
      "loss": 0.3829,
      "step": 14491
    },
    {
      "epoch": 2.979134546202076,
      "grad_norm": 0.2270069569349289,
      "learning_rate": 1.1084243329594347e-08,
      "loss": 0.3802,
      "step": 14492
    },
    {
      "epoch": 2.9793401171754548,
      "grad_norm": 0.22187288105487823,
      "learning_rate": 1.0863675762843306e-08,
      "loss": 0.3832,
      "step": 14493
    },
    {
      "epoch": 2.9795456881488334,
      "grad_norm": 0.2417239248752594,
      "learning_rate": 1.0645324600562223e-08,
      "loss": 0.3624,
      "step": 14494
    },
    {
      "epoch": 2.979751259122212,
      "grad_norm": 0.11916260421276093,
      "learning_rate": 1.0429189853507493e-08,
      "loss": 0.4491,
      "step": 14495
    },
    {
      "epoch": 2.9799568300955905,
      "grad_norm": 0.23340509831905365,
      "learning_rate": 1.02152715323256e-08,
      "loss": 0.3903,
      "step": 14496
    },
    {
      "epoch": 2.980162401068969,
      "grad_norm": 0.2338993400335312,
      "learning_rate": 1.0003569647558109e-08,
      "loss": 0.3733,
      "step": 14497
    },
    {
      "epoch": 2.9803679720423477,
      "grad_norm": 0.23980024456977844,
      "learning_rate": 9.794084209626687e-09,
      "loss": 0.3745,
      "step": 14498
    },
    {
      "epoch": 2.9805735430157263,
      "grad_norm": 0.22634217143058777,
      "learning_rate": 9.58681522885807e-09,
      "loss": 0.3821,
      "step": 14499
    },
    {
      "epoch": 2.980779113989105,
      "grad_norm": 0.2323930710554123,
      "learning_rate": 9.381762715464093e-09,
      "loss": 0.3854,
      "step": 14500
    },
    {
      "epoch": 2.9809846849624835,
      "grad_norm": 0.23172056674957275,
      "learning_rate": 9.178926679546673e-09,
      "loss": 0.3735,
      "step": 14501
    },
    {
      "epoch": 2.9811902559358616,
      "grad_norm": 0.23236456513404846,
      "learning_rate": 8.978307131097818e-09,
      "loss": 0.3731,
      "step": 14502
    },
    {
      "epoch": 2.9813958269092407,
      "grad_norm": 0.23568768799304962,
      "learning_rate": 8.779904079994628e-09,
      "loss": 0.3654,
      "step": 14503
    },
    {
      "epoch": 2.981601397882619,
      "grad_norm": 0.22454003989696503,
      "learning_rate": 8.583717536019276e-09,
      "loss": 0.4069,
      "step": 14504
    },
    {
      "epoch": 2.9818069688559974,
      "grad_norm": 0.2187877893447876,
      "learning_rate": 8.38974750883903e-09,
      "loss": 0.3814,
      "step": 14505
    },
    {
      "epoch": 2.982012539829376,
      "grad_norm": 0.2329930067062378,
      "learning_rate": 8.197994008001253e-09,
      "loss": 0.3876,
      "step": 14506
    },
    {
      "epoch": 2.9822181108027546,
      "grad_norm": 0.22449320554733276,
      "learning_rate": 8.008457042958384e-09,
      "loss": 0.3766,
      "step": 14507
    },
    {
      "epoch": 2.982423681776133,
      "grad_norm": 0.12521809339523315,
      "learning_rate": 7.821136623047953e-09,
      "loss": 0.4432,
      "step": 14508
    },
    {
      "epoch": 2.9826292527495117,
      "grad_norm": 0.12057320028543472,
      "learning_rate": 7.636032757492583e-09,
      "loss": 0.4466,
      "step": 14509
    },
    {
      "epoch": 2.9828348237228903,
      "grad_norm": 0.22894617915153503,
      "learning_rate": 7.453145455419975e-09,
      "loss": 0.378,
      "step": 14510
    },
    {
      "epoch": 2.983040394696269,
      "grad_norm": 0.22911518812179565,
      "learning_rate": 7.272474725837919e-09,
      "loss": 0.3645,
      "step": 14511
    },
    {
      "epoch": 2.9832459656696475,
      "grad_norm": 0.23008479177951813,
      "learning_rate": 7.0940205776443004e-09,
      "loss": 0.3793,
      "step": 14512
    },
    {
      "epoch": 2.983451536643026,
      "grad_norm": 0.23247113823890686,
      "learning_rate": 6.917783019627089e-09,
      "loss": 0.3744,
      "step": 14513
    },
    {
      "epoch": 2.9836571076164047,
      "grad_norm": 0.12385281175374985,
      "learning_rate": 6.7437620604793304e-09,
      "loss": 0.4215,
      "step": 14514
    },
    {
      "epoch": 2.9838626785897833,
      "grad_norm": 0.23346978425979614,
      "learning_rate": 6.571957708764176e-09,
      "loss": 0.3934,
      "step": 14515
    },
    {
      "epoch": 2.984068249563162,
      "grad_norm": 0.24097535014152527,
      "learning_rate": 6.402369972954847e-09,
      "loss": 0.3913,
      "step": 14516
    },
    {
      "epoch": 2.98427382053654,
      "grad_norm": 0.2249852418899536,
      "learning_rate": 6.234998861399666e-09,
      "loss": 0.3629,
      "step": 14517
    },
    {
      "epoch": 2.984479391509919,
      "grad_norm": 0.22656574845314026,
      "learning_rate": 6.069844382342038e-09,
      "loss": 0.3682,
      "step": 14518
    },
    {
      "epoch": 2.984684962483297,
      "grad_norm": 0.2244207262992859,
      "learning_rate": 5.90690654392545e-09,
      "loss": 0.3743,
      "step": 14519
    },
    {
      "epoch": 2.9848905334566758,
      "grad_norm": 0.2316739857196808,
      "learning_rate": 5.746185354173484e-09,
      "loss": 0.3774,
      "step": 14520
    },
    {
      "epoch": 2.9850961044300544,
      "grad_norm": 0.1228955090045929,
      "learning_rate": 5.587680821004803e-09,
      "loss": 0.4535,
      "step": 14521
    },
    {
      "epoch": 2.985301675403433,
      "grad_norm": 0.2285931259393692,
      "learning_rate": 5.431392952228165e-09,
      "loss": 0.3802,
      "step": 14522
    },
    {
      "epoch": 2.9855072463768115,
      "grad_norm": 0.21995897591114044,
      "learning_rate": 5.2773217555424086e-09,
      "loss": 0.3836,
      "step": 14523
    },
    {
      "epoch": 2.98571281735019,
      "grad_norm": 0.23392610251903534,
      "learning_rate": 5.125467238536463e-09,
      "loss": 0.3909,
      "step": 14524
    },
    {
      "epoch": 2.9859183883235687,
      "grad_norm": 0.23672394454479218,
      "learning_rate": 4.975829408694344e-09,
      "loss": 0.3759,
      "step": 14525
    },
    {
      "epoch": 2.9861239592969473,
      "grad_norm": 0.12258664518594742,
      "learning_rate": 4.828408273385154e-09,
      "loss": 0.4437,
      "step": 14526
    },
    {
      "epoch": 2.986329530270326,
      "grad_norm": 0.23568691313266754,
      "learning_rate": 4.683203839878081e-09,
      "loss": 0.3702,
      "step": 14527
    },
    {
      "epoch": 2.9865351012437045,
      "grad_norm": 0.2551220953464508,
      "learning_rate": 4.540216115317409e-09,
      "loss": 0.3785,
      "step": 14528
    },
    {
      "epoch": 2.986740672217083,
      "grad_norm": 0.22651293873786926,
      "learning_rate": 4.399445106752498e-09,
      "loss": 0.3663,
      "step": 14529
    },
    {
      "epoch": 2.9869462431904616,
      "grad_norm": 0.2356937676668167,
      "learning_rate": 4.260890821117802e-09,
      "loss": 0.3979,
      "step": 14530
    },
    {
      "epoch": 2.9871518141638402,
      "grad_norm": 0.2306637316942215,
      "learning_rate": 4.124553265242859e-09,
      "loss": 0.3723,
      "step": 14531
    },
    {
      "epoch": 2.9873573851372184,
      "grad_norm": 0.1231551244854927,
      "learning_rate": 3.9904324458373e-09,
      "loss": 0.4615,
      "step": 14532
    },
    {
      "epoch": 2.9875629561105974,
      "grad_norm": 0.23342475295066833,
      "learning_rate": 3.8585283695158345e-09,
      "loss": 0.376,
      "step": 14533
    },
    {
      "epoch": 2.9877685270839756,
      "grad_norm": 0.23796530067920685,
      "learning_rate": 3.728841042768272e-09,
      "loss": 0.3981,
      "step": 14534
    },
    {
      "epoch": 2.987974098057354,
      "grad_norm": 0.2273947149515152,
      "learning_rate": 3.601370471994492e-09,
      "loss": 0.3752,
      "step": 14535
    },
    {
      "epoch": 2.9881796690307327,
      "grad_norm": 0.22759398818016052,
      "learning_rate": 3.4761166634644795e-09,
      "loss": 0.3688,
      "step": 14536
    },
    {
      "epoch": 2.9883852400041113,
      "grad_norm": 0.22332710027694702,
      "learning_rate": 3.353079623353295e-09,
      "loss": 0.3744,
      "step": 14537
    },
    {
      "epoch": 2.98859081097749,
      "grad_norm": 0.22932596504688263,
      "learning_rate": 3.232259357726086e-09,
      "loss": 0.3762,
      "step": 14538
    },
    {
      "epoch": 2.9887963819508685,
      "grad_norm": 0.22627981007099152,
      "learning_rate": 3.1136558725280986e-09,
      "loss": 0.37,
      "step": 14539
    },
    {
      "epoch": 2.989001952924247,
      "grad_norm": 0.23626869916915894,
      "learning_rate": 2.9972691736046556e-09,
      "loss": 0.3708,
      "step": 14540
    },
    {
      "epoch": 2.9892075238976257,
      "grad_norm": 0.2292552888393402,
      "learning_rate": 2.8830992666911696e-09,
      "loss": 0.3854,
      "step": 14541
    },
    {
      "epoch": 2.9894130948710043,
      "grad_norm": 0.13139550387859344,
      "learning_rate": 2.7711461574081443e-09,
      "loss": 0.4664,
      "step": 14542
    },
    {
      "epoch": 2.989618665844383,
      "grad_norm": 0.2349810004234314,
      "learning_rate": 2.6614098512811603e-09,
      "loss": 0.394,
      "step": 14543
    },
    {
      "epoch": 2.9898242368177614,
      "grad_norm": 0.23067308962345123,
      "learning_rate": 2.553890353700905e-09,
      "loss": 0.3803,
      "step": 14544
    },
    {
      "epoch": 2.99002980779114,
      "grad_norm": 0.238302543759346,
      "learning_rate": 2.448587669978131e-09,
      "loss": 0.3568,
      "step": 14545
    },
    {
      "epoch": 2.9902353787645186,
      "grad_norm": 0.11440926790237427,
      "learning_rate": 2.345501805298689e-09,
      "loss": 0.4259,
      "step": 14546
    },
    {
      "epoch": 2.9904409497378968,
      "grad_norm": 0.23391030728816986,
      "learning_rate": 2.244632764733523e-09,
      "loss": 0.3822,
      "step": 14547
    },
    {
      "epoch": 2.990646520711276,
      "grad_norm": 0.2159079611301422,
      "learning_rate": 2.145980553253657e-09,
      "loss": 0.3729,
      "step": 14548
    },
    {
      "epoch": 2.990852091684654,
      "grad_norm": 0.2323864847421646,
      "learning_rate": 2.0495451757251983e-09,
      "loss": 0.3891,
      "step": 14549
    },
    {
      "epoch": 2.9910576626580325,
      "grad_norm": 0.23049965500831604,
      "learning_rate": 1.955326636899346e-09,
      "loss": 0.3736,
      "step": 14550
    },
    {
      "epoch": 2.991263233631411,
      "grad_norm": 0.227107435464859,
      "learning_rate": 1.8633249414073963e-09,
      "loss": 0.3669,
      "step": 14551
    },
    {
      "epoch": 2.9914688046047897,
      "grad_norm": 0.22962632775306702,
      "learning_rate": 1.7735400937957114e-09,
      "loss": 0.3931,
      "step": 14552
    },
    {
      "epoch": 2.9916743755781683,
      "grad_norm": 0.22868715226650238,
      "learning_rate": 1.6859720984757631e-09,
      "loss": 0.3688,
      "step": 14553
    },
    {
      "epoch": 2.991879946551547,
      "grad_norm": 0.22757934033870697,
      "learning_rate": 1.6006209597640986e-09,
      "loss": 0.3572,
      "step": 14554
    },
    {
      "epoch": 2.9920855175249255,
      "grad_norm": 0.22525522112846375,
      "learning_rate": 1.5174866818723487e-09,
      "loss": 0.3889,
      "step": 14555
    },
    {
      "epoch": 2.992291088498304,
      "grad_norm": 0.22742381691932678,
      "learning_rate": 1.4365692688922405e-09,
      "loss": 0.3883,
      "step": 14556
    },
    {
      "epoch": 2.9924966594716826,
      "grad_norm": 0.2328689843416214,
      "learning_rate": 1.3578687248055888e-09,
      "loss": 0.3777,
      "step": 14557
    },
    {
      "epoch": 2.9927022304450612,
      "grad_norm": 0.2241593301296234,
      "learning_rate": 1.2813850534992843e-09,
      "loss": 0.373,
      "step": 14558
    },
    {
      "epoch": 2.99290780141844,
      "grad_norm": 0.14074623584747314,
      "learning_rate": 1.207118258730322e-09,
      "loss": 0.4593,
      "step": 14559
    },
    {
      "epoch": 2.9931133723918184,
      "grad_norm": 0.22643250226974487,
      "learning_rate": 1.1350683441657684e-09,
      "loss": 0.3729,
      "step": 14560
    },
    {
      "epoch": 2.993318943365197,
      "grad_norm": 0.23651181161403656,
      "learning_rate": 1.06523531334779e-09,
      "loss": 0.3741,
      "step": 14561
    },
    {
      "epoch": 2.993524514338575,
      "grad_norm": 0.23334497213363647,
      "learning_rate": 9.976191697286253e-10,
      "loss": 0.3901,
      "step": 14562
    },
    {
      "epoch": 2.993730085311954,
      "grad_norm": 0.2361806035041809,
      "learning_rate": 9.322199166256207e-10,
      "loss": 0.3956,
      "step": 14563
    },
    {
      "epoch": 2.9939356562853323,
      "grad_norm": 0.23388999700546265,
      "learning_rate": 8.690375572711906e-10,
      "loss": 0.3776,
      "step": 14564
    },
    {
      "epoch": 2.9941412272587113,
      "grad_norm": 0.12321368604898453,
      "learning_rate": 8.080720947678533e-10,
      "loss": 0.4526,
      "step": 14565
    },
    {
      "epoch": 2.9943467982320895,
      "grad_norm": 0.23372013866901398,
      "learning_rate": 7.493235321331948e-10,
      "loss": 0.3964,
      "step": 14566
    },
    {
      "epoch": 2.994552369205468,
      "grad_norm": 0.22259008884429932,
      "learning_rate": 6.927918722499093e-10,
      "loss": 0.3632,
      "step": 14567
    },
    {
      "epoch": 2.9947579401788467,
      "grad_norm": 0.22694145143032074,
      "learning_rate": 6.384771179057669e-10,
      "loss": 0.3826,
      "step": 14568
    },
    {
      "epoch": 2.9949635111522253,
      "grad_norm": 0.22071406245231628,
      "learning_rate": 5.863792717736293e-10,
      "loss": 0.3715,
      "step": 14569
    },
    {
      "epoch": 2.995169082125604,
      "grad_norm": 0.2295757234096527,
      "learning_rate": 5.364983364314347e-10,
      "loss": 0.348,
      "step": 14570
    },
    {
      "epoch": 2.9953746530989824,
      "grad_norm": 0.11967863142490387,
      "learning_rate": 4.888343143222285e-10,
      "loss": 0.4525,
      "step": 14571
    },
    {
      "epoch": 2.995580224072361,
      "grad_norm": 0.23142680525779724,
      "learning_rate": 4.4338720780412456e-10,
      "loss": 0.3912,
      "step": 14572
    },
    {
      "epoch": 2.9957857950457396,
      "grad_norm": 0.12113110721111298,
      "learning_rate": 4.0015701911533256e-10,
      "loss": 0.4394,
      "step": 14573
    },
    {
      "epoch": 2.995991366019118,
      "grad_norm": 0.1214088723063469,
      "learning_rate": 3.591437503791539e-10,
      "loss": 0.4441,
      "step": 14574
    },
    {
      "epoch": 2.996196936992497,
      "grad_norm": 0.24138882756233215,
      "learning_rate": 3.203474036239662e-10,
      "loss": 0.381,
      "step": 14575
    },
    {
      "epoch": 2.9964025079658754,
      "grad_norm": 0.22098585963249207,
      "learning_rate": 2.8376798075324673e-10,
      "loss": 0.379,
      "step": 14576
    },
    {
      "epoch": 2.9966080789392535,
      "grad_norm": 0.22628125548362732,
      "learning_rate": 2.4940548357554884e-10,
      "loss": 0.385,
      "step": 14577
    },
    {
      "epoch": 2.9968136499126325,
      "grad_norm": 0.23406754434108734,
      "learning_rate": 2.1725991378451772e-10,
      "loss": 0.3906,
      "step": 14578
    },
    {
      "epoch": 2.9970192208860107,
      "grad_norm": 0.2328203022480011,
      "learning_rate": 1.8733127295389452e-10,
      "loss": 0.393,
      "step": 14579
    },
    {
      "epoch": 2.9972247918593897,
      "grad_norm": 0.11977335065603256,
      "learning_rate": 1.5961956256749233e-10,
      "loss": 0.4311,
      "step": 14580
    },
    {
      "epoch": 2.997430362832768,
      "grad_norm": 0.22989055514335632,
      "learning_rate": 1.3412478398922012e-10,
      "loss": 0.3797,
      "step": 14581
    },
    {
      "epoch": 2.9976359338061465,
      "grad_norm": 0.23267367482185364,
      "learning_rate": 1.1084693847307482e-10,
      "loss": 0.3734,
      "step": 14582
    },
    {
      "epoch": 2.997841504779525,
      "grad_norm": 0.22674784064292908,
      "learning_rate": 8.978602716813722e-11,
      "loss": 0.385,
      "step": 14583
    },
    {
      "epoch": 2.9980470757529036,
      "grad_norm": 0.24478791654109955,
      "learning_rate": 7.09420511085801e-11,
      "loss": 0.3813,
      "step": 14584
    },
    {
      "epoch": 2.9982526467262822,
      "grad_norm": 0.23882992565631866,
      "learning_rate": 5.431501122366012e-11,
      "loss": 0.3949,
      "step": 14585
    },
    {
      "epoch": 2.998458217699661,
      "grad_norm": 0.2423866242170334,
      "learning_rate": 3.990490833771787e-11,
      "loss": 0.3893,
      "step": 14586
    },
    {
      "epoch": 2.9986637886730394,
      "grad_norm": 0.24024717509746552,
      "learning_rate": 2.771174315019387e-11,
      "loss": 0.3785,
      "step": 14587
    },
    {
      "epoch": 2.998869359646418,
      "grad_norm": 0.23211045563220978,
      "learning_rate": 1.773551627060055e-11,
      "loss": 0.3623,
      "step": 14588
    },
    {
      "epoch": 2.9990749306197966,
      "grad_norm": 0.22888019680976868,
      "learning_rate": 9.976228188546265e-12,
      "loss": 0.3982,
      "step": 14589
    },
    {
      "epoch": 2.999280501593175,
      "grad_norm": 0.23154133558273315,
      "learning_rate": 4.433879288723298e-12,
      "loss": 0.3689,
      "step": 14590
    },
    {
      "epoch": 2.9994860725665538,
      "grad_norm": 0.22858844697475433,
      "learning_rate": 1.1084698359198341e-12,
      "loss": 0.3763,
      "step": 14591
    },
    {
      "epoch": 2.999691643539932,
      "grad_norm": 0.22756846249103546,
      "learning_rate": 0.0,
      "loss": 0.3625,
      "step": 14592
    },
    {
      "epoch": 2.999691643539932,
      "step": 14592,
      "total_flos": 6.439866310377226e+20,
      "train_loss": 0.5454843599418701,
      "train_runtime": 158902.6427,
      "train_samples_per_second": 188.085,
      "train_steps_per_second": 0.092
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 14592,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6.439866310377226e+20,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}